1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 /*
26 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
27 * All Rights Reserved
28 */
29
30 #include <sys/param.h>
31 #include <sys/types.h>
32 #include <sys/systm.h>
33 #include <sys/cred.h>
34 #include <sys/buf.h>
35 #include <sys/vfs.h>
36 #include <sys/vfs_opreg.h>
37 #include <sys/vnode.h>
38 #include <sys/uio.h>
39 #include <sys/errno.h>
40 #include <sys/sysmacros.h>
41 #include <sys/statvfs.h>
42 #include <sys/kmem.h>
43 #include <sys/dirent.h>
44 #include <sys/cmn_err.h>
45 #include <sys/debug.h>
46 #include <sys/systeminfo.h>
47 #include <sys/flock.h>
48 #include <sys/pathname.h>
49 #include <sys/nbmlock.h>
50 #include <sys/share.h>
51 #include <sys/atomic.h>
52 #include <sys/policy.h>
53 #include <sys/fem.h>
54 #include <sys/sdt.h>
55 #include <sys/ddi.h>
56 #include <sys/zone.h>
57
58 #include <fs/fs_reparse.h>
59
60 #include <rpc/types.h>
61 #include <rpc/auth.h>
62 #include <rpc/rpcsec_gss.h>
63 #include <rpc/svc.h>
64
65 #include <nfs/nfs.h>
66 #include <nfs/export.h>
67 #include <nfs/nfs_cmd.h>
68 #include <nfs/lm.h>
69 #include <nfs/nfs4.h>
70
71 #include <sys/strsubr.h>
72 #include <sys/strsun.h>
73
74 #include <inet/common.h>
75 #include <inet/ip.h>
76 #include <inet/ip6.h>
77
78 #include <sys/tsol/label.h>
79 #include <sys/tsol/tndb.h>
80
81 #define RFS4_MAXLOCK_TRIES 4 /* Try to get the lock this many times */
82 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
83 #define RFS4_LOCK_DELAY 10 /* Milliseconds */
84 static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY;
85 extern struct svc_ops rdma_svc_ops;
86 extern int nfs_loaned_buffers;
87 /* End of Tunables */
88
89 static int rdma_setup_read_data4(READ4args *, READ4res *);
90
91 /*
92 * Used to bump the stateid4.seqid value and show changes in the stateid
93 */
94 #define next_stateid(sp) (++(sp)->bits.chgseq)
95
96 /*
97 * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
98 * This is used to return NFS4ERR_TOOSMALL when clients specify
99 * maxcount that isn't large enough to hold the smallest possible
100 * XDR encoded dirent.
101 *
102 * sizeof cookie (8 bytes) +
103 * sizeof name_len (4 bytes) +
104 * sizeof smallest (padded) name (4 bytes) +
105 * sizeof bitmap4_len (12 bytes) + NOTE: we always encode len=2 bm4
106 * sizeof attrlist4_len (4 bytes) +
107 * sizeof next boolean (4 bytes)
108 *
109 * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
110 * the smallest possible entry4 (assumes no attrs requested).
111 * sizeof nfsstat4 (4 bytes) +
112 * sizeof verifier4 (8 bytes) +
113 * sizeof entry4list bool (4 bytes) +
114 * sizeof entry4 (36 bytes) +
115 * sizeof eof bool (4 bytes)
116 *
117 * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
118 * VOP_READDIR. Its value is the size of the maximum possible dirent
119 * for solaris. The DIRENT64_RECLEN macro returns the size of dirent
120 * required for a given name length. MAXNAMELEN is the maximum
121 * filename length allowed in Solaris. The first two DIRENT64_RECLEN()
122 * macros are to allow for . and .. entries -- just a minor tweak to try
123 * and guarantee that buffer we give to VOP_READDIR will be large enough
124 * to hold ., .., and the largest possible solaris dirent64.
125 */
126 #define RFS4_MINLEN_ENTRY4 36
127 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
128 #define RFS4_MINLEN_RDDIR_BUF \
129 (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
130
131 /*
132 * It would be better to pad to 4 bytes since that's what XDR would do,
133 * but the dirents UFS gives us are already padded to 8, so just take
134 * what we're given. Dircount is only a hint anyway. Currently the
135 * solaris kernel is ASCII only, so there's no point in calling the
136 * UTF8 functions.
137 *
 * dirent64: name padded to provide 8 byte struct alignment
139 * d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
140 *
141 * cookie: uint64_t + utf8namelen: uint_t + utf8name padded to 8 bytes
142 *
143 */
144 #define DIRENT64_TO_DIRCOUNT(dp) \
145 (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
146
147 time_t rfs4_start_time; /* Initialized in rfs4_srvrinit */
148
149 static sysid_t lockt_sysid; /* dummy sysid for all LOCKT calls */
150
151 u_longlong_t nfs4_srv_caller_id;
152 uint_t nfs4_srv_vkey = 0;
153
154 verifier4 Write4verf;
155 verifier4 Readdir4verf;
156
157 void rfs4_init_compound_state(struct compound_state *);
158
159 static void nullfree(caddr_t);
160 static void rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
161 struct compound_state *);
162 static void rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
163 struct compound_state *);
164 static void rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
165 struct compound_state *);
166 static void rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
167 struct compound_state *);
168 static void rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
169 struct compound_state *);
170 static void rfs4_op_create_free(nfs_resop4 *resop);
171 static void rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
172 struct svc_req *, struct compound_state *);
173 static void rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
174 struct svc_req *, struct compound_state *);
175 static void rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
176 struct compound_state *);
177 static void rfs4_op_getattr_free(nfs_resop4 *);
178 static void rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
179 struct compound_state *);
180 static void rfs4_op_getfh_free(nfs_resop4 *);
181 static void rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
182 struct compound_state *);
183 static void rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
184 struct compound_state *);
185 static void rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
186 struct compound_state *);
187 static void lock_denied_free(nfs_resop4 *);
188 static void rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
189 struct compound_state *);
190 static void rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
191 struct compound_state *);
192 static void rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
193 struct compound_state *);
194 static void rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
195 struct compound_state *);
196 static void rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
197 struct svc_req *req, struct compound_state *cs);
198 static void rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
199 struct compound_state *);
200 static void rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
201 struct compound_state *);
202 static void rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
203 struct svc_req *, struct compound_state *);
204 static void rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
205 struct svc_req *, struct compound_state *);
206 static void rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
207 struct compound_state *);
208 static void rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
209 struct compound_state *);
210 static void rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
211 struct compound_state *);
212 static void rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
213 struct compound_state *);
214 static void rfs4_op_read_free(nfs_resop4 *);
215 static void rfs4_op_readdir_free(nfs_resop4 *resop);
216 static void rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
217 struct compound_state *);
218 static void rfs4_op_readlink_free(nfs_resop4 *);
219 static void rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
220 struct svc_req *, struct compound_state *);
221 static void rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
222 struct compound_state *);
223 static void rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
224 struct compound_state *);
225 static void rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
226 struct compound_state *);
227 static void rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
228 struct compound_state *);
229 static void rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
230 struct compound_state *);
231 static void rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
232 struct compound_state *);
233 static void rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
234 struct compound_state *);
235 static void rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
236 struct compound_state *);
237 static void rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
238 struct svc_req *, struct compound_state *);
239 static void rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
240 struct svc_req *req, struct compound_state *);
241 static void rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
242 struct compound_state *);
243 static void rfs4_op_secinfo_free(nfs_resop4 *);
244
245 static nfsstat4 check_open_access(uint32_t,
246 struct compound_state *, struct svc_req *);
247 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
248 void rfs4_ss_clid(rfs4_client_t *);
249
250 /*
251 * translation table for attrs
252 */
/*
 * Per-request attribute translation state.  Instances are set up with
 * nfs4_ntov_table_init(), released with nfs4_ntov_table_free(), and handed
 * to do_rfs4_set_attrs().
 *
 * NOTE(review): the field descriptions below are inferred from names and
 * types only; confirm against the code that fills the table in.
 */
struct nfs4_ntov_table {
	union nfs4_attr_u *na;		/* presumably per-attr value storage */
	uint8_t amap[NFS4_MAXNUM_ATTRS]; /* one slot per possible attribute */
	int attrcnt;			/* presumably # of entries in use */
	bool_t vfsstat;			/* presumably "needs VFS stats" flag */
};
259
260 static void nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
261 static void nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
262 struct nfs4_svgetit_arg *sargp);
263
264 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
265 struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
266 struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
267
268 fem_t *deleg_rdops;
269 fem_t *deleg_wrops;
270
271 rfs4_servinst_t *rfs4_cur_servinst = NULL; /* current server instance */
272 kmutex_t rfs4_servinst_lock; /* protects linked list */
273 int rfs4_seen_first_compound; /* set first time we see one */
274
275 /*
276 * NFS4 op dispatch table
277 */
278
/*
 * One entry of the NFSv4 operation dispatch table (rfsv4disptab).  The
 * table is indexed by operation number; each entry names the handler for
 * the operation, the routine that releases whatever the handler allocated
 * for its result, and a set of RPC_* flags.
 */
struct rfsv4disp {
	void (*dis_proc)();		/* proc to call */
	void (*dis_resfree)();		/* frees space allocated by proc */
	int dis_flags;			/* RPC_IDEMPOTENT, etc... */
};
284
285 static struct rfsv4disp rfsv4disptab[] = {
286 /*
287 * NFS VERSION 4
288 */
289
290 /* RFS_NULL = 0 */
291 {rfs4_op_illegal, nullfree, 0},
292
293 /* UNUSED = 1 */
294 {rfs4_op_illegal, nullfree, 0},
295
296 /* UNUSED = 2 */
297 {rfs4_op_illegal, nullfree, 0},
298
299 /* OP_ACCESS = 3 */
300 {rfs4_op_access, nullfree, RPC_IDEMPOTENT},
301
302 /* OP_CLOSE = 4 */
303 {rfs4_op_close, nullfree, 0},
304
305 /* OP_COMMIT = 5 */
306 {rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
307
308 /* OP_CREATE = 6 */
309 {rfs4_op_create, nullfree, 0},
310
311 /* OP_DELEGPURGE = 7 */
312 {rfs4_op_delegpurge, nullfree, 0},
313
314 /* OP_DELEGRETURN = 8 */
315 {rfs4_op_delegreturn, nullfree, 0},
316
317 /* OP_GETATTR = 9 */
318 {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
319
320 /* OP_GETFH = 10 */
321 {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
322
323 /* OP_LINK = 11 */
324 {rfs4_op_link, nullfree, 0},
325
326 /* OP_LOCK = 12 */
327 {rfs4_op_lock, lock_denied_free, 0},
328
329 /* OP_LOCKT = 13 */
330 {rfs4_op_lockt, lock_denied_free, 0},
331
332 /* OP_LOCKU = 14 */
333 {rfs4_op_locku, nullfree, 0},
334
335 /* OP_LOOKUP = 15 */
336 {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
337
338 /* OP_LOOKUPP = 16 */
339 {rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
340
341 /* OP_NVERIFY = 17 */
342 {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
343
344 /* OP_OPEN = 18 */
345 {rfs4_op_open, rfs4_free_reply, 0},
346
347 /* OP_OPENATTR = 19 */
348 {rfs4_op_openattr, nullfree, 0},
349
350 /* OP_OPEN_CONFIRM = 20 */
351 {rfs4_op_open_confirm, nullfree, 0},
352
353 /* OP_OPEN_DOWNGRADE = 21 */
354 {rfs4_op_open_downgrade, nullfree, 0},
355
	/* OP_PUTFH = 22 */
357 {rfs4_op_putfh, nullfree, RPC_ALL},
358
359 /* OP_PUTPUBFH = 23 */
360 {rfs4_op_putpubfh, nullfree, RPC_ALL},
361
362 /* OP_PUTROOTFH = 24 */
363 {rfs4_op_putrootfh, nullfree, RPC_ALL},
364
365 /* OP_READ = 25 */
366 {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
367
368 /* OP_READDIR = 26 */
369 {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
370
371 /* OP_READLINK = 27 */
372 {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
373
374 /* OP_REMOVE = 28 */
375 {rfs4_op_remove, nullfree, 0},
376
377 /* OP_RENAME = 29 */
378 {rfs4_op_rename, nullfree, 0},
379
380 /* OP_RENEW = 30 */
381 {rfs4_op_renew, nullfree, 0},
382
383 /* OP_RESTOREFH = 31 */
384 {rfs4_op_restorefh, nullfree, RPC_ALL},
385
386 /* OP_SAVEFH = 32 */
387 {rfs4_op_savefh, nullfree, RPC_ALL},
388
389 /* OP_SECINFO = 33 */
390 {rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
391
392 /* OP_SETATTR = 34 */
393 {rfs4_op_setattr, nullfree, 0},
394
395 /* OP_SETCLIENTID = 35 */
396 {rfs4_op_setclientid, nullfree, 0},
397
398 /* OP_SETCLIENTID_CONFIRM = 36 */
399 {rfs4_op_setclientid_confirm, nullfree, 0},
400
401 /* OP_VERIFY = 37 */
402 {rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
403
404 /* OP_WRITE = 38 */
405 {rfs4_op_write, nullfree, 0},
406
407 /* OP_RELEASE_LOCKOWNER = 39 */
408 {rfs4_op_release_lockowner, nullfree, 0},
409 };
410
411 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
412
413 #define OP_ILLEGAL_IDX (rfsv4disp_cnt)
414
415 #ifdef DEBUG
416
417 int rfs4_fillone_debug = 0;
418 int rfs4_no_stub_access = 1;
419 int rfs4_rddir_debug = 0;
420
421 static char *rfs4_op_string[] = {
422 "rfs4_op_null",
423 "rfs4_op_1 unused",
424 "rfs4_op_2 unused",
425 "rfs4_op_access",
426 "rfs4_op_close",
427 "rfs4_op_commit",
428 "rfs4_op_create",
429 "rfs4_op_delegpurge",
430 "rfs4_op_delegreturn",
431 "rfs4_op_getattr",
432 "rfs4_op_getfh",
433 "rfs4_op_link",
434 "rfs4_op_lock",
435 "rfs4_op_lockt",
436 "rfs4_op_locku",
437 "rfs4_op_lookup",
438 "rfs4_op_lookupp",
439 "rfs4_op_nverify",
440 "rfs4_op_open",
441 "rfs4_op_openattr",
442 "rfs4_op_open_confirm",
443 "rfs4_op_open_downgrade",
444 "rfs4_op_putfh",
445 "rfs4_op_putpubfh",
446 "rfs4_op_putrootfh",
447 "rfs4_op_read",
448 "rfs4_op_readdir",
449 "rfs4_op_readlink",
450 "rfs4_op_remove",
451 "rfs4_op_rename",
452 "rfs4_op_renew",
453 "rfs4_op_restorefh",
454 "rfs4_op_savefh",
455 "rfs4_op_secinfo",
456 "rfs4_op_setattr",
457 "rfs4_op_setclientid",
458 "rfs4_op_setclient_confirm",
459 "rfs4_op_verify",
460 "rfs4_op_write",
461 "rfs4_op_release_lockowner",
462 "rfs4_op_illegal"
463 };
464 #endif
465
466 void rfs4_ss_chkclid(rfs4_client_t *);
467
468 extern size_t strlcpy(char *dst, const char *src, size_t dstsize);
469
470 extern void rfs4_free_fs_locations4(fs_locations4 *);
471
472 #ifdef nextdp
473 #undef nextdp
474 #endif
475 #define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
476
/*
 * FEM template for read delegations.  rfs4_srvrinit() passes this table to
 * fem_create() to build deleg_rdops.  Each pair names a vnode operation and
 * the deleg_rd_* monitor routine installed for it; the NULL, NULL pair
 * terminates the list.
 */
static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
	VOPNAME_OPEN,		{ .femop_open = deleg_rd_open },
	VOPNAME_WRITE,		{ .femop_write = deleg_rd_write },
	VOPNAME_SETATTR,	{ .femop_setattr = deleg_rd_setattr },
	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_rd_rwlock },
	VOPNAME_SPACE,		{ .femop_space = deleg_rd_space },
	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_rd_setsecattr },
	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_rd_vnevent },
	NULL,			NULL
};
/*
 * FEM template for write delegations.  rfs4_srvrinit() passes this table
 * to fem_create() to build deleg_wrops.  It monitors the same operations
 * as the read-delegation template plus VOPNAME_READ; the NULL, NULL pair
 * terminates the list.
 */
static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
	VOPNAME_OPEN,		{ .femop_open = deleg_wr_open },
	VOPNAME_READ,		{ .femop_read = deleg_wr_read },
	VOPNAME_WRITE,		{ .femop_write = deleg_wr_write },
	VOPNAME_SETATTR,	{ .femop_setattr = deleg_wr_setattr },
	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_wr_rwlock },
	VOPNAME_SPACE,		{ .femop_space = deleg_wr_space },
	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_wr_setsecattr },
	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_wr_vnevent },
	NULL,			NULL
};
498
499 int
rfs4_srvrinit(void)500 rfs4_srvrinit(void)
501 {
502 timespec32_t verf;
503 int error;
504 extern void rfs4_attr_init();
505 extern krwlock_t rfs4_deleg_policy_lock;
506
507 /*
508 * The following algorithm attempts to find a unique verifier
509 * to be used as the write verifier returned from the server
510 * to the client. It is important that this verifier change
511 * whenever the server reboots. Of secondary importance, it
512 * is important for the verifier to be unique between two
513 * different servers.
514 *
515 * Thus, an attempt is made to use the system hostid and the
516 * current time in seconds when the nfssrv kernel module is
517 * loaded. It is assumed that an NFS server will not be able
518 * to boot and then to reboot in less than a second. If the
519 * hostid has not been set, then the current high resolution
520 * time is used. This will ensure different verifiers each
521 * time the server reboots and minimize the chances that two
522 * different servers will have the same verifier.
523 * XXX - this is broken on LP64 kernels.
524 */
525 verf.tv_sec = (time_t)zone_get_hostid(NULL);
526 if (verf.tv_sec != 0) {
527 verf.tv_nsec = gethrestime_sec();
528 } else {
529 timespec_t tverf;
530
531 gethrestime(&tverf);
532 verf.tv_sec = (time_t)tverf.tv_sec;
533 verf.tv_nsec = tverf.tv_nsec;
534 }
535
536 Write4verf = *(uint64_t *)&verf;
537
538 rfs4_attr_init();
539 mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL);
540
541 /* Used to manage create/destroy of server state */
542 mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL);
543
544 /* Used to manage access to server instance linked list */
545 mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL);
546
547 /* Used to manage access to rfs4_deleg_policy */
548 rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL);
549
550 error = fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops);
551 if (error != 0) {
552 rfs4_disable_delegation();
553 } else {
554 error = fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
555 &deleg_wrops);
556 if (error != 0) {
557 rfs4_disable_delegation();
558 fem_free(deleg_rdops);
559 }
560 }
561
562 nfs4_srv_caller_id = fs_new_caller_id();
563
564 lockt_sysid = lm_alloc_sysidt();
565
566 vsd_create(&nfs4_srv_vkey, NULL);
567
568 return (0);
569 }
570
571 void
rfs4_srvrfini(void)572 rfs4_srvrfini(void)
573 {
574 extern krwlock_t rfs4_deleg_policy_lock;
575
576 if (lockt_sysid != LM_NOSYSID) {
577 lm_free_sysidt(lockt_sysid);
578 lockt_sysid = LM_NOSYSID;
579 }
580
581 mutex_destroy(&rfs4_deleg_lock);
582 mutex_destroy(&rfs4_state_lock);
583 rw_destroy(&rfs4_deleg_policy_lock);
584
585 fem_free(deleg_rdops);
586 fem_free(deleg_wrops);
587 }
588
589 void
rfs4_init_compound_state(struct compound_state * cs)590 rfs4_init_compound_state(struct compound_state *cs)
591 {
592 bzero(cs, sizeof (*cs));
593 cs->cont = TRUE;
594 cs->access = CS_ACCESS_DENIED;
595 cs->deleg = FALSE;
596 cs->mandlock = FALSE;
597 cs->fh.nfs_fh4_val = cs->fhbuf;
598 }
599
600 void
rfs4_grace_start(rfs4_servinst_t * sip)601 rfs4_grace_start(rfs4_servinst_t *sip)
602 {
603 rw_enter(&sip->rwlock, RW_WRITER);
604 sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
605 sip->grace_period = rfs4_grace_period;
606 rw_exit(&sip->rwlock);
607 }
608
609 /*
610 * returns true if the instance's grace period has never been started
611 */
612 int
rfs4_servinst_grace_new(rfs4_servinst_t * sip)613 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
614 {
615 time_t start_time;
616
617 rw_enter(&sip->rwlock, RW_READER);
618 start_time = sip->start_time;
619 rw_exit(&sip->rwlock);
620
621 return (start_time == 0);
622 }
623
624 /*
625 * Indicates if server instance is within the
626 * grace period.
627 */
628 int
rfs4_servinst_in_grace(rfs4_servinst_t * sip)629 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
630 {
631 time_t grace_expiry;
632
633 rw_enter(&sip->rwlock, RW_READER);
634 grace_expiry = sip->start_time + sip->grace_period;
635 rw_exit(&sip->rwlock);
636
637 return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
638 }
639
640 int
rfs4_clnt_in_grace(rfs4_client_t * cp)641 rfs4_clnt_in_grace(rfs4_client_t *cp)
642 {
643 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
644
645 return (rfs4_servinst_in_grace(cp->rc_server_instance));
646 }
647
648 /*
649 * reset all currently active grace periods
650 */
651 void
rfs4_grace_reset_all(void)652 rfs4_grace_reset_all(void)
653 {
654 rfs4_servinst_t *sip;
655
656 mutex_enter(&rfs4_servinst_lock);
657 for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
658 if (rfs4_servinst_in_grace(sip))
659 rfs4_grace_start(sip);
660 mutex_exit(&rfs4_servinst_lock);
661 }
662
663 /*
664 * start any new instances' grace periods
665 */
666 void
rfs4_grace_start_new(void)667 rfs4_grace_start_new(void)
668 {
669 rfs4_servinst_t *sip;
670
671 mutex_enter(&rfs4_servinst_lock);
672 for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
673 if (rfs4_servinst_grace_new(sip))
674 rfs4_grace_start(sip);
675 mutex_exit(&rfs4_servinst_lock);
676 }
677
678 static rfs4_dss_path_t *
rfs4_dss_newpath(rfs4_servinst_t * sip,char * path,unsigned index)679 rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index)
680 {
681 size_t len;
682 rfs4_dss_path_t *dss_path;
683
684 dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
685
686 /*
687 * Take a copy of the string, since the original may be overwritten.
688 * Sadly, no strdup() in the kernel.
689 */
690 /* allow for NUL */
691 len = strlen(path) + 1;
692 dss_path->path = kmem_alloc(len, KM_SLEEP);
693 (void) strlcpy(dss_path->path, path, len);
694
695 /* associate with servinst */
696 dss_path->sip = sip;
697 dss_path->index = index;
698
699 /*
700 * Add to list of served paths.
701 * No locking required, as we're only ever called at startup.
702 */
703 if (rfs4_dss_pathlist == NULL) {
704 /* this is the first dss_path_t */
705
706 /* needed for insque/remque */
707 dss_path->next = dss_path->prev = dss_path;
708
709 rfs4_dss_pathlist = dss_path;
710 } else {
711 insque(dss_path, rfs4_dss_pathlist);
712 }
713
714 return (dss_path);
715 }
716
717 /*
718 * Create a new server instance, and make it the currently active instance.
719 * Note that starting the grace period too early will reduce the clients'
720 * recovery window.
721 */
722 void
rfs4_servinst_create(int start_grace,int dss_npaths,char ** dss_paths)723 rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths)
724 {
725 unsigned i;
726 rfs4_servinst_t *sip;
727 rfs4_oldstate_t *oldstate;
728
729 sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
730 rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
731
732 sip->start_time = (time_t)0;
733 sip->grace_period = (time_t)0;
734 sip->next = NULL;
735 sip->prev = NULL;
736
737 rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
738 /*
739 * This initial dummy entry is required to setup for insque/remque.
740 * It must be skipped over whenever the list is traversed.
741 */
742 oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
743 /* insque/remque require initial list entry to be self-terminated */
744 oldstate->next = oldstate;
745 oldstate->prev = oldstate;
746 sip->oldstate = oldstate;
747
748
749 sip->dss_npaths = dss_npaths;
750 sip->dss_paths = kmem_alloc(dss_npaths *
751 sizeof (rfs4_dss_path_t *), KM_SLEEP);
752
753 for (i = 0; i < dss_npaths; i++) {
754 sip->dss_paths[i] = rfs4_dss_newpath(sip, dss_paths[i], i);
755 }
756
757 mutex_enter(&rfs4_servinst_lock);
758 if (rfs4_cur_servinst != NULL) {
759 /* add to linked list */
760 sip->prev = rfs4_cur_servinst;
761 rfs4_cur_servinst->next = sip;
762 }
763 if (start_grace)
764 rfs4_grace_start(sip);
765 /* make the new instance "current" */
766 rfs4_cur_servinst = sip;
767
768 mutex_exit(&rfs4_servinst_lock);
769 }
770
771 /*
772 * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
773 * all instances directly.
774 */
775 void
rfs4_servinst_destroy_all(void)776 rfs4_servinst_destroy_all(void)
777 {
778 rfs4_servinst_t *sip, *prev, *current;
779 #ifdef DEBUG
780 int n = 0;
781 #endif
782
783 mutex_enter(&rfs4_servinst_lock);
784 ASSERT(rfs4_cur_servinst != NULL);
785 current = rfs4_cur_servinst;
786 rfs4_cur_servinst = NULL;
787 for (sip = current; sip != NULL; sip = prev) {
788 prev = sip->prev;
789 rw_destroy(&sip->rwlock);
790 if (sip->oldstate)
791 kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
792 if (sip->dss_paths)
793 kmem_free(sip->dss_paths,
794 sip->dss_npaths * sizeof (rfs4_dss_path_t *));
795 kmem_free(sip, sizeof (rfs4_servinst_t));
796 #ifdef DEBUG
797 n++;
798 #endif
799 }
800 mutex_exit(&rfs4_servinst_lock);
801 }
802
803 /*
804 * Assign the current server instance to a client_t.
805 * Should be called with cp->rc_dbe held.
806 */
807 void
rfs4_servinst_assign(rfs4_client_t * cp,rfs4_servinst_t * sip)808 rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip)
809 {
810 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
811
812 /*
813 * The lock ensures that if the current instance is in the process
814 * of changing, we will see the new one.
815 */
816 mutex_enter(&rfs4_servinst_lock);
817 cp->rc_server_instance = sip;
818 mutex_exit(&rfs4_servinst_lock);
819 }
820
821 rfs4_servinst_t *
rfs4_servinst(rfs4_client_t * cp)822 rfs4_servinst(rfs4_client_t *cp)
823 {
824 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
825
826 return (cp->rc_server_instance);
827 }
828
829 /* ARGSUSED */
830 static void
nullfree(caddr_t resop)831 nullfree(caddr_t resop)
832 {
833 }
834
835 /*
836 * This is a fall-through for invalid or not implemented (yet) ops
837 */
838 /* ARGSUSED */
839 static void
rfs4_op_inval(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)840 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
841 struct compound_state *cs)
842 {
843 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
844 }
845
846 /*
847 * Check if the security flavor, nfsnum, is in the flavor_list.
848 */
849 bool_t
in_flavor_list(int nfsnum,int * flavor_list,int count)850 in_flavor_list(int nfsnum, int *flavor_list, int count)
851 {
852 int i;
853
854 for (i = 0; i < count; i++) {
855 if (nfsnum == flavor_list[i])
856 return (TRUE);
857 }
858 return (FALSE);
859 }
860
861 /*
862 * Used by rfs4_op_secinfo to get the security information from the
863 * export structure associated with the component.
864 */
865 /* ARGSUSED */
866 static nfsstat4
do_rfs4_op_secinfo(struct compound_state * cs,char * nm,SECINFO4res * resp)867 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
868 {
869 int error, different_export = 0;
870 vnode_t *dvp, *vp, *tvp;
871 struct exportinfo *exi = NULL;
872 fid_t fid;
873 uint_t count, i;
874 secinfo4 *resok_val;
875 struct secinfo *secp;
876 seconfig_t *si;
877 bool_t did_traverse = FALSE;
878 int dotdot, walk;
879
880 dvp = cs->vp;
881 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
882
883 /*
884 * If dotdotting, then need to check whether it's above the
885 * root of a filesystem, or above an export point.
886 */
887 if (dotdot) {
888
889 /*
890 * If dotdotting at the root of a filesystem, then
891 * need to traverse back to the mounted-on filesystem
892 * and do the dotdot lookup there.
893 */
894 if (cs->vp->v_flag & VROOT) {
895
896 /*
897 * If at the system root, then can
898 * go up no further.
899 */
900 if (VN_CMP(dvp, rootdir))
901 return (puterrno4(ENOENT));
902
903 /*
904 * Traverse back to the mounted-on filesystem
905 */
906 dvp = untraverse(cs->vp);
907
908 /*
909 * Set the different_export flag so we remember
910 * to pick up a new exportinfo entry for
911 * this new filesystem.
912 */
913 different_export = 1;
914 } else {
915
916 /*
917 * If dotdotting above an export point then set
918 * the different_export to get new export info.
919 */
920 different_export = nfs_exported(cs->exi, cs->vp);
921 }
922 }
923
924 /*
925 * Get the vnode for the component "nm".
926 */
927 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
928 NULL, NULL, NULL);
929 if (error)
930 return (puterrno4(error));
931
932 /*
933 * If the vnode is in a pseudo filesystem, or if the security flavor
934 * used in the request is valid but not an explicitly shared flavor,
935 * or the access bit indicates that this is a limited access,
936 * check whether this vnode is visible.
937 */
938 if (!different_export &&
939 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
940 cs->access & CS_ACCESS_LIMITED)) {
941 if (! nfs_visible(cs->exi, vp, &different_export)) {
942 VN_RELE(vp);
943 return (puterrno4(ENOENT));
944 }
945 }
946
947 /*
948 * If it's a mountpoint, then traverse it.
949 */
950 if (vn_ismntpt(vp)) {
951 tvp = vp;
952 if ((error = traverse(&tvp)) != 0) {
953 VN_RELE(vp);
954 return (puterrno4(error));
955 }
956 /* remember that we had to traverse mountpoint */
957 did_traverse = TRUE;
958 vp = tvp;
959 different_export = 1;
960 } else if (vp->v_vfsp != dvp->v_vfsp) {
961 /*
962 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
963 * then vp is probably an LOFS object. We don't need the
964 * realvp, we just need to know that we might have crossed
965 * a server fs boundary and need to call checkexport4.
966 * (LOFS lookup hides server fs mountpoints, and actually calls
967 * traverse)
968 */
969 different_export = 1;
970 }
971
972 /*
973 * Get the export information for it.
974 */
975 if (different_export) {
976
977 bzero(&fid, sizeof (fid));
978 fid.fid_len = MAXFIDSZ;
979 error = vop_fid_pseudo(vp, &fid);
980 if (error) {
981 VN_RELE(vp);
982 return (puterrno4(error));
983 }
984
985 if (dotdot)
986 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
987 else
988 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
989
990 if (exi == NULL) {
991 if (did_traverse == TRUE) {
992 /*
993 * If this vnode is a mounted-on vnode,
994 * but the mounted-on file system is not
995 * exported, send back the secinfo for
996 * the exported node that the mounted-on
997 * vnode lives in.
998 */
999 exi = cs->exi;
1000 } else {
1001 VN_RELE(vp);
1002 return (puterrno4(EACCES));
1003 }
1004 }
1005 } else {
1006 exi = cs->exi;
1007 }
1008 ASSERT(exi != NULL);
1009
1010
1011 /*
1012 * Create the secinfo result based on the security information
1013 * from the exportinfo structure (exi).
1014 *
1015 * Return all flavors for a pseudo node.
1016 * For a real export node, return the flavor that the client
1017 * has access with.
1018 */
1019 ASSERT(RW_LOCK_HELD(&exported_lock));
1020 if (PSEUDO(exi)) {
1021 count = exi->exi_export.ex_seccnt; /* total sec count */
1022 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1023 secp = exi->exi_export.ex_secinfo;
1024
1025 for (i = 0; i < count; i++) {
1026 si = &secp[i].s_secinfo;
1027 resok_val[i].flavor = si->sc_rpcnum;
1028 if (resok_val[i].flavor == RPCSEC_GSS) {
1029 rpcsec_gss_info *info;
1030
1031 info = &resok_val[i].flavor_info;
1032 info->qop = si->sc_qop;
1033 info->service = (rpc_gss_svc_t)si->sc_service;
1034
1035 /* get oid opaque data */
1036 info->oid.sec_oid4_len =
1037 si->sc_gss_mech_type->length;
1038 info->oid.sec_oid4_val = kmem_alloc(
1039 si->sc_gss_mech_type->length, KM_SLEEP);
1040 bcopy(
1041 si->sc_gss_mech_type->elements,
1042 info->oid.sec_oid4_val,
1043 info->oid.sec_oid4_len);
1044 }
1045 }
1046 resp->SECINFO4resok_len = count;
1047 resp->SECINFO4resok_val = resok_val;
1048 } else {
1049 int ret_cnt = 0, k = 0;
1050 int *flavor_list;
1051
1052 count = exi->exi_export.ex_seccnt; /* total sec count */
1053 secp = exi->exi_export.ex_secinfo;
1054
1055 flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1056 /* find out which flavors to return */
1057 for (i = 0; i < count; i ++) {
1058 int access, flavor, perm;
1059
1060 flavor = secp[i].s_secinfo.sc_nfsnum;
1061 perm = secp[i].s_flags;
1062
1063 access = nfsauth4_secinfo_access(exi, cs->req,
1064 flavor, perm);
1065
1066 if (! (access & NFSAUTH_DENIED) &&
1067 ! (access & NFSAUTH_WRONGSEC)) {
1068 flavor_list[ret_cnt] = flavor;
1069 ret_cnt++;
1070 }
1071 }
1072
1073 /* Create the returning SECINFO value */
1074 resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1075
1076 for (i = 0; i < count; i++) {
1077 /*
1078 * If the flavor is in the flavor list,
1079 * fill in resok_val.
1080 */
1081 si = &secp[i].s_secinfo;
1082 if (in_flavor_list(si->sc_nfsnum,
1083 flavor_list, ret_cnt)) {
1084 resok_val[k].flavor = si->sc_rpcnum;
1085 if (resok_val[k].flavor == RPCSEC_GSS) {
1086 rpcsec_gss_info *info;
1087
1088 info = &resok_val[k].flavor_info;
1089 info->qop = si->sc_qop;
1090 info->service = (rpc_gss_svc_t)
1091 si->sc_service;
1092
1093 /* get oid opaque data */
1094 info->oid.sec_oid4_len =
1095 si->sc_gss_mech_type->length;
1096 info->oid.sec_oid4_val = kmem_alloc(
1097 si->sc_gss_mech_type->length,
1098 KM_SLEEP);
1099 bcopy(si->sc_gss_mech_type->elements,
1100 info->oid.sec_oid4_val,
1101 info->oid.sec_oid4_len);
1102 }
1103 k++;
1104 }
1105 if (k >= ret_cnt)
1106 break;
1107 }
1108 resp->SECINFO4resok_len = ret_cnt;
1109 resp->SECINFO4resok_val = resok_val;
1110 kmem_free(flavor_list, count * sizeof (int));
1111 }
1112
1113 VN_RELE(vp);
1114 return (NFS4_OK);
1115 }
1116
1117 /*
1118 * SECINFO (Operation 33): Obtain required security information on
1119 * the component name in the format of (security-mechanism-oid, qop, service)
1120 * triplets.
1121 */
1122 /* ARGSUSED */
1123 static void
rfs4_op_secinfo(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)1124 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1125 struct compound_state *cs)
1126 {
1127 SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1128 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1129 utf8string *utfnm = &args->name;
1130 uint_t len;
1131 char *nm;
1132 struct sockaddr *ca;
1133 char *name = NULL;
1134
1135 DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1136 SECINFO4args *, args);
1137
1138 /*
1139 * Current file handle (cfh) should have been set before getting
1140 * into this function. If not, return error.
1141 */
1142 if (cs->vp == NULL) {
1143 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1144 goto out;
1145 }
1146
1147 if (cs->vp->v_type != VDIR) {
1148 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1149 goto out;
1150 }
1151
1152 /*
1153 * Verify the component name. If failed, error out, but
1154 * do not error out if the component name is a "..".
1155 * SECINFO will return its parents secinfo data for SECINFO "..".
1156 */
1157 if (!utf8_dir_verify(utfnm)) {
1158 if (utfnm->utf8string_len != 2 ||
1159 utfnm->utf8string_val[0] != '.' ||
1160 utfnm->utf8string_val[1] != '.') {
1161 *cs->statusp = resp->status = NFS4ERR_INVAL;
1162 goto out;
1163 }
1164 }
1165
1166 nm = utf8_to_str(utfnm, &len, NULL);
1167 if (nm == NULL) {
1168 *cs->statusp = resp->status = NFS4ERR_INVAL;
1169 goto out;
1170 }
1171
1172 if (len > MAXNAMELEN) {
1173 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1174 kmem_free(nm, len);
1175 goto out;
1176 }
1177
1178 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1179 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1180 MAXPATHLEN + 1);
1181
1182 if (name == NULL) {
1183 *cs->statusp = resp->status = NFS4ERR_INVAL;
1184 kmem_free(nm, len);
1185 goto out;
1186 }
1187
1188
1189 *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1190
1191 if (name != nm)
1192 kmem_free(name, MAXPATHLEN + 1);
1193 kmem_free(nm, len);
1194
1195 out:
1196 DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1197 SECINFO4res *, resp);
1198 }
1199
1200 /*
1201 * Free SECINFO result.
1202 */
1203 /* ARGSUSED */
1204 static void
rfs4_op_secinfo_free(nfs_resop4 * resop)1205 rfs4_op_secinfo_free(nfs_resop4 *resop)
1206 {
1207 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1208 int count, i;
1209 secinfo4 *resok_val;
1210
1211 /* If this is not an Ok result, nothing to free. */
1212 if (resp->status != NFS4_OK) {
1213 return;
1214 }
1215
1216 count = resp->SECINFO4resok_len;
1217 resok_val = resp->SECINFO4resok_val;
1218
1219 for (i = 0; i < count; i++) {
1220 if (resok_val[i].flavor == RPCSEC_GSS) {
1221 rpcsec_gss_info *info;
1222
1223 info = &resok_val[i].flavor_info;
1224 kmem_free(info->oid.sec_oid4_val,
1225 info->oid.sec_oid4_len);
1226 }
1227 }
1228 kmem_free(resok_val, count * sizeof (secinfo4));
1229 resp->SECINFO4resok_len = 0;
1230 resp->SECINFO4resok_val = NULL;
1231 }
1232
1233 /* ARGSUSED */
1234 static void
rfs4_op_access(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)1235 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1236 struct compound_state *cs)
1237 {
1238 ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1239 ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1240 int error;
1241 vnode_t *vp;
1242 struct vattr va;
1243 int checkwriteperm;
1244 cred_t *cr = cs->cr;
1245 bslabel_t *clabel, *slabel;
1246 ts_label_t *tslabel;
1247 boolean_t admin_low_client;
1248
1249 DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1250 ACCESS4args *, args);
1251
1252 #if 0 /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1253 if (cs->access == CS_ACCESS_DENIED) {
1254 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1255 goto out;
1256 }
1257 #endif
1258 if (cs->vp == NULL) {
1259 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1260 goto out;
1261 }
1262
1263 ASSERT(cr != NULL);
1264
1265 vp = cs->vp;
1266
1267 /*
1268 * If the file system is exported read only, it is not appropriate
1269 * to check write permissions for regular files and directories.
1270 * Special files are interpreted by the client, so the underlying
1271 * permissions are sent back to the client for interpretation.
1272 */
1273 if (rdonly4(cs->exi, cs->vp, req) &&
1274 (vp->v_type == VREG || vp->v_type == VDIR))
1275 checkwriteperm = 0;
1276 else
1277 checkwriteperm = 1;
1278
1279 /*
1280 * XXX
1281 * We need the mode so that we can correctly determine access
1282 * permissions relative to a mandatory lock file. Access to
1283 * mandatory lock files is denied on the server, so it might
1284 * as well be reflected to the server during the open.
1285 */
1286 va.va_mask = AT_MODE;
1287 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1288 if (error) {
1289 *cs->statusp = resp->status = puterrno4(error);
1290 goto out;
1291 }
1292 resp->access = 0;
1293 resp->supported = 0;
1294
1295 if (is_system_labeled()) {
1296 ASSERT(req->rq_label != NULL);
1297 clabel = req->rq_label;
1298 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1299 "got client label from request(1)",
1300 struct svc_req *, req);
1301 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1302 if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1303 *cs->statusp = resp->status = puterrno4(EACCES);
1304 goto out;
1305 }
1306 slabel = label2bslabel(tslabel);
1307 DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1308 char *, "got server label(1) for vp(2)",
1309 bslabel_t *, slabel, vnode_t *, vp);
1310
1311 admin_low_client = B_FALSE;
1312 } else
1313 admin_low_client = B_TRUE;
1314 }
1315
1316 if (args->access & ACCESS4_READ) {
1317 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1318 if (!error && !MANDLOCK(vp, va.va_mode) &&
1319 (!is_system_labeled() || admin_low_client ||
1320 bldominates(clabel, slabel)))
1321 resp->access |= ACCESS4_READ;
1322 resp->supported |= ACCESS4_READ;
1323 }
1324 if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1325 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1326 if (!error && (!is_system_labeled() || admin_low_client ||
1327 bldominates(clabel, slabel)))
1328 resp->access |= ACCESS4_LOOKUP;
1329 resp->supported |= ACCESS4_LOOKUP;
1330 }
1331 if (checkwriteperm &&
1332 (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1333 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1334 if (!error && !MANDLOCK(vp, va.va_mode) &&
1335 (!is_system_labeled() || admin_low_client ||
1336 blequal(clabel, slabel)))
1337 resp->access |=
1338 (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1339 resp->supported |= (ACCESS4_MODIFY | ACCESS4_EXTEND);
1340 }
1341
1342 if (checkwriteperm &&
1343 (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1344 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1345 if (!error && (!is_system_labeled() || admin_low_client ||
1346 blequal(clabel, slabel)))
1347 resp->access |= ACCESS4_DELETE;
1348 resp->supported |= ACCESS4_DELETE;
1349 }
1350 if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1351 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1352 if (!error && !MANDLOCK(vp, va.va_mode) &&
1353 (!is_system_labeled() || admin_low_client ||
1354 bldominates(clabel, slabel)))
1355 resp->access |= ACCESS4_EXECUTE;
1356 resp->supported |= ACCESS4_EXECUTE;
1357 }
1358
1359 if (is_system_labeled() && !admin_low_client)
1360 label_rele(tslabel);
1361
1362 *cs->statusp = resp->status = NFS4_OK;
1363 out:
1364 DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1365 ACCESS4res *, resp);
1366 }
1367
1368 /* ARGSUSED */
1369 static void
rfs4_op_commit(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)1370 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1371 struct compound_state *cs)
1372 {
1373 COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1374 COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1375 int error;
1376 vnode_t *vp = cs->vp;
1377 cred_t *cr = cs->cr;
1378 vattr_t va;
1379
1380 DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1381 COMMIT4args *, args);
1382
1383 if (vp == NULL) {
1384 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1385 goto out;
1386 }
1387 if (cs->access == CS_ACCESS_DENIED) {
1388 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1389 goto out;
1390 }
1391
1392 if (args->offset + args->count < args->offset) {
1393 *cs->statusp = resp->status = NFS4ERR_INVAL;
1394 goto out;
1395 }
1396
1397 va.va_mask = AT_UID;
1398 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1399
1400 /*
1401 * If we can't get the attributes, then we can't do the
1402 * right access checking. So, we'll fail the request.
1403 */
1404 if (error) {
1405 *cs->statusp = resp->status = puterrno4(error);
1406 goto out;
1407 }
1408 if (rdonly4(cs->exi, cs->vp, req)) {
1409 *cs->statusp = resp->status = NFS4ERR_ROFS;
1410 goto out;
1411 }
1412
1413 if (vp->v_type != VREG) {
1414 if (vp->v_type == VDIR)
1415 resp->status = NFS4ERR_ISDIR;
1416 else
1417 resp->status = NFS4ERR_INVAL;
1418 *cs->statusp = resp->status;
1419 goto out;
1420 }
1421
1422 if (crgetuid(cr) != va.va_uid &&
1423 (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1424 *cs->statusp = resp->status = puterrno4(error);
1425 goto out;
1426 }
1427
1428 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1429
1430 if (error) {
1431 *cs->statusp = resp->status = puterrno4(error);
1432 goto out;
1433 }
1434
1435 *cs->statusp = resp->status = NFS4_OK;
1436 resp->writeverf = Write4verf;
1437 out:
1438 DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1439 COMMIT4res *, resp);
1440 }
1441
1442 /*
1443 * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1444 * was completed. It does the nfsv4 create for special files.
1445 */
1446 /* ARGSUSED */
1447 static vnode_t *
do_rfs4_op_mknod(CREATE4args * args,CREATE4res * resp,struct svc_req * req,struct compound_state * cs,vattr_t * vap,char * nm)1448 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1449 struct compound_state *cs, vattr_t *vap, char *nm)
1450 {
1451 int error;
1452 cred_t *cr = cs->cr;
1453 vnode_t *dvp = cs->vp;
1454 vnode_t *vp = NULL;
1455 int mode;
1456 enum vcexcl excl;
1457
1458 switch (args->type) {
1459 case NF4CHR:
1460 case NF4BLK:
1461 if (secpolicy_sys_devices(cr) != 0) {
1462 *cs->statusp = resp->status = NFS4ERR_PERM;
1463 return (NULL);
1464 }
1465 if (args->type == NF4CHR)
1466 vap->va_type = VCHR;
1467 else
1468 vap->va_type = VBLK;
1469 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1470 args->ftype4_u.devdata.specdata2);
1471 vap->va_mask |= AT_RDEV;
1472 break;
1473 case NF4SOCK:
1474 vap->va_type = VSOCK;
1475 break;
1476 case NF4FIFO:
1477 vap->va_type = VFIFO;
1478 break;
1479 default:
1480 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1481 return (NULL);
1482 }
1483
1484 /*
1485 * Must specify the mode.
1486 */
1487 if (!(vap->va_mask & AT_MODE)) {
1488 *cs->statusp = resp->status = NFS4ERR_INVAL;
1489 return (NULL);
1490 }
1491
1492 excl = EXCL;
1493
1494 mode = 0;
1495
1496 error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1497 if (error) {
1498 *cs->statusp = resp->status = puterrno4(error);
1499 return (NULL);
1500 }
1501 return (vp);
1502 }
1503
1504 /*
1505 * nfsv4 create is used to create non-regular files. For regular files,
1506 * use nfsv4 open.
1507 */
1508 /* ARGSUSED */
1509 static void
rfs4_op_create(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)1510 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1511 struct compound_state *cs)
1512 {
1513 CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1514 CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1515 int error;
1516 struct vattr bva, iva, iva2, ava, *vap;
1517 cred_t *cr = cs->cr;
1518 vnode_t *dvp = cs->vp;
1519 vnode_t *vp = NULL;
1520 vnode_t *realvp;
1521 char *nm, *lnm;
1522 uint_t len, llen;
1523 int syncval = 0;
1524 struct nfs4_svgetit_arg sarg;
1525 struct nfs4_ntov_table ntov;
1526 struct statvfs64 sb;
1527 nfsstat4 status;
1528 struct sockaddr *ca;
1529 char *name = NULL;
1530 char *lname = NULL;
1531
1532 DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1533 CREATE4args *, args);
1534
1535 resp->attrset = 0;
1536
1537 if (dvp == NULL) {
1538 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1539 goto out;
1540 }
1541
1542 /*
1543 * If there is an unshared filesystem mounted on this vnode,
1544 * do not allow to create an object in this directory.
1545 */
1546 if (vn_ismntpt(dvp)) {
1547 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1548 goto out;
1549 }
1550
1551 /* Verify that type is correct */
1552 switch (args->type) {
1553 case NF4LNK:
1554 case NF4BLK:
1555 case NF4CHR:
1556 case NF4SOCK:
1557 case NF4FIFO:
1558 case NF4DIR:
1559 break;
1560 default:
1561 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1562 goto out;
1563 };
1564
1565 if (cs->access == CS_ACCESS_DENIED) {
1566 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1567 goto out;
1568 }
1569 if (dvp->v_type != VDIR) {
1570 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1571 goto out;
1572 }
1573 if (!utf8_dir_verify(&args->objname)) {
1574 *cs->statusp = resp->status = NFS4ERR_INVAL;
1575 goto out;
1576 }
1577
1578 if (rdonly4(cs->exi, cs->vp, req)) {
1579 *cs->statusp = resp->status = NFS4ERR_ROFS;
1580 goto out;
1581 }
1582
1583 /*
1584 * Name of newly created object
1585 */
1586 nm = utf8_to_fn(&args->objname, &len, NULL);
1587 if (nm == NULL) {
1588 *cs->statusp = resp->status = NFS4ERR_INVAL;
1589 goto out;
1590 }
1591
1592 if (len > MAXNAMELEN) {
1593 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1594 kmem_free(nm, len);
1595 goto out;
1596 }
1597
1598 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1599 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1600 MAXPATHLEN + 1);
1601
1602 if (name == NULL) {
1603 *cs->statusp = resp->status = NFS4ERR_INVAL;
1604 kmem_free(nm, len);
1605 goto out;
1606 }
1607
1608 resp->attrset = 0;
1609
1610 sarg.sbp = &sb;
1611 sarg.is_referral = B_FALSE;
1612 nfs4_ntov_table_init(&ntov);
1613
1614 status = do_rfs4_set_attrs(&resp->attrset,
1615 &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1616
1617 if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1618 status = NFS4ERR_INVAL;
1619
1620 if (status != NFS4_OK) {
1621 *cs->statusp = resp->status = status;
1622 if (name != nm)
1623 kmem_free(name, MAXPATHLEN + 1);
1624 kmem_free(nm, len);
1625 nfs4_ntov_table_free(&ntov, &sarg);
1626 resp->attrset = 0;
1627 goto out;
1628 }
1629
1630 /* Get "before" change value */
1631 bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1632 error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1633 if (error) {
1634 *cs->statusp = resp->status = puterrno4(error);
1635 if (name != nm)
1636 kmem_free(name, MAXPATHLEN + 1);
1637 kmem_free(nm, len);
1638 nfs4_ntov_table_free(&ntov, &sarg);
1639 resp->attrset = 0;
1640 goto out;
1641 }
1642 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1643
1644 vap = sarg.vap;
1645
1646 /*
1647 * Set the default initial values for attributes when the parent
1648 * directory does not have the VSUID/VSGID bit set and they have
1649 * not been specified in createattrs.
1650 */
1651 if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1652 vap->va_uid = crgetuid(cr);
1653 vap->va_mask |= AT_UID;
1654 }
1655 if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1656 vap->va_gid = crgetgid(cr);
1657 vap->va_mask |= AT_GID;
1658 }
1659
1660 vap->va_mask |= AT_TYPE;
1661 switch (args->type) {
1662 case NF4DIR:
1663 vap->va_type = VDIR;
1664 if ((vap->va_mask & AT_MODE) == 0) {
1665 vap->va_mode = 0700; /* default: owner rwx only */
1666 vap->va_mask |= AT_MODE;
1667 }
1668 error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1669 if (error)
1670 break;
1671
1672 /*
1673 * Get the initial "after" sequence number, if it fails,
1674 * set to zero
1675 */
1676 iva.va_mask = AT_SEQ;
1677 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1678 iva.va_seq = 0;
1679 break;
1680 case NF4LNK:
1681 vap->va_type = VLNK;
1682 if ((vap->va_mask & AT_MODE) == 0) {
1683 vap->va_mode = 0700; /* default: owner rwx only */
1684 vap->va_mask |= AT_MODE;
1685 }
1686
1687 /*
1688 * symlink names must be treated as data
1689 */
1690 lnm = utf8_to_str(&args->ftype4_u.linkdata, &llen, NULL);
1691
1692 if (lnm == NULL) {
1693 *cs->statusp = resp->status = NFS4ERR_INVAL;
1694 if (name != nm)
1695 kmem_free(name, MAXPATHLEN + 1);
1696 kmem_free(nm, len);
1697 nfs4_ntov_table_free(&ntov, &sarg);
1698 resp->attrset = 0;
1699 goto out;
1700 }
1701
1702 if (llen > MAXPATHLEN) {
1703 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1704 if (name != nm)
1705 kmem_free(name, MAXPATHLEN + 1);
1706 kmem_free(nm, len);
1707 kmem_free(lnm, llen);
1708 nfs4_ntov_table_free(&ntov, &sarg);
1709 resp->attrset = 0;
1710 goto out;
1711 }
1712
1713 lname = nfscmd_convname(ca, cs->exi, lnm,
1714 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1715
1716 if (lname == NULL) {
1717 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1718 if (name != nm)
1719 kmem_free(name, MAXPATHLEN + 1);
1720 kmem_free(nm, len);
1721 kmem_free(lnm, llen);
1722 nfs4_ntov_table_free(&ntov, &sarg);
1723 resp->attrset = 0;
1724 goto out;
1725 }
1726
1727 error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1728 if (lname != lnm)
1729 kmem_free(lname, MAXPATHLEN + 1);
1730 kmem_free(lnm, llen);
1731 if (error)
1732 break;
1733
1734 /*
1735 * Get the initial "after" sequence number, if it fails,
1736 * set to zero
1737 */
1738 iva.va_mask = AT_SEQ;
1739 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1740 iva.va_seq = 0;
1741
1742 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1743 NULL, NULL, NULL);
1744 if (error)
1745 break;
1746
1747 /*
1748 * va_seq is not safe over VOP calls, check it again
1749 * if it has changed zero out iva to force atomic = FALSE.
1750 */
1751 iva2.va_mask = AT_SEQ;
1752 if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
1753 iva2.va_seq != iva.va_seq)
1754 iva.va_seq = 0;
1755 break;
1756 default:
1757 /*
1758 * probably a special file.
1759 */
1760 if ((vap->va_mask & AT_MODE) == 0) {
1761 vap->va_mode = 0600; /* default: owner rw only */
1762 vap->va_mask |= AT_MODE;
1763 }
1764 syncval = FNODSYNC;
1765 /*
1766 * We know this will only generate one VOP call
1767 */
1768 vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1769
1770 if (vp == NULL) {
1771 if (name != nm)
1772 kmem_free(name, MAXPATHLEN + 1);
1773 kmem_free(nm, len);
1774 nfs4_ntov_table_free(&ntov, &sarg);
1775 resp->attrset = 0;
1776 goto out;
1777 }
1778
1779 /*
1780 * Get the initial "after" sequence number, if it fails,
1781 * set to zero
1782 */
1783 iva.va_mask = AT_SEQ;
1784 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1785 iva.va_seq = 0;
1786
1787 break;
1788 }
1789 if (name != nm)
1790 kmem_free(name, MAXPATHLEN + 1);
1791 kmem_free(nm, len);
1792
1793 if (error) {
1794 *cs->statusp = resp->status = puterrno4(error);
1795 }
1796
1797 /*
1798 * Force modified data and metadata out to stable storage.
1799 */
1800 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1801
1802 if (resp->status != NFS4_OK) {
1803 if (vp != NULL)
1804 VN_RELE(vp);
1805 nfs4_ntov_table_free(&ntov, &sarg);
1806 resp->attrset = 0;
1807 goto out;
1808 }
1809
1810 /*
1811 * Finish setup of cinfo response, "before" value already set.
1812 * Get "after" change value, if it fails, simply return the
1813 * before value.
1814 */
1815 ava.va_mask = AT_CTIME|AT_SEQ;
1816 if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
1817 ava.va_ctime = bva.va_ctime;
1818 ava.va_seq = 0;
1819 }
1820 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1821
1822 /*
1823 * True verification that object was created with correct
1824 * attrs is impossible. The attrs could have been changed
1825 * immediately after object creation. If attributes did
1826 * not verify, the only recourse for the server is to
1827 * destroy the object. Maybe if some attrs (like gid)
1828 * are set incorrectly, the object should be destroyed;
1829 * however, seems bad as a default policy. Do we really
1830 * want to destroy an object over one of the times not
1831 * verifying correctly? For these reasons, the server
1832 * currently sets bits in attrset for createattrs
1833 * that were set; however, no verification is done.
1834 *
1835 * vmask_to_nmask accounts for vattr bits set on create
1836 * [do_rfs4_set_attrs() only sets resp bits for
1837 * non-vattr/vfs bits.]
1838 * Mask off any bits set by default so as not to return
1839 * more attrset bits than were requested in createattrs
1840 */
1841 nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1842 resp->attrset &= args->createattrs.attrmask;
1843 nfs4_ntov_table_free(&ntov, &sarg);
1844
1845 error = makefh4(&cs->fh, vp, cs->exi);
1846 if (error) {
1847 *cs->statusp = resp->status = puterrno4(error);
1848 }
1849
1850 /*
1851 * The cinfo.atomic = TRUE only if we got no errors, we have
1852 * non-zero va_seq's, and it has incremented by exactly one
1853 * during the creation and it didn't change during the VOP_LOOKUP
1854 * or VOP_FSYNC.
1855 */
1856 if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1857 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
1858 resp->cinfo.atomic = TRUE;
1859 else
1860 resp->cinfo.atomic = FALSE;
1861
1862 /*
1863 * Force modified metadata out to stable storage.
1864 *
1865 * if a underlying vp exists, pass it to VOP_FSYNC
1866 */
1867 if (VOP_REALVP(vp, &realvp, NULL) == 0)
1868 (void) VOP_FSYNC(realvp, syncval, cr, NULL);
1869 else
1870 (void) VOP_FSYNC(vp, syncval, cr, NULL);
1871
1872 if (resp->status != NFS4_OK) {
1873 VN_RELE(vp);
1874 goto out;
1875 }
1876 if (cs->vp)
1877 VN_RELE(cs->vp);
1878
1879 cs->vp = vp;
1880 *cs->statusp = resp->status = NFS4_OK;
1881 out:
1882 DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1883 CREATE4res *, resp);
1884 }
1885
1886 /*ARGSUSED*/
1887 static void
rfs4_op_delegpurge(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)1888 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1889 struct compound_state *cs)
1890 {
1891 DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1892 DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1893
1894 rfs4_op_inval(argop, resop, req, cs);
1895
1896 DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1897 DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1898 }
1899
1900 /*ARGSUSED*/
1901 static void
rfs4_op_delegreturn(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)1902 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1903 struct compound_state *cs)
1904 {
1905 DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1906 DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1907 rfs4_deleg_state_t *dsp;
1908 nfsstat4 status;
1909
1910 DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
1911 DELEGRETURN4args *, args);
1912
1913 status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
1914 resp->status = *cs->statusp = status;
1915 if (status != NFS4_OK)
1916 goto out;
1917
1918 /* Ensure specified filehandle matches */
1919 if (cs->vp != dsp->rds_finfo->rf_vp) {
1920 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
1921 } else
1922 rfs4_return_deleg(dsp, FALSE);
1923
1924 rfs4_update_lease(dsp->rds_client);
1925
1926 rfs4_deleg_state_rele(dsp);
1927 out:
1928 DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
1929 DELEGRETURN4res *, resp);
1930 }
1931
1932 /*
1933 * Check to see if a given "flavor" is an explicitly shared flavor.
1934 * The assumption of this routine is the "flavor" is already a valid
1935 * flavor in the secinfo list of "exi".
1936 *
1937 * e.g.
1938 * # share -o sec=flavor1 /export
1939 * # share -o sec=flavor2 /export/home
1940 *
1941 * flavor2 is not an explicitly shared flavor for /export,
1942 * however it is in the secinfo list for /export thru the
1943 * server namespace setup.
1944 */
1945 int
is_exported_sec(int flavor,struct exportinfo * exi)1946 is_exported_sec(int flavor, struct exportinfo *exi)
1947 {
1948 int i;
1949 struct secinfo *sp;
1950
1951 sp = exi->exi_export.ex_secinfo;
1952 for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
1953 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
1954 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
1955 return (SEC_REF_EXPORTED(&sp[i]));
1956 }
1957 }
1958
1959 /* Should not reach this point based on the assumption */
1960 return (0);
1961 }
1962
1963 /*
1964 * Check if the security flavor used in the request matches what is
1965 * required at the export point or at the root pseudo node (exi_root).
1966 *
1967 * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
1968 *
1969 */
1970 static int
secinfo_match_or_authnone(struct compound_state * cs)1971 secinfo_match_or_authnone(struct compound_state *cs)
1972 {
1973 int i;
1974 struct secinfo *sp;
1975
1976 /*
1977 * Check cs->nfsflavor (from the request) against
1978 * the current export data in cs->exi.
1979 */
1980 sp = cs->exi->exi_export.ex_secinfo;
1981 for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
1982 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
1983 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
1984 return (1);
1985 }
1986
1987 return (0);
1988 }
1989
1990 /*
1991 * Check the access authority for the client and return the correct error.
1992 */
1993 nfsstat4
call_checkauth4(struct compound_state * cs,struct svc_req * req)1994 call_checkauth4(struct compound_state *cs, struct svc_req *req)
1995 {
1996 int authres;
1997
1998 /*
1999 * First, check if the security flavor used in the request
2000 * are among the flavors set in the server namespace.
2001 */
2002 if (!secinfo_match_or_authnone(cs)) {
2003 *cs->statusp = NFS4ERR_WRONGSEC;
2004 return (*cs->statusp);
2005 }
2006
2007 authres = checkauth4(cs, req);
2008
2009 if (authres > 0) {
2010 *cs->statusp = NFS4_OK;
2011 if (! (cs->access & CS_ACCESS_LIMITED))
2012 cs->access = CS_ACCESS_OK;
2013 } else if (authres == 0) {
2014 *cs->statusp = NFS4ERR_ACCESS;
2015 } else if (authres == -2) {
2016 *cs->statusp = NFS4ERR_WRONGSEC;
2017 } else {
2018 *cs->statusp = NFS4ERR_DELAY;
2019 }
2020 return (*cs->statusp);
2021 }
2022
2023 /*
2024 * bitmap4_to_attrmask is called by getattr and readdir.
2025 * It sets up the vattr mask and determines whether vfsstat call is needed
2026 * based on the input bitmap.
2027 * Returns nfsv4 status.
2028 */
2029 static nfsstat4
bitmap4_to_attrmask(bitmap4 breq,struct nfs4_svgetit_arg * sargp)2030 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2031 {
2032 int i;
2033 uint_t va_mask;
2034 struct statvfs64 *sbp = sargp->sbp;
2035
2036 sargp->sbp = NULL;
2037 sargp->flag = 0;
2038 sargp->rdattr_error = NFS4_OK;
2039 sargp->mntdfid_set = FALSE;
2040 if (sargp->cs->vp)
2041 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2042 FH4_ATTRDIR | FH4_NAMEDATTR);
2043 else
2044 sargp->xattr = 0;
2045
2046 /*
2047 * Set rdattr_error_req to true if return error per
2048 * failed entry rather than fail the readdir.
2049 */
2050 if (breq & FATTR4_RDATTR_ERROR_MASK)
2051 sargp->rdattr_error_req = 1;
2052 else
2053 sargp->rdattr_error_req = 0;
2054
2055 /*
2056 * generate the va_mask
2057 * Handle the easy cases first
2058 */
2059 switch (breq) {
2060 case NFS4_NTOV_ATTR_MASK:
2061 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2062 return (NFS4_OK);
2063
2064 case NFS4_FS_ATTR_MASK:
2065 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2066 sargp->sbp = sbp;
2067 return (NFS4_OK);
2068
2069 case NFS4_NTOV_ATTR_CACHE_MASK:
2070 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2071 return (NFS4_OK);
2072
2073 case FATTR4_LEASE_TIME_MASK:
2074 sargp->vap->va_mask = 0;
2075 return (NFS4_OK);
2076
2077 default:
2078 va_mask = 0;
2079 for (i = 0; i < nfs4_ntov_map_size; i++) {
2080 if ((breq & nfs4_ntov_map[i].fbit) &&
2081 nfs4_ntov_map[i].vbit)
2082 va_mask |= nfs4_ntov_map[i].vbit;
2083 }
2084
2085 /*
2086 * Check is vfsstat is needed
2087 */
2088 if (breq & NFS4_FS_ATTR_MASK)
2089 sargp->sbp = sbp;
2090
2091 sargp->vap->va_mask = va_mask;
2092 return (NFS4_OK);
2093 }
2094 /* NOTREACHED */
2095 }
2096
2097 /*
2098 * bitmap4_get_sysattrs is called by getattr and readdir.
2099 * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2100 * Returns nfsv4 status.
2101 */
2102 static nfsstat4
bitmap4_get_sysattrs(struct nfs4_svgetit_arg * sargp)2103 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2104 {
2105 int error;
2106 struct compound_state *cs = sargp->cs;
2107 vnode_t *vp = cs->vp;
2108
2109 if (sargp->sbp != NULL) {
2110 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2111 sargp->sbp = NULL; /* to identify error */
2112 return (puterrno4(error));
2113 }
2114 }
2115
2116 return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2117 }
2118
2119 static void
nfs4_ntov_table_init(struct nfs4_ntov_table * ntovp)2120 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2121 {
2122 ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2123 KM_SLEEP);
2124 ntovp->attrcnt = 0;
2125 ntovp->vfsstat = FALSE;
2126 }
2127
2128 static void
nfs4_ntov_table_free(struct nfs4_ntov_table * ntovp,struct nfs4_svgetit_arg * sargp)2129 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2130 struct nfs4_svgetit_arg *sargp)
2131 {
2132 int i;
2133 union nfs4_attr_u *na;
2134 uint8_t *amap;
2135
2136 /*
2137 * XXX Should do the same checks for whether the bit is set
2138 */
2139 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2140 i < ntovp->attrcnt; i++, na++, amap++) {
2141 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2142 NFS4ATTR_FREEIT, sargp, na);
2143 }
2144 if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2145 /*
2146 * xdr_free for getattr will be done later
2147 */
2148 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2149 i < ntovp->attrcnt; i++, na++, amap++) {
2150 xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2151 }
2152 }
2153 kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2154 }
2155
2156 /*
2157 * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2158 */
2159 static nfsstat4
do_rfs4_op_getattr(bitmap4 breq,fattr4 * fattrp,struct nfs4_svgetit_arg * sargp)2160 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2161 struct nfs4_svgetit_arg *sargp)
2162 {
2163 int error = 0;
2164 int i, k;
2165 struct nfs4_ntov_table ntov;
2166 XDR xdr;
2167 ulong_t xdr_size;
2168 char *xdr_attrs;
2169 nfsstat4 status = NFS4_OK;
2170 nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2171 union nfs4_attr_u *na;
2172 uint8_t *amap;
2173
2174 sargp->op = NFS4ATTR_GETIT;
2175 sargp->flag = 0;
2176
2177 fattrp->attrmask = 0;
2178 /* if no bits requested, then return empty fattr4 */
2179 if (breq == 0) {
2180 fattrp->attrlist4_len = 0;
2181 fattrp->attrlist4 = NULL;
2182 return (NFS4_OK);
2183 }
2184
2185 /*
2186 * return NFS4ERR_INVAL when client requests write-only attrs
2187 */
2188 if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2189 return (NFS4ERR_INVAL);
2190
2191 nfs4_ntov_table_init(&ntov);
2192 na = ntov.na;
2193 amap = ntov.amap;
2194
2195 /*
2196 * Now loop to get or verify the attrs
2197 */
2198 for (i = 0; i < nfs4_ntov_map_size; i++) {
2199 if (breq & nfs4_ntov_map[i].fbit) {
2200 if ((*nfs4_ntov_map[i].sv_getit)(
2201 NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2202
2203 error = (*nfs4_ntov_map[i].sv_getit)(
2204 NFS4ATTR_GETIT, sargp, na);
2205
2206 /*
2207 * Possible error values:
2208 * >0 if sv_getit failed to
2209 * get the attr; 0 if succeeded;
2210 * <0 if rdattr_error and the
2211 * attribute cannot be returned.
2212 */
2213 if (error && !(sargp->rdattr_error_req))
2214 goto done;
2215 /*
2216 * If error then just for entry
2217 */
2218 if (error == 0) {
2219 fattrp->attrmask |=
2220 nfs4_ntov_map[i].fbit;
2221 *amap++ =
2222 (uint8_t)nfs4_ntov_map[i].nval;
2223 na++;
2224 (ntov.attrcnt)++;
2225 } else if ((error > 0) &&
2226 (sargp->rdattr_error == NFS4_OK)) {
2227 sargp->rdattr_error = puterrno4(error);
2228 }
2229 error = 0;
2230 }
2231 }
2232 }
2233
2234 /*
2235 * If rdattr_error was set after the return value for it was assigned,
2236 * update it.
2237 */
2238 if (prev_rdattr_error != sargp->rdattr_error) {
2239 na = ntov.na;
2240 amap = ntov.amap;
2241 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2242 k = *amap;
2243 if (k < FATTR4_RDATTR_ERROR) {
2244 continue;
2245 }
2246 if ((k == FATTR4_RDATTR_ERROR) &&
2247 ((*nfs4_ntov_map[k].sv_getit)(
2248 NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2249
2250 (void) (*nfs4_ntov_map[k].sv_getit)(
2251 NFS4ATTR_GETIT, sargp, na);
2252 }
2253 break;
2254 }
2255 }
2256
2257 xdr_size = 0;
2258 na = ntov.na;
2259 amap = ntov.amap;
2260 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2261 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2262 }
2263
2264 fattrp->attrlist4_len = xdr_size;
2265 if (xdr_size) {
2266 /* freed by rfs4_op_getattr_free() */
2267 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2268
2269 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2270
2271 na = ntov.na;
2272 amap = ntov.amap;
2273 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2274 if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2275 DTRACE_PROBE1(nfss__e__getattr4_encfail,
2276 int, *amap);
2277 status = NFS4ERR_SERVERFAULT;
2278 break;
2279 }
2280 }
2281 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
2282 } else {
2283 fattrp->attrlist4 = NULL;
2284 }
2285 done:
2286
2287 nfs4_ntov_table_free(&ntov, sargp);
2288
2289 if (error != 0)
2290 status = puterrno4(error);
2291
2292 return (status);
2293 }
2294
2295 /* ARGSUSED */
2296 static void
rfs4_op_getattr(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2297 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2298 struct compound_state *cs)
2299 {
2300 GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2301 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2302 struct nfs4_svgetit_arg sarg;
2303 struct statvfs64 sb;
2304 nfsstat4 status;
2305
2306 DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2307 GETATTR4args *, args);
2308
2309 if (cs->vp == NULL) {
2310 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2311 goto out;
2312 }
2313
2314 if (cs->access == CS_ACCESS_DENIED) {
2315 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2316 goto out;
2317 }
2318
2319 sarg.sbp = &sb;
2320 sarg.cs = cs;
2321 sarg.is_referral = B_FALSE;
2322
2323 status = bitmap4_to_attrmask(args->attr_request, &sarg);
2324 if (status == NFS4_OK) {
2325
2326 status = bitmap4_get_sysattrs(&sarg);
2327 if (status == NFS4_OK) {
2328
2329 /* Is this a referral? */
2330 if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2331 /* Older V4 Solaris client sees a link */
2332 if (client_is_downrev(req))
2333 sarg.vap->va_type = VLNK;
2334 else
2335 sarg.is_referral = B_TRUE;
2336 }
2337
2338 status = do_rfs4_op_getattr(args->attr_request,
2339 &resp->obj_attributes, &sarg);
2340 }
2341 }
2342 *cs->statusp = resp->status = status;
2343 out:
2344 DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2345 GETATTR4res *, resp);
2346 }
2347
2348 static void
rfs4_op_getattr_free(nfs_resop4 * resop)2349 rfs4_op_getattr_free(nfs_resop4 *resop)
2350 {
2351 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2352
2353 nfs4_fattr4_free(&resp->obj_attributes);
2354 }
2355
2356 /* ARGSUSED */
2357 static void
rfs4_op_getfh(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2358 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2359 struct compound_state *cs)
2360 {
2361 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2362
2363 DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2364
2365 if (cs->vp == NULL) {
2366 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2367 goto out;
2368 }
2369 if (cs->access == CS_ACCESS_DENIED) {
2370 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2371 goto out;
2372 }
2373
2374 /* check for reparse point at the share point */
2375 if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2376 /* it's all bad */
2377 cs->exi->exi_moved = 1;
2378 *cs->statusp = resp->status = NFS4ERR_MOVED;
2379 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2380 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2381 return;
2382 }
2383
2384 /* check for reparse point at vp */
2385 if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2386 /* it's not all bad */
2387 *cs->statusp = resp->status = NFS4ERR_MOVED;
2388 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2389 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2390 return;
2391 }
2392
2393 resp->object.nfs_fh4_val =
2394 kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2395 nfs_fh4_copy(&cs->fh, &resp->object);
2396 *cs->statusp = resp->status = NFS4_OK;
2397 out:
2398 DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2399 GETFH4res *, resp);
2400 }
2401
2402 static void
rfs4_op_getfh_free(nfs_resop4 * resop)2403 rfs4_op_getfh_free(nfs_resop4 *resop)
2404 {
2405 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2406
2407 if (resp->status == NFS4_OK &&
2408 resp->object.nfs_fh4_val != NULL) {
2409 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2410 resp->object.nfs_fh4_val = NULL;
2411 resp->object.nfs_fh4_len = 0;
2412 }
2413 }
2414
2415 /*
2416 * illegal: args: void
2417 * res : status (NFS4ERR_OP_ILLEGAL)
2418 */
2419 /* ARGSUSED */
2420 static void
rfs4_op_illegal(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2421 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2422 struct svc_req *req, struct compound_state *cs)
2423 {
2424 ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2425
2426 resop->resop = OP_ILLEGAL;
2427 *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2428 }
2429
2430 /*
2431 * link: args: SAVED_FH: file, CURRENT_FH: target directory
2432 * res: status. If success - CURRENT_FH unchanged, return change_info
2433 */
2434 /* ARGSUSED */
2435 static void
rfs4_op_link(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2436 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2437 struct compound_state *cs)
2438 {
2439 LINK4args *args = &argop->nfs_argop4_u.oplink;
2440 LINK4res *resp = &resop->nfs_resop4_u.oplink;
2441 int error;
2442 vnode_t *vp;
2443 vnode_t *dvp;
2444 struct vattr bdva, idva, adva;
2445 char *nm;
2446 uint_t len;
2447 struct sockaddr *ca;
2448 char *name = NULL;
2449
2450 DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2451 LINK4args *, args);
2452
2453 /* SAVED_FH: source object */
2454 vp = cs->saved_vp;
2455 if (vp == NULL) {
2456 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2457 goto out;
2458 }
2459
2460 /* CURRENT_FH: target directory */
2461 dvp = cs->vp;
2462 if (dvp == NULL) {
2463 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2464 goto out;
2465 }
2466
2467 /*
2468 * If there is a non-shared filesystem mounted on this vnode,
2469 * do not allow to link any file in this directory.
2470 */
2471 if (vn_ismntpt(dvp)) {
2472 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2473 goto out;
2474 }
2475
2476 if (cs->access == CS_ACCESS_DENIED) {
2477 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2478 goto out;
2479 }
2480
2481 /* Check source object's type validity */
2482 if (vp->v_type == VDIR) {
2483 *cs->statusp = resp->status = NFS4ERR_ISDIR;
2484 goto out;
2485 }
2486
2487 /* Check target directory's type */
2488 if (dvp->v_type != VDIR) {
2489 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2490 goto out;
2491 }
2492
2493 if (cs->saved_exi != cs->exi) {
2494 *cs->statusp = resp->status = NFS4ERR_XDEV;
2495 goto out;
2496 }
2497
2498 if (!utf8_dir_verify(&args->newname)) {
2499 *cs->statusp = resp->status = NFS4ERR_INVAL;
2500 goto out;
2501 }
2502
2503 nm = utf8_to_fn(&args->newname, &len, NULL);
2504 if (nm == NULL) {
2505 *cs->statusp = resp->status = NFS4ERR_INVAL;
2506 goto out;
2507 }
2508
2509 if (len > MAXNAMELEN) {
2510 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2511 kmem_free(nm, len);
2512 goto out;
2513 }
2514
2515 if (rdonly4(cs->exi, cs->vp, req)) {
2516 *cs->statusp = resp->status = NFS4ERR_ROFS;
2517 kmem_free(nm, len);
2518 goto out;
2519 }
2520
2521 /* Get "before" change value */
2522 bdva.va_mask = AT_CTIME|AT_SEQ;
2523 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2524 if (error) {
2525 *cs->statusp = resp->status = puterrno4(error);
2526 kmem_free(nm, len);
2527 goto out;
2528 }
2529
2530 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2531 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2532 MAXPATHLEN + 1);
2533
2534 if (name == NULL) {
2535 *cs->statusp = resp->status = NFS4ERR_INVAL;
2536 kmem_free(nm, len);
2537 goto out;
2538 }
2539
2540 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2541
2542 error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2543
2544 if (nm != name)
2545 kmem_free(name, MAXPATHLEN + 1);
2546 kmem_free(nm, len);
2547
2548 /*
2549 * Get the initial "after" sequence number, if it fails, set to zero
2550 */
2551 idva.va_mask = AT_SEQ;
2552 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2553 idva.va_seq = 0;
2554
2555 /*
2556 * Force modified data and metadata out to stable storage.
2557 */
2558 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2559 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2560
2561 if (error) {
2562 *cs->statusp = resp->status = puterrno4(error);
2563 goto out;
2564 }
2565
2566 /*
2567 * Get "after" change value, if it fails, simply return the
2568 * before value.
2569 */
2570 adva.va_mask = AT_CTIME|AT_SEQ;
2571 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2572 adva.va_ctime = bdva.va_ctime;
2573 adva.va_seq = 0;
2574 }
2575
2576 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2577
2578 /*
2579 * The cinfo.atomic = TRUE only if we have
2580 * non-zero va_seq's, and it has incremented by exactly one
2581 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2582 */
2583 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2584 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2585 resp->cinfo.atomic = TRUE;
2586 else
2587 resp->cinfo.atomic = FALSE;
2588
2589 *cs->statusp = resp->status = NFS4_OK;
2590 out:
2591 DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2592 LINK4res *, resp);
2593 }
2594
2595 /*
2596 * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2597 */
2598
2599 /* ARGSUSED */
2600 static nfsstat4
do_rfs4_op_lookup(char * nm,struct svc_req * req,struct compound_state * cs)2601 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2602 {
2603 int error;
2604 int different_export = 0;
2605 vnode_t *vp, *tvp, *pre_tvp = NULL, *oldvp = NULL;
2606 struct exportinfo *exi = NULL, *pre_exi = NULL;
2607 nfsstat4 stat;
2608 fid_t fid;
2609 int attrdir, dotdot, walk;
2610 bool_t is_newvp = FALSE;
2611
2612 if (cs->vp->v_flag & V_XATTRDIR) {
2613 attrdir = 1;
2614 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2615 } else {
2616 attrdir = 0;
2617 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2618 }
2619
2620 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2621
2622 /*
2623 * If dotdotting, then need to check whether it's
2624 * above the root of a filesystem, or above an
2625 * export point.
2626 */
2627 if (dotdot) {
2628
2629 /*
2630 * If dotdotting at the root of a filesystem, then
2631 * need to traverse back to the mounted-on filesystem
2632 * and do the dotdot lookup there.
2633 */
2634 if (cs->vp->v_flag & VROOT) {
2635
2636 /*
2637 * If at the system root, then can
2638 * go up no further.
2639 */
2640 if (VN_CMP(cs->vp, rootdir))
2641 return (puterrno4(ENOENT));
2642
2643 /*
2644 * Traverse back to the mounted-on filesystem
2645 */
2646 cs->vp = untraverse(cs->vp);
2647
2648 /*
2649 * Set the different_export flag so we remember
2650 * to pick up a new exportinfo entry for
2651 * this new filesystem.
2652 */
2653 different_export = 1;
2654 } else {
2655
2656 /*
2657 * If dotdotting above an export point then set
2658 * the different_export to get new export info.
2659 */
2660 different_export = nfs_exported(cs->exi, cs->vp);
2661 }
2662 }
2663
2664 error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2665 NULL, NULL, NULL);
2666 if (error)
2667 return (puterrno4(error));
2668
2669 /*
2670 * If the vnode is in a pseudo filesystem, check whether it is visible.
2671 *
2672 * XXX if the vnode is a symlink and it is not visible in
2673 * a pseudo filesystem, return ENOENT (not following symlink).
2674 * V4 client can not mount such symlink. This is a regression
2675 * from V2/V3.
2676 *
2677 * In the same exported filesystem, if the security flavor used
2678 * is not an explicitly shared flavor, limit the view to the visible
2679 * list entries only. This is not a WRONGSEC case because it's already
2680 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2681 */
2682 if (!different_export &&
2683 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2684 cs->access & CS_ACCESS_LIMITED)) {
2685 if (! nfs_visible(cs->exi, vp, &different_export)) {
2686 VN_RELE(vp);
2687 return (puterrno4(ENOENT));
2688 }
2689 }
2690
2691 /*
2692 * If it's a mountpoint, then traverse it.
2693 */
2694 if (vn_ismntpt(vp)) {
2695 pre_exi = cs->exi; /* save pre-traversed exportinfo */
2696 pre_tvp = vp; /* save pre-traversed vnode */
2697
2698 /*
2699 * hold pre_tvp to counteract rele by traverse. We will
2700 * need pre_tvp below if checkexport4 fails
2701 */
2702 VN_HOLD(pre_tvp);
2703 tvp = vp;
2704 if ((error = traverse(&tvp)) != 0) {
2705 VN_RELE(vp);
2706 VN_RELE(pre_tvp);
2707 return (puterrno4(error));
2708 }
2709 vp = tvp;
2710 different_export = 1;
2711 } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2712 /*
2713 * The vfsp comparison is to handle the case where
2714 * a LOFS mount is shared. lo_lookup traverses mount points,
2715 * and NFS is unaware of local fs transistions because
2716 * v_vfsmountedhere isn't set. For this special LOFS case,
2717 * the dir and the obj returned by lookup will have different
2718 * vfs ptrs.
2719 */
2720 different_export = 1;
2721 }
2722
2723 if (different_export) {
2724
2725 bzero(&fid, sizeof (fid));
2726 fid.fid_len = MAXFIDSZ;
2727 error = vop_fid_pseudo(vp, &fid);
2728 if (error) {
2729 VN_RELE(vp);
2730 if (pre_tvp)
2731 VN_RELE(pre_tvp);
2732 return (puterrno4(error));
2733 }
2734
2735 if (dotdot)
2736 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2737 else
2738 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2739
2740 if (exi == NULL) {
2741 if (pre_tvp) {
2742 /*
2743 * If this vnode is a mounted-on vnode,
2744 * but the mounted-on file system is not
2745 * exported, send back the filehandle for
2746 * the mounted-on vnode, not the root of
2747 * the mounted-on file system.
2748 */
2749 VN_RELE(vp);
2750 vp = pre_tvp;
2751 exi = pre_exi;
2752 } else {
2753 VN_RELE(vp);
2754 return (puterrno4(EACCES));
2755 }
2756 } else if (pre_tvp) {
2757 /* we're done with pre_tvp now. release extra hold */
2758 VN_RELE(pre_tvp);
2759 }
2760
2761 cs->exi = exi;
2762
2763 /*
2764 * Now we do a checkauth4. The reason is that
2765 * this client/user may not have access to the new
2766 * exported file system, and if he does,
2767 * the client/user may be mapped to a different uid.
2768 *
2769 * We start with a new cr, because the checkauth4 done
2770 * in the PUT*FH operation over wrote the cred's uid,
2771 * gid, etc, and we want the real thing before calling
2772 * checkauth4()
2773 */
2774 crfree(cs->cr);
2775 cs->cr = crdup(cs->basecr);
2776
2777 oldvp = cs->vp;
2778 cs->vp = vp;
2779 is_newvp = TRUE;
2780
2781 stat = call_checkauth4(cs, req);
2782 if (stat != NFS4_OK) {
2783 VN_RELE(cs->vp);
2784 cs->vp = oldvp;
2785 return (stat);
2786 }
2787 }
2788
2789 /*
2790 * After various NFS checks, do a label check on the path
2791 * component. The label on this path should either be the
2792 * global zone's label or a zone's label. We are only
2793 * interested in the zone's label because exported files
2794 * in global zone is accessible (though read-only) to
2795 * clients. The exportability/visibility check is already
2796 * done before reaching this code.
2797 */
2798 if (is_system_labeled()) {
2799 bslabel_t *clabel;
2800
2801 ASSERT(req->rq_label != NULL);
2802 clabel = req->rq_label;
2803 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2804 "got client label from request(1)", struct svc_req *, req);
2805
2806 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2807 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2808 cs->exi)) {
2809 error = EACCES;
2810 goto err_out;
2811 }
2812 } else {
2813 /*
2814 * We grant access to admin_low label clients
2815 * only if the client is trusted, i.e. also
2816 * running Solaris Trusted Extension.
2817 */
2818 struct sockaddr *ca;
2819 int addr_type;
2820 void *ipaddr;
2821 tsol_tpc_t *tp;
2822
2823 ca = (struct sockaddr *)svc_getrpccaller(
2824 req->rq_xprt)->buf;
2825 if (ca->sa_family == AF_INET) {
2826 addr_type = IPV4_VERSION;
2827 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2828 } else if (ca->sa_family == AF_INET6) {
2829 addr_type = IPV6_VERSION;
2830 ipaddr = &((struct sockaddr_in6 *)
2831 ca)->sin6_addr;
2832 }
2833 tp = find_tpc(ipaddr, addr_type, B_FALSE);
2834 if (tp == NULL || tp->tpc_tp.tp_doi !=
2835 l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2836 SUN_CIPSO) {
2837 if (tp != NULL)
2838 TPC_RELE(tp);
2839 error = EACCES;
2840 goto err_out;
2841 }
2842 TPC_RELE(tp);
2843 }
2844 }
2845
2846 error = makefh4(&cs->fh, vp, cs->exi);
2847
2848 err_out:
2849 if (error) {
2850 if (is_newvp) {
2851 VN_RELE(cs->vp);
2852 cs->vp = oldvp;
2853 } else
2854 VN_RELE(vp);
2855 return (puterrno4(error));
2856 }
2857
2858 if (!is_newvp) {
2859 if (cs->vp)
2860 VN_RELE(cs->vp);
2861 cs->vp = vp;
2862 } else if (oldvp)
2863 VN_RELE(oldvp);
2864
2865 /*
2866 * if did lookup on attrdir and didn't lookup .., set named
2867 * attr fh flag
2868 */
2869 if (attrdir && ! dotdot)
2870 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2871
2872 /* Assume false for now, open proc will set this */
2873 cs->mandlock = FALSE;
2874
2875 return (NFS4_OK);
2876 }
2877
2878 /* ARGSUSED */
2879 static void
rfs4_op_lookup(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2880 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2881 struct compound_state *cs)
2882 {
2883 LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2884 LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2885 char *nm;
2886 uint_t len;
2887 struct sockaddr *ca;
2888 char *name = NULL;
2889
2890 DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2891 LOOKUP4args *, args);
2892
2893 if (cs->vp == NULL) {
2894 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2895 goto out;
2896 }
2897
2898 if (cs->vp->v_type == VLNK) {
2899 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2900 goto out;
2901 }
2902
2903 if (cs->vp->v_type != VDIR) {
2904 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2905 goto out;
2906 }
2907
2908 if (!utf8_dir_verify(&args->objname)) {
2909 *cs->statusp = resp->status = NFS4ERR_INVAL;
2910 goto out;
2911 }
2912
2913 nm = utf8_to_str(&args->objname, &len, NULL);
2914 if (nm == NULL) {
2915 *cs->statusp = resp->status = NFS4ERR_INVAL;
2916 goto out;
2917 }
2918
2919 if (len > MAXNAMELEN) {
2920 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2921 kmem_free(nm, len);
2922 goto out;
2923 }
2924
2925 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2926 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2927 MAXPATHLEN + 1);
2928
2929 if (name == NULL) {
2930 *cs->statusp = resp->status = NFS4ERR_INVAL;
2931 kmem_free(nm, len);
2932 goto out;
2933 }
2934
2935 *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
2936
2937 if (name != nm)
2938 kmem_free(name, MAXPATHLEN + 1);
2939 kmem_free(nm, len);
2940
2941 out:
2942 DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
2943 LOOKUP4res *, resp);
2944 }
2945
2946 /* ARGSUSED */
2947 static void
rfs4_op_lookupp(nfs_argop4 * args,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2948 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
2949 struct compound_state *cs)
2950 {
2951 LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
2952
2953 DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
2954
2955 if (cs->vp == NULL) {
2956 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2957 goto out;
2958 }
2959
2960 if (cs->vp->v_type != VDIR) {
2961 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2962 goto out;
2963 }
2964
2965 *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
2966
2967 /*
2968 * From NFSV4 Specification, LOOKUPP should not check for
2969 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
2970 */
2971 if (resp->status == NFS4ERR_WRONGSEC) {
2972 *cs->statusp = resp->status = NFS4_OK;
2973 }
2974
2975 out:
2976 DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
2977 LOOKUPP4res *, resp);
2978 }
2979
2980
2981 /*ARGSUSED2*/
2982 static void
rfs4_op_openattr(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2983 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2984 struct compound_state *cs)
2985 {
2986 OPENATTR4args *args = &argop->nfs_argop4_u.opopenattr;
2987 OPENATTR4res *resp = &resop->nfs_resop4_u.opopenattr;
2988 vnode_t *avp = NULL;
2989 int lookup_flags = LOOKUP_XATTR, error;
2990 int exp_ro = 0;
2991
2992 DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
2993 OPENATTR4args *, args);
2994
2995 if (cs->vp == NULL) {
2996 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2997 goto out;
2998 }
2999
3000 if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3001 !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3002 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3003 goto out;
3004 }
3005
3006 /*
3007 * If file system supports passing ACE mask to VOP_ACCESS then
3008 * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3009 */
3010
3011 if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3012 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3013 V_ACE_MASK, cs->cr, NULL);
3014 else
3015 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3016 (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3017 (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3018
3019 if (error) {
3020 *cs->statusp = resp->status = puterrno4(EACCES);
3021 goto out;
3022 }
3023
3024 /*
3025 * The CREATE_XATTR_DIR VOP flag cannot be specified if
3026 * the file system is exported read-only -- regardless of
3027 * createdir flag. Otherwise the attrdir would be created
3028 * (assuming server fs isn't mounted readonly locally). If
3029 * VOP_LOOKUP returns ENOENT in this case, the error will
3030 * be translated into EROFS. ENOSYS is mapped to ENOTSUP
3031 * because specfs has no VOP_LOOKUP op, so the macro would
3032 * return ENOSYS. EINVAL is returned by all (current)
3033 * Solaris file system implementations when any of their
3034 * restrictions are violated (xattr(dir) can't have xattrdir).
3035 * Returning NOTSUPP is more appropriate in this case
3036 * because the object will never be able to have an attrdir.
3037 */
3038 if (args->createdir && ! (exp_ro = rdonly4(cs->exi, cs->vp, req)))
3039 lookup_flags |= CREATE_XATTR_DIR;
3040
3041 error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3042 NULL, NULL, NULL);
3043
3044 if (error) {
3045 if (error == ENOENT && args->createdir && exp_ro)
3046 *cs->statusp = resp->status = puterrno4(EROFS);
3047 else if (error == EINVAL || error == ENOSYS)
3048 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3049 else
3050 *cs->statusp = resp->status = puterrno4(error);
3051 goto out;
3052 }
3053
3054 ASSERT(avp->v_flag & V_XATTRDIR);
3055
3056 error = makefh4(&cs->fh, avp, cs->exi);
3057
3058 if (error) {
3059 VN_RELE(avp);
3060 *cs->statusp = resp->status = puterrno4(error);
3061 goto out;
3062 }
3063
3064 VN_RELE(cs->vp);
3065 cs->vp = avp;
3066
3067 /*
3068 * There is no requirement for an attrdir fh flag
3069 * because the attrdir has a vnode flag to distinguish
3070 * it from regular (non-xattr) directories. The
3071 * FH4_ATTRDIR flag is set for future sanity checks.
3072 */
3073 set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3074 *cs->statusp = resp->status = NFS4_OK;
3075
3076 out:
3077 DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3078 OPENATTR4res *, resp);
3079 }
3080
3081 static int
do_io(int direction,vnode_t * vp,struct uio * uio,int ioflag,cred_t * cred,caller_context_t * ct)3082 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3083 caller_context_t *ct)
3084 {
3085 int error;
3086 int i;
3087 clock_t delaytime;
3088
3089 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3090
3091 /*
3092 * Don't block on mandatory locks. If this routine returns
3093 * EAGAIN, the caller should return NFS4ERR_LOCKED.
3094 */
3095 uio->uio_fmode = FNONBLOCK;
3096
3097 for (i = 0; i < rfs4_maxlock_tries; i++) {
3098
3099
3100 if (direction == FREAD) {
3101 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3102 error = VOP_READ(vp, uio, ioflag, cred, ct);
3103 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3104 } else {
3105 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3106 error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3107 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3108 }
3109
3110 if (error != EAGAIN)
3111 break;
3112
3113 if (i < rfs4_maxlock_tries - 1) {
3114 delay(delaytime);
3115 delaytime *= 2;
3116 }
3117 }
3118
3119 return (error);
3120 }
3121
3122 /* ARGSUSED */
3123 static void
rfs4_op_read(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3124 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3125 struct compound_state *cs)
3126 {
3127 READ4args *args = &argop->nfs_argop4_u.opread;
3128 READ4res *resp = &resop->nfs_resop4_u.opread;
3129 int error;
3130 int verror;
3131 vnode_t *vp;
3132 struct vattr va;
3133 struct iovec iov;
3134 struct uio uio;
3135 u_offset_t offset;
3136 bool_t *deleg = &cs->deleg;
3137 nfsstat4 stat;
3138 int in_crit = 0;
3139 mblk_t *mp = NULL;
3140 int alloc_err = 0;
3141 int rdma_used = 0;
3142 int loaned_buffers;
3143 caller_context_t ct;
3144 struct uio *uiop;
3145
3146 DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3147 READ4args, args);
3148
3149 vp = cs->vp;
3150 if (vp == NULL) {
3151 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3152 goto out;
3153 }
3154 if (cs->access == CS_ACCESS_DENIED) {
3155 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3156 goto out;
3157 }
3158
3159 if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3160 deleg, TRUE, &ct)) != NFS4_OK) {
3161 *cs->statusp = resp->status = stat;
3162 goto out;
3163 }
3164
3165 /*
3166 * Enter the critical region before calling VOP_RWLOCK
3167 * to avoid a deadlock with write requests.
3168 */
3169 if (nbl_need_check(vp)) {
3170 nbl_start_crit(vp, RW_READER);
3171 in_crit = 1;
3172 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3173 &ct)) {
3174 *cs->statusp = resp->status = NFS4ERR_LOCKED;
3175 goto out;
3176 }
3177 }
3178
3179 if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3180 deleg, TRUE, &ct)) != NFS4_OK) {
3181 *cs->statusp = resp->status = stat;
3182 goto out;
3183 }
3184
3185 if (args->wlist) {
3186 if (args->count > clist_len(args->wlist)) {
3187 *cs->statusp = resp->status = NFS4ERR_INVAL;
3188 goto out;
3189 }
3190 rdma_used = 1;
3191 }
3192
3193 /* use loaned buffers for TCP */
3194 loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3195
3196 va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3197 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3198
3199 /*
3200 * If we can't get the attributes, then we can't do the
3201 * right access checking. So, we'll fail the request.
3202 */
3203 if (verror) {
3204 *cs->statusp = resp->status = puterrno4(verror);
3205 goto out;
3206 }
3207
3208 if (vp->v_type != VREG) {
3209 *cs->statusp = resp->status =
3210 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3211 goto out;
3212 }
3213
3214 if (crgetuid(cs->cr) != va.va_uid &&
3215 (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3216 (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3217 *cs->statusp = resp->status = puterrno4(error);
3218 goto out;
3219 }
3220
3221 if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3222 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3223 goto out;
3224 }
3225
3226 offset = args->offset;
3227 if (offset >= va.va_size) {
3228 *cs->statusp = resp->status = NFS4_OK;
3229 resp->eof = TRUE;
3230 resp->data_len = 0;
3231 resp->data_val = NULL;
3232 resp->mblk = NULL;
3233 /* RDMA */
3234 resp->wlist = args->wlist;
3235 resp->wlist_len = resp->data_len;
3236 *cs->statusp = resp->status = NFS4_OK;
3237 if (resp->wlist)
3238 clist_zero_len(resp->wlist);
3239 goto out;
3240 }
3241
3242 if (args->count == 0) {
3243 *cs->statusp = resp->status = NFS4_OK;
3244 resp->eof = FALSE;
3245 resp->data_len = 0;
3246 resp->data_val = NULL;
3247 resp->mblk = NULL;
3248 /* RDMA */
3249 resp->wlist = args->wlist;
3250 resp->wlist_len = resp->data_len;
3251 if (resp->wlist)
3252 clist_zero_len(resp->wlist);
3253 goto out;
3254 }
3255
3256 /*
3257 * Do not allocate memory more than maximum allowed
3258 * transfer size
3259 */
3260 if (args->count > rfs4_tsize(req))
3261 args->count = rfs4_tsize(req);
3262
3263 if (loaned_buffers) {
3264 uiop = (uio_t *)rfs_setup_xuio(vp);
3265 ASSERT(uiop != NULL);
3266 uiop->uio_segflg = UIO_SYSSPACE;
3267 uiop->uio_loffset = args->offset;
3268 uiop->uio_resid = args->count;
3269
3270 /* Jump to do the read if successful */
3271 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3272 /*
3273 * Need to hold the vnode until after VOP_RETZCBUF()
3274 * is called.
3275 */
3276 VN_HOLD(vp);
3277 goto doio_read;
3278 }
3279
3280 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3281 uiop->uio_loffset, int, uiop->uio_resid);
3282
3283 uiop->uio_extflg = 0;
3284
3285 /* failure to setup for zero copy */
3286 rfs_free_xuio((void *)uiop);
3287 loaned_buffers = 0;
3288 }
3289
3290 /*
3291 * If returning data via RDMA Write, then grab the chunk list. If we
3292 * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3293 */
3294 if (rdma_used) {
3295 mp = NULL;
3296 (void) rdma_get_wchunk(req, &iov, args->wlist);
3297 } else {
3298 /*
3299 * mp will contain the data to be sent out in the read reply.
3300 * It will be freed after the reply has been sent. Let's
3301 * roundup the data to a BYTES_PER_XDR_UNIT multiple, so that
3302 * the call to xdrmblk_putmblk() never fails. If the first
3303 * alloc of the requested size fails, then decrease the size to
3304 * something more reasonable and wait for the allocation to
3305 * occur.
3306 */
3307 mp = allocb(RNDUP(args->count), BPRI_MED);
3308 if (mp == NULL) {
3309 if (args->count > MAXBSIZE)
3310 args->count = MAXBSIZE;
3311 mp = allocb_wait(RNDUP(args->count), BPRI_MED,
3312 STR_NOSIG, &alloc_err);
3313 }
3314 ASSERT(mp != NULL);
3315 ASSERT(alloc_err == 0);
3316
3317 iov.iov_base = (caddr_t)mp->b_datap->db_base;
3318 iov.iov_len = args->count;
3319 }
3320
3321 uio.uio_iov = &iov;
3322 uio.uio_iovcnt = 1;
3323 uio.uio_segflg = UIO_SYSSPACE;
3324 uio.uio_extflg = UIO_COPY_CACHED;
3325 uio.uio_loffset = args->offset;
3326 uio.uio_resid = args->count;
3327 uiop = &uio;
3328
3329 doio_read:
3330 error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3331
3332 va.va_mask = AT_SIZE;
3333 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3334
3335 if (error) {
3336 if (mp)
3337 freemsg(mp);
3338 *cs->statusp = resp->status = puterrno4(error);
3339 goto out;
3340 }
3341
3342 /* make mblk using zc buffers */
3343 if (loaned_buffers) {
3344 mp = uio_to_mblk(uiop);
3345 ASSERT(mp != NULL);
3346 }
3347
3348 *cs->statusp = resp->status = NFS4_OK;
3349
3350 ASSERT(uiop->uio_resid >= 0);
3351 resp->data_len = args->count - uiop->uio_resid;
3352 if (mp) {
3353 resp->data_val = (char *)mp->b_datap->db_base;
3354 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3355 } else {
3356 resp->data_val = (caddr_t)iov.iov_base;
3357 }
3358
3359 resp->mblk = mp;
3360
3361 if (!verror && offset + resp->data_len == va.va_size)
3362 resp->eof = TRUE;
3363 else
3364 resp->eof = FALSE;
3365
3366 if (rdma_used) {
3367 if (!rdma_setup_read_data4(args, resp)) {
3368 *cs->statusp = resp->status = NFS4ERR_INVAL;
3369 }
3370 } else {
3371 resp->wlist = NULL;
3372 }
3373
3374 out:
3375 if (in_crit)
3376 nbl_end_crit(vp);
3377
3378 DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3379 READ4res *, resp);
3380 }
3381
3382 static void
rfs4_op_read_free(nfs_resop4 * resop)3383 rfs4_op_read_free(nfs_resop4 *resop)
3384 {
3385 READ4res *resp = &resop->nfs_resop4_u.opread;
3386
3387 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3388 freemsg(resp->mblk);
3389 resp->mblk = NULL;
3390 resp->data_val = NULL;
3391 resp->data_len = 0;
3392 }
3393 }
3394
3395 static void
rfs4_op_readdir_free(nfs_resop4 * resop)3396 rfs4_op_readdir_free(nfs_resop4 * resop)
3397 {
3398 READDIR4res *resp = &resop->nfs_resop4_u.opreaddir;
3399
3400 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3401 freeb(resp->mblk);
3402 resp->mblk = NULL;
3403 resp->data_len = 0;
3404 }
3405 }
3406
3407
3408 /* ARGSUSED */
3409 static void
rfs4_op_putpubfh(nfs_argop4 * args,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3410 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3411 struct compound_state *cs)
3412 {
3413 PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh;
3414 int error;
3415 vnode_t *vp;
3416 struct exportinfo *exi, *sav_exi;
3417 nfs_fh4_fmt_t *fh_fmtp;
3418
3419 DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3420
3421 if (cs->vp) {
3422 VN_RELE(cs->vp);
3423 cs->vp = NULL;
3424 }
3425
3426 if (cs->cr)
3427 crfree(cs->cr);
3428
3429 cs->cr = crdup(cs->basecr);
3430
3431 vp = exi_public->exi_vp;
3432 if (vp == NULL) {
3433 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3434 goto out;
3435 }
3436
3437 error = makefh4(&cs->fh, vp, exi_public);
3438 if (error != 0) {
3439 *cs->statusp = resp->status = puterrno4(error);
3440 goto out;
3441 }
3442 sav_exi = cs->exi;
3443 if (exi_public == exi_root) {
3444 /*
3445 * No filesystem is actually shared public, so we default
3446 * to exi_root. In this case, we must check whether root
3447 * is exported.
3448 */
3449 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3450
3451 /*
3452 * if root filesystem is exported, the exportinfo struct that we
3453 * should use is what checkexport4 returns, because root_exi is
3454 * actually a mostly empty struct.
3455 */
3456 exi = checkexport4(&fh_fmtp->fh4_fsid,
3457 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3458 cs->exi = ((exi != NULL) ? exi : exi_public);
3459 } else {
3460 /*
3461 * it's a properly shared filesystem
3462 */
3463 cs->exi = exi_public;
3464 }
3465
3466 if (is_system_labeled()) {
3467 bslabel_t *clabel;
3468
3469 ASSERT(req->rq_label != NULL);
3470 clabel = req->rq_label;
3471 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3472 "got client label from request(1)",
3473 struct svc_req *, req);
3474 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3475 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3476 cs->exi)) {
3477 *cs->statusp = resp->status =
3478 NFS4ERR_SERVERFAULT;
3479 goto out;
3480 }
3481 }
3482 }
3483
3484 VN_HOLD(vp);
3485 cs->vp = vp;
3486
3487 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3488 VN_RELE(cs->vp);
3489 cs->vp = NULL;
3490 cs->exi = sav_exi;
3491 goto out;
3492 }
3493
3494 *cs->statusp = resp->status = NFS4_OK;
3495 out:
3496 DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3497 PUTPUBFH4res *, resp);
3498 }
3499
/*
 * XXX - issue with put*fh operations. Suppose /export/home is exported.
 * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
 * or joe have restrictive search permissions, then we shouldn't let
 * the client get a file handle. This is easy to enforce. However, we
 * don't know what security flavor should be used until we resolve the
 * path name. Another complication is uid mapping. If root is
 * the user, then it will be mapped to the anonymous user by default,
 * but we won't know that till we've resolved the path name. And we won't
 * know what the anonymous user is.
 * Luckily, SECINFO is specified to take a full filename.
 * So what we will have to do in rfs4_op_lookup is check that the flavor
 * of the target object matches that of the request, and if root was the
 * caller, check for the root= and anon= options, and if necessary,
 * repeat the lookup using the right cred_t. But that's not done yet.
 */
3516 /* ARGSUSED */
3517 static void
rfs4_op_putfh(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3518 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3519 struct compound_state *cs)
3520 {
3521 PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3522 PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3523 nfs_fh4_fmt_t *fh_fmtp;
3524
3525 DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3526 PUTFH4args *, args);
3527
3528 if (cs->vp) {
3529 VN_RELE(cs->vp);
3530 cs->vp = NULL;
3531 }
3532
3533 if (cs->cr) {
3534 crfree(cs->cr);
3535 cs->cr = NULL;
3536 }
3537
3538
3539 if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3540 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3541 goto out;
3542 }
3543
3544 fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3545 cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3546 NULL);
3547
3548 if (cs->exi == NULL) {
3549 *cs->statusp = resp->status = NFS4ERR_STALE;
3550 goto out;
3551 }
3552
3553 cs->cr = crdup(cs->basecr);
3554
3555 ASSERT(cs->cr != NULL);
3556
3557 if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3558 *cs->statusp = resp->status;
3559 goto out;
3560 }
3561
3562 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3563 VN_RELE(cs->vp);
3564 cs->vp = NULL;
3565 goto out;
3566 }
3567
3568 nfs_fh4_copy(&args->object, &cs->fh);
3569 *cs->statusp = resp->status = NFS4_OK;
3570 cs->deleg = FALSE;
3571
3572 out:
3573 DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3574 PUTFH4res *, resp);
3575 }
3576
3577 /* ARGSUSED */
3578 static void
rfs4_op_putrootfh(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3579 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3580 struct compound_state *cs)
3581 {
3582 PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3583 int error;
3584 fid_t fid;
3585 struct exportinfo *exi, *sav_exi;
3586
3587 DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3588
3589 if (cs->vp) {
3590 VN_RELE(cs->vp);
3591 cs->vp = NULL;
3592 }
3593
3594 if (cs->cr)
3595 crfree(cs->cr);
3596
3597 cs->cr = crdup(cs->basecr);
3598
3599 /*
3600 * Using rootdir, the system root vnode,
3601 * get its fid.
3602 */
3603 bzero(&fid, sizeof (fid));
3604 fid.fid_len = MAXFIDSZ;
3605 error = vop_fid_pseudo(rootdir, &fid);
3606 if (error != 0) {
3607 *cs->statusp = resp->status = puterrno4(error);
3608 goto out;
3609 }
3610
3611 /*
3612 * Then use the root fsid & fid it to find out if it's exported
3613 *
3614 * If the server root isn't exported directly, then
3615 * it should at least be a pseudo export based on
3616 * one or more exports further down in the server's
3617 * file tree.
3618 */
3619 exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL);
3620 if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3621 NFS4_DEBUG(rfs4_debug,
3622 (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3623 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3624 goto out;
3625 }
3626
3627 /*
3628 * Now make a filehandle based on the root
3629 * export and root vnode.
3630 */
3631 error = makefh4(&cs->fh, rootdir, exi);
3632 if (error != 0) {
3633 *cs->statusp = resp->status = puterrno4(error);
3634 goto out;
3635 }
3636
3637 sav_exi = cs->exi;
3638 cs->exi = exi;
3639
3640 VN_HOLD(rootdir);
3641 cs->vp = rootdir;
3642
3643 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3644 VN_RELE(rootdir);
3645 cs->vp = NULL;
3646 cs->exi = sav_exi;
3647 goto out;
3648 }
3649
3650 *cs->statusp = resp->status = NFS4_OK;
3651 cs->deleg = FALSE;
3652 out:
3653 DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3654 PUTROOTFH4res *, resp);
3655 }
3656
3657 /*
3658 * A directory entry is a valid nfsv4 entry if
3659 * - it has a non-zero ino
3660 * - it is not a dot or dotdot name
3661 * - it is visible in a pseudo export or in a real export that can
3662 * only have a limited view.
3663 */
3664 static bool_t
valid_nfs4_entry(struct exportinfo * exi,struct dirent64 * dp,int * expseudo,int check_visible)3665 valid_nfs4_entry(struct exportinfo *exi, struct dirent64 *dp,
3666 int *expseudo, int check_visible)
3667 {
3668 if (dp->d_ino == 0 || NFS_IS_DOTNAME(dp->d_name)) {
3669 *expseudo = 0;
3670 return (FALSE);
3671 }
3672
3673 if (! check_visible) {
3674 *expseudo = 0;
3675 return (TRUE);
3676 }
3677
3678 return (nfs_visible_inode(exi, dp->d_ino, expseudo));
3679 }
3680
3681 /*
3682 * set_rdattr_params sets up the variables used to manage what information
3683 * to get for each directory entry.
3684 */
3685 static nfsstat4
set_rdattr_params(struct nfs4_svgetit_arg * sargp,bitmap4 attrs,bool_t * need_to_lookup)3686 set_rdattr_params(struct nfs4_svgetit_arg *sargp,
3687 bitmap4 attrs, bool_t *need_to_lookup)
3688 {
3689 uint_t va_mask;
3690 nfsstat4 status;
3691 bitmap4 objbits;
3692
3693 status = bitmap4_to_attrmask(attrs, sargp);
3694 if (status != NFS4_OK) {
3695 /*
3696 * could not even figure attr mask
3697 */
3698 return (status);
3699 }
3700 va_mask = sargp->vap->va_mask;
3701
3702 /*
3703 * dirent's d_ino is always correct value for mounted_on_fileid.
3704 * mntdfid_set is set once here, but mounted_on_fileid is
3705 * set in main dirent processing loop for each dirent.
3706 * The mntdfid_set is a simple optimization that lets the
3707 * server attr code avoid work when caller is readdir.
3708 */
3709 sargp->mntdfid_set = TRUE;
3710
3711 /*
3712 * Lookup entry only if client asked for any of the following:
3713 * a) vattr attrs
3714 * b) vfs attrs
3715 * c) attrs w/per-object scope requested (change, filehandle, etc)
3716 * other than mounted_on_fileid (which we can take from dirent)
3717 */
3718 objbits = attrs ? attrs & NFS4_VP_ATTR_MASK : 0;
3719
3720 if (va_mask || sargp->sbp || (objbits & ~FATTR4_MOUNTED_ON_FILEID_MASK))
3721 *need_to_lookup = TRUE;
3722 else
3723 *need_to_lookup = FALSE;
3724
3725 if (sargp->sbp == NULL)
3726 return (NFS4_OK);
3727
3728 /*
3729 * If filesystem attrs are requested, get them now from the
3730 * directory vp, as most entries will have same filesystem. The only
3731 * exception are mounted over entries but we handle
3732 * those as we go (XXX mounted over detection not yet implemented).
3733 */
3734 sargp->vap->va_mask = 0; /* to avoid VOP_GETATTR */
3735 status = bitmap4_get_sysattrs(sargp);
3736 sargp->vap->va_mask = va_mask;
3737
3738 if ((status != NFS4_OK) && sargp->rdattr_error_req) {
3739 /*
3740 * Failed to get filesystem attributes.
3741 * Return a rdattr_error for each entry, but don't fail.
3742 * However, don't get any obj-dependent attrs.
3743 */
3744 sargp->rdattr_error = status; /* for rdattr_error */
3745 *need_to_lookup = FALSE;
3746 /*
3747 * At least get fileid for regular readdir output
3748 */
3749 sargp->vap->va_mask &= AT_NODEID;
3750 status = NFS4_OK;
3751 }
3752
3753 return (status);
3754 }
3755
3756 /*
3757 * readlink: args: CURRENT_FH.
3758 * res: status. If success - CURRENT_FH unchanged, return linktext.
3759 */
3760
3761 /* ARGSUSED */
3762 static void
rfs4_op_readlink(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3763 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3764 struct compound_state *cs)
3765 {
3766 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3767 int error;
3768 vnode_t *vp;
3769 struct iovec iov;
3770 struct vattr va;
3771 struct uio uio;
3772 char *data;
3773 struct sockaddr *ca;
3774 char *name = NULL;
3775 int is_referral;
3776
3777 DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3778
3779 /* CURRENT_FH: directory */
3780 vp = cs->vp;
3781 if (vp == NULL) {
3782 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3783 goto out;
3784 }
3785
3786 if (cs->access == CS_ACCESS_DENIED) {
3787 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3788 goto out;
3789 }
3790
3791 /* Is it a referral? */
3792 if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3793
3794 is_referral = 1;
3795
3796 } else {
3797
3798 is_referral = 0;
3799
3800 if (vp->v_type == VDIR) {
3801 *cs->statusp = resp->status = NFS4ERR_ISDIR;
3802 goto out;
3803 }
3804
3805 if (vp->v_type != VLNK) {
3806 *cs->statusp = resp->status = NFS4ERR_INVAL;
3807 goto out;
3808 }
3809
3810 }
3811
3812 va.va_mask = AT_MODE;
3813 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3814 if (error) {
3815 *cs->statusp = resp->status = puterrno4(error);
3816 goto out;
3817 }
3818
3819 if (MANDLOCK(vp, va.va_mode)) {
3820 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3821 goto out;
3822 }
3823
3824 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3825
3826 if (is_referral) {
3827 char *s;
3828 size_t strsz;
3829
3830 /* Get an artificial symlink based on a referral */
3831 s = build_symlink(vp, cs->cr, &strsz);
3832 global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3833 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3834 vnode_t *, vp, char *, s);
3835 if (s == NULL)
3836 error = EINVAL;
3837 else {
3838 error = 0;
3839 (void) strlcpy(data, s, MAXPATHLEN + 1);
3840 kmem_free(s, strsz);
3841 }
3842
3843 } else {
3844
3845 iov.iov_base = data;
3846 iov.iov_len = MAXPATHLEN;
3847 uio.uio_iov = &iov;
3848 uio.uio_iovcnt = 1;
3849 uio.uio_segflg = UIO_SYSSPACE;
3850 uio.uio_extflg = UIO_COPY_CACHED;
3851 uio.uio_loffset = 0;
3852 uio.uio_resid = MAXPATHLEN;
3853
3854 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3855
3856 if (!error)
3857 *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3858 }
3859
3860 if (error) {
3861 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3862 *cs->statusp = resp->status = puterrno4(error);
3863 goto out;
3864 }
3865
3866 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3867 name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3868 MAXPATHLEN + 1);
3869
3870 if (name == NULL) {
3871 /*
3872 * Even though the conversion failed, we return
3873 * something. We just don't translate it.
3874 */
3875 name = data;
3876 }
3877
3878 /*
3879 * treat link name as data
3880 */
3881 (void) str_to_utf8(name, &resp->link);
3882
3883 if (name != data)
3884 kmem_free(name, MAXPATHLEN + 1);
3885 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3886 *cs->statusp = resp->status = NFS4_OK;
3887
3888 out:
3889 DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3890 READLINK4res *, resp);
3891 }
3892
3893 static void
rfs4_op_readlink_free(nfs_resop4 * resop)3894 rfs4_op_readlink_free(nfs_resop4 *resop)
3895 {
3896 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3897 utf8string *symlink = &resp->link;
3898
3899 if (symlink->utf8string_val) {
3900 UTF8STRING_FREE(*symlink)
3901 }
3902 }
3903
3904 /*
3905 * release_lockowner:
3906 * Release any state associated with the supplied
3907 * lockowner. Note if any lo_state is holding locks we will not
3908 * rele that lo_state and thus the lockowner will not be destroyed.
3909 * A client using lock after the lock owner stateid has been released
3910 * will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3911 * to reissue the lock with new_lock_owner set to TRUE.
3912 * args: lock_owner
3913 * res: status
3914 */
3915 /* ARGSUSED */
3916 static void
rfs4_op_release_lockowner(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3917 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3918 struct svc_req *req, struct compound_state *cs)
3919 {
3920 RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3921 RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3922 rfs4_lockowner_t *lo;
3923 rfs4_openowner_t *oo;
3924 rfs4_state_t *sp;
3925 rfs4_lo_state_t *lsp;
3926 rfs4_client_t *cp;
3927 bool_t create = FALSE;
3928 locklist_t *llist;
3929 sysid_t sysid;
3930
3931 DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3932 cs, RELEASE_LOCKOWNER4args *, ap);
3933
3934 /* Make sure there is a clientid around for this request */
3935 cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3936
3937 if (cp == NULL) {
3938 *cs->statusp = resp->status =
3939 rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3940 goto out;
3941 }
3942 rfs4_client_rele(cp);
3943
3944 lo = rfs4_findlockowner(&ap->lock_owner, &create);
3945 if (lo == NULL) {
3946 *cs->statusp = resp->status = NFS4_OK;
3947 goto out;
3948 }
3949 ASSERT(lo->rl_client != NULL);
3950
3951 /*
3952 * Check for EXPIRED client. If so will reap state with in a lease
3953 * period or on next set_clientid_confirm step
3954 */
3955 if (rfs4_lease_expired(lo->rl_client)) {
3956 rfs4_lockowner_rele(lo);
3957 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3958 goto out;
3959 }
3960
3961 /*
3962 * If no sysid has been assigned, then no locks exist; just return.
3963 */
3964 rfs4_dbe_lock(lo->rl_client->rc_dbe);
3965 if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3966 rfs4_lockowner_rele(lo);
3967 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3968 goto out;
3969 }
3970
3971 sysid = lo->rl_client->rc_sysidt;
3972 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3973
3974 /*
3975 * Mark the lockowner invalid.
3976 */
3977 rfs4_dbe_hide(lo->rl_dbe);
3978
3979 /*
3980 * sysid-pid pair should now not be used since the lockowner is
3981 * invalid. If the client were to instantiate the lockowner again
3982 * it would be assigned a new pid. Thus we can get the list of
3983 * current locks.
3984 */
3985
3986 llist = flk_get_active_locks(sysid, lo->rl_pid);
3987 /* If we are still holding locks fail */
3988 if (llist != NULL) {
3989
3990 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3991
3992 flk_free_locklist(llist);
3993 /*
3994 * We need to unhide the lockowner so the client can
3995 * try it again. The bad thing here is if the client
3996 * has a logic error that took it here in the first place
3997 * he probably has lost accounting of the locks that it
3998 * is holding. So we may have dangling state until the
3999 * open owner state is reaped via close. One scenario
4000 * that could possibly occur is that the client has
4001 * sent the unlock request(s) in separate threads
4002 * and has not waited for the replies before sending the
4003 * RELEASE_LOCKOWNER request. Presumably, it would expect
4004 * and deal appropriately with NFS4ERR_LOCKS_HELD, by
4005 * reissuing the request.
4006 */
4007 rfs4_dbe_unhide(lo->rl_dbe);
4008 rfs4_lockowner_rele(lo);
4009 goto out;
4010 }
4011
4012 /*
4013 * For the corresponding client we need to check each open
4014 * owner for any opens that have lockowner state associated
4015 * with this lockowner.
4016 */
4017
4018 rfs4_dbe_lock(lo->rl_client->rc_dbe);
4019 for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
4020 oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
4021
4022 rfs4_dbe_lock(oo->ro_dbe);
4023 for (sp = list_head(&oo->ro_statelist); sp != NULL;
4024 sp = list_next(&oo->ro_statelist, sp)) {
4025
4026 rfs4_dbe_lock(sp->rs_dbe);
4027 for (lsp = list_head(&sp->rs_lostatelist);
4028 lsp != NULL;
4029 lsp = list_next(&sp->rs_lostatelist, lsp)) {
4030 if (lsp->rls_locker == lo) {
4031 rfs4_dbe_lock(lsp->rls_dbe);
4032 rfs4_dbe_invalidate(lsp->rls_dbe);
4033 rfs4_dbe_unlock(lsp->rls_dbe);
4034 }
4035 }
4036 rfs4_dbe_unlock(sp->rs_dbe);
4037 }
4038 rfs4_dbe_unlock(oo->ro_dbe);
4039 }
4040 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4041
4042 rfs4_lockowner_rele(lo);
4043
4044 *cs->statusp = resp->status = NFS4_OK;
4045
4046 out:
4047 DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4048 cs, RELEASE_LOCKOWNER4res *, resp);
4049 }
4050
4051 /*
4052 * short utility function to lookup a file and recall the delegation
4053 */
4054 static rfs4_file_t *
rfs4_lookup_and_findfile(vnode_t * dvp,char * nm,vnode_t ** vpp,int * lkup_error,cred_t * cr)4055 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4056 int *lkup_error, cred_t *cr)
4057 {
4058 vnode_t *vp;
4059 rfs4_file_t *fp = NULL;
4060 bool_t fcreate = FALSE;
4061 int error;
4062
4063 if (vpp)
4064 *vpp = NULL;
4065
4066 if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4067 NULL)) == 0) {
4068 if (vp->v_type == VREG)
4069 fp = rfs4_findfile(vp, NULL, &fcreate);
4070 if (vpp)
4071 *vpp = vp;
4072 else
4073 VN_RELE(vp);
4074 }
4075
4076 if (lkup_error)
4077 *lkup_error = error;
4078
4079 return (fp);
4080 }
4081
4082 /*
4083 * remove: args: CURRENT_FH: directory; name.
4084 * res: status. If success - CURRENT_FH unchanged, return change_info
4085 * for directory.
4086 */
4087 /* ARGSUSED */
4088 static void
rfs4_op_remove(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)4089 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4090 struct compound_state *cs)
4091 {
4092 REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4093 REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4094 int error;
4095 vnode_t *dvp, *vp;
4096 struct vattr bdva, idva, adva;
4097 char *nm;
4098 uint_t len;
4099 rfs4_file_t *fp;
4100 int in_crit = 0;
4101 bslabel_t *clabel;
4102 struct sockaddr *ca;
4103 char *name = NULL;
4104
4105 DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4106 REMOVE4args *, args);
4107
4108 /* CURRENT_FH: directory */
4109 dvp = cs->vp;
4110 if (dvp == NULL) {
4111 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4112 goto out;
4113 }
4114
4115 if (cs->access == CS_ACCESS_DENIED) {
4116 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4117 goto out;
4118 }
4119
4120 /*
4121 * If there is an unshared filesystem mounted on this vnode,
4122 * Do not allow to remove anything in this directory.
4123 */
4124 if (vn_ismntpt(dvp)) {
4125 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4126 goto out;
4127 }
4128
4129 if (dvp->v_type != VDIR) {
4130 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4131 goto out;
4132 }
4133
4134 if (!utf8_dir_verify(&args->target)) {
4135 *cs->statusp = resp->status = NFS4ERR_INVAL;
4136 goto out;
4137 }
4138
4139 /*
4140 * Lookup the file so that we can check if it's a directory
4141 */
4142 nm = utf8_to_fn(&args->target, &len, NULL);
4143 if (nm == NULL) {
4144 *cs->statusp = resp->status = NFS4ERR_INVAL;
4145 goto out;
4146 }
4147
4148 if (len > MAXNAMELEN) {
4149 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4150 kmem_free(nm, len);
4151 goto out;
4152 }
4153
4154 if (rdonly4(cs->exi, cs->vp, req)) {
4155 *cs->statusp = resp->status = NFS4ERR_ROFS;
4156 kmem_free(nm, len);
4157 goto out;
4158 }
4159
4160 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4161 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4162 MAXPATHLEN + 1);
4163
4164 if (name == NULL) {
4165 *cs->statusp = resp->status = NFS4ERR_INVAL;
4166 kmem_free(nm, len);
4167 goto out;
4168 }
4169
4170 /*
4171 * Lookup the file to determine type and while we are see if
4172 * there is a file struct around and check for delegation.
4173 * We don't need to acquire va_seq before this lookup, if
4174 * it causes an update, cinfo.before will not match, which will
4175 * trigger a cache flush even if atomic is TRUE.
4176 */
4177 if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4178 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4179 NULL)) {
4180 VN_RELE(vp);
4181 rfs4_file_rele(fp);
4182 *cs->statusp = resp->status = NFS4ERR_DELAY;
4183 if (nm != name)
4184 kmem_free(name, MAXPATHLEN + 1);
4185 kmem_free(nm, len);
4186 goto out;
4187 }
4188 }
4189
4190 /* Didn't find anything to remove */
4191 if (vp == NULL) {
4192 *cs->statusp = resp->status = error;
4193 if (nm != name)
4194 kmem_free(name, MAXPATHLEN + 1);
4195 kmem_free(nm, len);
4196 goto out;
4197 }
4198
4199 if (nbl_need_check(vp)) {
4200 nbl_start_crit(vp, RW_READER);
4201 in_crit = 1;
4202 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4203 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4204 if (nm != name)
4205 kmem_free(name, MAXPATHLEN + 1);
4206 kmem_free(nm, len);
4207 nbl_end_crit(vp);
4208 VN_RELE(vp);
4209 if (fp) {
4210 rfs4_clear_dont_grant(fp);
4211 rfs4_file_rele(fp);
4212 }
4213 goto out;
4214 }
4215 }
4216
4217 /* check label before allowing removal */
4218 if (is_system_labeled()) {
4219 ASSERT(req->rq_label != NULL);
4220 clabel = req->rq_label;
4221 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4222 "got client label from request(1)",
4223 struct svc_req *, req);
4224 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4225 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4226 cs->exi)) {
4227 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4228 if (name != nm)
4229 kmem_free(name, MAXPATHLEN + 1);
4230 kmem_free(nm, len);
4231 if (in_crit)
4232 nbl_end_crit(vp);
4233 VN_RELE(vp);
4234 if (fp) {
4235 rfs4_clear_dont_grant(fp);
4236 rfs4_file_rele(fp);
4237 }
4238 goto out;
4239 }
4240 }
4241 }
4242
4243 /* Get dir "before" change value */
4244 bdva.va_mask = AT_CTIME|AT_SEQ;
4245 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4246 if (error) {
4247 *cs->statusp = resp->status = puterrno4(error);
4248 if (nm != name)
4249 kmem_free(name, MAXPATHLEN + 1);
4250 kmem_free(nm, len);
4251 if (in_crit)
4252 nbl_end_crit(vp);
4253 VN_RELE(vp);
4254 if (fp) {
4255 rfs4_clear_dont_grant(fp);
4256 rfs4_file_rele(fp);
4257 }
4258 goto out;
4259 }
4260 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4261
4262 /* Actually do the REMOVE operation */
4263 if (vp->v_type == VDIR) {
4264 /*
4265 * Can't remove a directory that has a mounted-on filesystem.
4266 */
4267 if (vn_ismntpt(vp)) {
4268 error = EACCES;
4269 } else {
4270 /*
4271 * System V defines rmdir to return EEXIST,
4272 * not ENOTEMPTY, if the directory is not
4273 * empty. A System V NFS server needs to map
4274 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4275 * transmit over the wire.
4276 */
4277 if ((error = VOP_RMDIR(dvp, name, rootdir, cs->cr,
4278 NULL, 0)) == EEXIST)
4279 error = ENOTEMPTY;
4280 }
4281 } else {
4282 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4283 fp != NULL) {
4284 struct vattr va;
4285 vnode_t *tvp;
4286
4287 rfs4_dbe_lock(fp->rf_dbe);
4288 tvp = fp->rf_vp;
4289 if (tvp)
4290 VN_HOLD(tvp);
4291 rfs4_dbe_unlock(fp->rf_dbe);
4292
4293 if (tvp) {
4294 /*
4295 * This is va_seq safe because we are not
4296 * manipulating dvp.
4297 */
4298 va.va_mask = AT_NLINK;
4299 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4300 va.va_nlink == 0) {
4301 /* Remove state on file remove */
4302 if (in_crit) {
4303 nbl_end_crit(vp);
4304 in_crit = 0;
4305 }
4306 rfs4_close_all_state(fp);
4307 }
4308 VN_RELE(tvp);
4309 }
4310 }
4311 }
4312
4313 if (in_crit)
4314 nbl_end_crit(vp);
4315 VN_RELE(vp);
4316
4317 if (fp) {
4318 rfs4_clear_dont_grant(fp);
4319 rfs4_file_rele(fp);
4320 }
4321 if (nm != name)
4322 kmem_free(name, MAXPATHLEN + 1);
4323 kmem_free(nm, len);
4324
4325 if (error) {
4326 *cs->statusp = resp->status = puterrno4(error);
4327 goto out;
4328 }
4329
4330 /*
4331 * Get the initial "after" sequence number, if it fails, set to zero
4332 */
4333 idva.va_mask = AT_SEQ;
4334 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4335 idva.va_seq = 0;
4336
4337 /*
4338 * Force modified data and metadata out to stable storage.
4339 */
4340 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4341
4342 /*
4343 * Get "after" change value, if it fails, simply return the
4344 * before value.
4345 */
4346 adva.va_mask = AT_CTIME|AT_SEQ;
4347 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4348 adva.va_ctime = bdva.va_ctime;
4349 adva.va_seq = 0;
4350 }
4351
4352 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4353
4354 /*
4355 * The cinfo.atomic = TRUE only if we have
4356 * non-zero va_seq's, and it has incremented by exactly one
4357 * during the VOP_REMOVE/RMDIR and it didn't change during
4358 * the VOP_FSYNC.
4359 */
4360 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4361 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4362 resp->cinfo.atomic = TRUE;
4363 else
4364 resp->cinfo.atomic = FALSE;
4365
4366 *cs->statusp = resp->status = NFS4_OK;
4367
4368 out:
4369 DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4370 REMOVE4res *, resp);
4371 }
4372
4373 /*
4374 * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4375 * oldname and newname.
4376 * res: status. If success - CURRENT_FH unchanged, return change_info
4377 * for both from and target directories.
4378 */
4379 /* ARGSUSED */
4380 static void
rfs4_op_rename(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)4381 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4382 struct compound_state *cs)
4383 {
4384 RENAME4args *args = &argop->nfs_argop4_u.oprename;
4385 RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4386 int error;
4387 vnode_t *odvp;
4388 vnode_t *ndvp;
4389 vnode_t *srcvp, *targvp;
4390 struct vattr obdva, oidva, oadva;
4391 struct vattr nbdva, nidva, nadva;
4392 char *onm, *nnm;
4393 uint_t olen, nlen;
4394 rfs4_file_t *fp, *sfp;
4395 int in_crit_src, in_crit_targ;
4396 int fp_rele_grant_hold, sfp_rele_grant_hold;
4397 bslabel_t *clabel;
4398 struct sockaddr *ca;
4399 char *converted_onm = NULL;
4400 char *converted_nnm = NULL;
4401
4402 DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4403 RENAME4args *, args);
4404
4405 fp = sfp = NULL;
4406 srcvp = targvp = NULL;
4407 in_crit_src = in_crit_targ = 0;
4408 fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4409
4410 /* CURRENT_FH: target directory */
4411 ndvp = cs->vp;
4412 if (ndvp == NULL) {
4413 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4414 goto out;
4415 }
4416
4417 /* SAVED_FH: from directory */
4418 odvp = cs->saved_vp;
4419 if (odvp == NULL) {
4420 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4421 goto out;
4422 }
4423
4424 if (cs->access == CS_ACCESS_DENIED) {
4425 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4426 goto out;
4427 }
4428
4429 /*
4430 * If there is an unshared filesystem mounted on this vnode,
4431 * do not allow to rename objects in this directory.
4432 */
4433 if (vn_ismntpt(odvp)) {
4434 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4435 goto out;
4436 }
4437
4438 /*
4439 * If there is an unshared filesystem mounted on this vnode,
4440 * do not allow to rename to this directory.
4441 */
4442 if (vn_ismntpt(ndvp)) {
4443 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4444 goto out;
4445 }
4446
4447 if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4448 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4449 goto out;
4450 }
4451
4452 if (cs->saved_exi != cs->exi) {
4453 *cs->statusp = resp->status = NFS4ERR_XDEV;
4454 goto out;
4455 }
4456
4457 if (!utf8_dir_verify(&args->oldname)) {
4458 *cs->statusp = resp->status = NFS4ERR_INVAL;
4459 goto out;
4460 }
4461
4462 if (!utf8_dir_verify(&args->newname)) {
4463 *cs->statusp = resp->status = NFS4ERR_INVAL;
4464 goto out;
4465 }
4466
4467 onm = utf8_to_fn(&args->oldname, &olen, NULL);
4468 if (onm == NULL) {
4469 *cs->statusp = resp->status = NFS4ERR_INVAL;
4470 goto out;
4471 }
4472 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4473 nlen = MAXPATHLEN + 1;
4474 converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4475 nlen);
4476
4477 if (converted_onm == NULL) {
4478 *cs->statusp = resp->status = NFS4ERR_INVAL;
4479 kmem_free(onm, olen);
4480 goto out;
4481 }
4482
4483 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4484 if (nnm == NULL) {
4485 *cs->statusp = resp->status = NFS4ERR_INVAL;
4486 if (onm != converted_onm)
4487 kmem_free(converted_onm, MAXPATHLEN + 1);
4488 kmem_free(onm, olen);
4489 goto out;
4490 }
4491 converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4492 MAXPATHLEN + 1);
4493
4494 if (converted_nnm == NULL) {
4495 *cs->statusp = resp->status = NFS4ERR_INVAL;
4496 kmem_free(nnm, nlen);
4497 nnm = NULL;
4498 if (onm != converted_onm)
4499 kmem_free(converted_onm, MAXPATHLEN + 1);
4500 kmem_free(onm, olen);
4501 goto out;
4502 }
4503
4504
4505 if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4506 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4507 kmem_free(onm, olen);
4508 kmem_free(nnm, nlen);
4509 goto out;
4510 }
4511
4512
4513 if (rdonly4(cs->exi, cs->vp, req)) {
4514 *cs->statusp = resp->status = NFS4ERR_ROFS;
4515 if (onm != converted_onm)
4516 kmem_free(converted_onm, MAXPATHLEN + 1);
4517 kmem_free(onm, olen);
4518 if (nnm != converted_nnm)
4519 kmem_free(converted_nnm, MAXPATHLEN + 1);
4520 kmem_free(nnm, nlen);
4521 goto out;
4522 }
4523
4524 /* check label of the target dir */
4525 if (is_system_labeled()) {
4526 ASSERT(req->rq_label != NULL);
4527 clabel = req->rq_label;
4528 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4529 "got client label from request(1)",
4530 struct svc_req *, req);
4531 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4532 if (!do_rfs_label_check(clabel, ndvp,
4533 EQUALITY_CHECK, cs->exi)) {
4534 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4535 goto err_out;
4536 }
4537 }
4538 }
4539
4540 /*
4541 * Is the source a file and have a delegation?
4542 * We don't need to acquire va_seq before these lookups, if
4543 * it causes an update, cinfo.before will not match, which will
4544 * trigger a cache flush even if atomic is TRUE.
4545 */
4546 if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4547 &error, cs->cr)) {
4548 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4549 NULL)) {
4550 *cs->statusp = resp->status = NFS4ERR_DELAY;
4551 goto err_out;
4552 }
4553 }
4554
4555 if (srcvp == NULL) {
4556 *cs->statusp = resp->status = puterrno4(error);
4557 if (onm != converted_onm)
4558 kmem_free(converted_onm, MAXPATHLEN + 1);
4559 kmem_free(onm, olen);
4560 if (nnm != converted_nnm)
4561 kmem_free(converted_nnm, MAXPATHLEN + 1);
4562 kmem_free(nnm, nlen);
4563 goto out;
4564 }
4565
4566 sfp_rele_grant_hold = 1;
4567
4568 /* Does the destination exist and a file and have a delegation? */
4569 if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4570 NULL, cs->cr)) {
4571 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4572 NULL)) {
4573 *cs->statusp = resp->status = NFS4ERR_DELAY;
4574 goto err_out;
4575 }
4576 }
4577 fp_rele_grant_hold = 1;
4578
4579
4580 /* Check for NBMAND lock on both source and target */
4581 if (nbl_need_check(srcvp)) {
4582 nbl_start_crit(srcvp, RW_READER);
4583 in_crit_src = 1;
4584 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4585 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4586 goto err_out;
4587 }
4588 }
4589
4590 if (targvp && nbl_need_check(targvp)) {
4591 nbl_start_crit(targvp, RW_READER);
4592 in_crit_targ = 1;
4593 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4594 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4595 goto err_out;
4596 }
4597 }
4598
4599 /* Get source "before" change value */
4600 obdva.va_mask = AT_CTIME|AT_SEQ;
4601 error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4602 if (!error) {
4603 nbdva.va_mask = AT_CTIME|AT_SEQ;
4604 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4605 }
4606 if (error) {
4607 *cs->statusp = resp->status = puterrno4(error);
4608 goto err_out;
4609 }
4610
4611 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4612 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4613
4614 if ((error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm,
4615 cs->cr, NULL, 0)) == 0 && fp != NULL) {
4616 struct vattr va;
4617 vnode_t *tvp;
4618
4619 rfs4_dbe_lock(fp->rf_dbe);
4620 tvp = fp->rf_vp;
4621 if (tvp)
4622 VN_HOLD(tvp);
4623 rfs4_dbe_unlock(fp->rf_dbe);
4624
4625 if (tvp) {
4626 va.va_mask = AT_NLINK;
4627 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4628 va.va_nlink == 0) {
4629 /* The file is gone and so should the state */
4630 if (in_crit_targ) {
4631 nbl_end_crit(targvp);
4632 in_crit_targ = 0;
4633 }
4634 rfs4_close_all_state(fp);
4635 }
4636 VN_RELE(tvp);
4637 }
4638 }
4639 if (error == 0)
4640 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4641
4642 if (in_crit_src)
4643 nbl_end_crit(srcvp);
4644 if (srcvp)
4645 VN_RELE(srcvp);
4646 if (in_crit_targ)
4647 nbl_end_crit(targvp);
4648 if (targvp)
4649 VN_RELE(targvp);
4650
4651 if (sfp) {
4652 rfs4_clear_dont_grant(sfp);
4653 rfs4_file_rele(sfp);
4654 }
4655 if (fp) {
4656 rfs4_clear_dont_grant(fp);
4657 rfs4_file_rele(fp);
4658 }
4659
4660 if (converted_onm != onm)
4661 kmem_free(converted_onm, MAXPATHLEN + 1);
4662 kmem_free(onm, olen);
4663 if (converted_nnm != nnm)
4664 kmem_free(converted_nnm, MAXPATHLEN + 1);
4665 kmem_free(nnm, nlen);
4666
4667 /*
4668 * Get the initial "after" sequence number, if it fails, set to zero
4669 */
4670 oidva.va_mask = AT_SEQ;
4671 if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4672 oidva.va_seq = 0;
4673
4674 nidva.va_mask = AT_SEQ;
4675 if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4676 nidva.va_seq = 0;
4677
4678 /*
4679 * Force modified data and metadata out to stable storage.
4680 */
4681 (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4682 (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4683
4684 if (error) {
4685 *cs->statusp = resp->status = puterrno4(error);
4686 goto out;
4687 }
4688
4689 /*
4690 * Get "after" change values, if it fails, simply return the
4691 * before value.
4692 */
4693 oadva.va_mask = AT_CTIME|AT_SEQ;
4694 if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4695 oadva.va_ctime = obdva.va_ctime;
4696 oadva.va_seq = 0;
4697 }
4698
4699 nadva.va_mask = AT_CTIME|AT_SEQ;
4700 if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4701 nadva.va_ctime = nbdva.va_ctime;
4702 nadva.va_seq = 0;
4703 }
4704
4705 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4706 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4707
4708 /*
4709 * The cinfo.atomic = TRUE only if we have
4710 * non-zero va_seq's, and it has incremented by exactly one
4711 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4712 */
4713 if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4714 oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4715 resp->source_cinfo.atomic = TRUE;
4716 else
4717 resp->source_cinfo.atomic = FALSE;
4718
4719 if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4720 nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4721 resp->target_cinfo.atomic = TRUE;
4722 else
4723 resp->target_cinfo.atomic = FALSE;
4724
4725 #ifdef VOLATILE_FH_TEST
4726 {
4727 extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4728
4729 /*
4730 * Add the renamed file handle to the volatile rename list
4731 */
4732 if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4733 /* file handles may expire on rename */
4734 vnode_t *vp;
4735
4736 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4737 /*
4738 * Already know that nnm will be a valid string
4739 */
4740 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4741 NULL, NULL, NULL);
4742 kmem_free(nnm, nlen);
4743 if (!error) {
4744 add_volrnm_fh(cs->exi, vp);
4745 VN_RELE(vp);
4746 }
4747 }
4748 }
4749 #endif /* VOLATILE_FH_TEST */
4750
4751 *cs->statusp = resp->status = NFS4_OK;
4752 out:
4753 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4754 RENAME4res *, resp);
4755 return;
4756
4757 err_out:
4758 if (onm != converted_onm)
4759 kmem_free(converted_onm, MAXPATHLEN + 1);
4760 if (onm != NULL)
4761 kmem_free(onm, olen);
4762 if (nnm != converted_nnm)
4763 kmem_free(converted_nnm, MAXPATHLEN + 1);
4764 if (nnm != NULL)
4765 kmem_free(nnm, nlen);
4766
4767 if (in_crit_src) nbl_end_crit(srcvp);
4768 if (in_crit_targ) nbl_end_crit(targvp);
4769 if (targvp) VN_RELE(targvp);
4770 if (srcvp) VN_RELE(srcvp);
4771 if (sfp) {
4772 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4773 rfs4_file_rele(sfp);
4774 }
4775 if (fp) {
4776 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4777 rfs4_file_rele(fp);
4778 }
4779
4780 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4781 RENAME4res *, resp);
4782 }
4783
4784 /* ARGSUSED */
4785 static void
rfs4_op_renew(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)4786 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4787 struct compound_state *cs)
4788 {
4789 RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4790 RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4791 rfs4_client_t *cp;
4792
4793 DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4794 RENEW4args *, args);
4795
4796 if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4797 *cs->statusp = resp->status =
4798 rfs4_check_clientid(&args->clientid, 0);
4799 goto out;
4800 }
4801
4802 if (rfs4_lease_expired(cp)) {
4803 rfs4_client_rele(cp);
4804 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4805 goto out;
4806 }
4807
4808 rfs4_update_lease(cp);
4809
4810 mutex_enter(cp->rc_cbinfo.cb_lock);
4811 if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4812 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4813 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4814 } else {
4815 *cs->statusp = resp->status = NFS4_OK;
4816 }
4817 mutex_exit(cp->rc_cbinfo.cb_lock);
4818
4819 rfs4_client_rele(cp);
4820
4821 out:
4822 DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4823 RENEW4res *, resp);
4824 }
4825
4826 /* ARGSUSED */
4827 static void
rfs4_op_restorefh(nfs_argop4 * args,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)4828 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4829 struct compound_state *cs)
4830 {
4831 RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4832
4833 DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4834
4835 /* No need to check cs->access - we are not accessing any object */
4836 if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4837 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4838 goto out;
4839 }
4840 if (cs->vp != NULL) {
4841 VN_RELE(cs->vp);
4842 }
4843 cs->vp = cs->saved_vp;
4844 cs->saved_vp = NULL;
4845 cs->exi = cs->saved_exi;
4846 nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4847 *cs->statusp = resp->status = NFS4_OK;
4848 cs->deleg = FALSE;
4849
4850 out:
4851 DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4852 RESTOREFH4res *, resp);
4853 }
4854
4855 /* ARGSUSED */
4856 static void
rfs4_op_savefh(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)4857 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4858 struct compound_state *cs)
4859 {
4860 SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4861
4862 DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4863
4864 /* No need to check cs->access - we are not accessing any object */
4865 if (cs->vp == NULL) {
4866 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4867 goto out;
4868 }
4869 if (cs->saved_vp != NULL) {
4870 VN_RELE(cs->saved_vp);
4871 }
4872 cs->saved_vp = cs->vp;
4873 VN_HOLD(cs->saved_vp);
4874 cs->saved_exi = cs->exi;
4875 /*
4876 * since SAVEFH is fairly rare, don't alloc space for its fh
4877 * unless necessary.
4878 */
4879 if (cs->saved_fh.nfs_fh4_val == NULL) {
4880 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4881 }
4882 nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4883 *cs->statusp = resp->status = NFS4_OK;
4884
4885 out:
4886 DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4887 SAVEFH4res *, resp);
4888 }
4889
4890 /*
4891 * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4892 * return the bitmap of attrs that were set successfully. It is also
4893 * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4894 * always be called only after rfs4_do_set_attrs().
4895 *
4896 * Verify that the attributes are same as the expected ones. sargp->vap
4897 * and sargp->sbp contain the input attributes as translated from fattr4.
4898 *
4899 * This function verifies only the attrs that correspond to a vattr or
4900 * vfsstat struct. That is because of the extra step needed to get the
4901 * corresponding system structs. Other attributes have already been set or
4902 * verified by do_rfs4_set_attrs.
4903 *
4904 * Return 0 if all attrs match, -1 if some don't, error if error processing.
4905 */
4906 static int
rfs4_verify_attr(struct nfs4_svgetit_arg * sargp,bitmap4 * resp,struct nfs4_ntov_table * ntovp)4907 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4908 bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4909 {
4910 int error, ret_error = 0;
4911 int i, k;
4912 uint_t sva_mask = sargp->vap->va_mask;
4913 uint_t vbit;
4914 union nfs4_attr_u *na;
4915 uint8_t *amap;
4916 bool_t getsb = ntovp->vfsstat;
4917
4918 if (sva_mask != 0) {
4919 /*
4920 * Okay to overwrite sargp->vap because we verify based
4921 * on the incoming values.
4922 */
4923 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4924 sargp->cs->cr, NULL);
4925 if (ret_error) {
4926 if (resp == NULL)
4927 return (ret_error);
4928 /*
4929 * Must return bitmap of successful attrs
4930 */
4931 sva_mask = 0; /* to prevent checking vap later */
4932 } else {
4933 /*
4934 * Some file systems clobber va_mask. it is probably
4935 * wrong of them to do so, nonethless we practice
4936 * defensive coding.
4937 * See bug id 4276830.
4938 */
4939 sargp->vap->va_mask = sva_mask;
4940 }
4941 }
4942
4943 if (getsb) {
4944 /*
4945 * Now get the superblock and loop on the bitmap, as there is
4946 * no simple way of translating from superblock to bitmap4.
4947 */
4948 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4949 if (ret_error) {
4950 if (resp == NULL)
4951 goto errout;
4952 getsb = FALSE;
4953 }
4954 }
4955
4956 /*
4957 * Now loop and verify each attribute which getattr returned
4958 * whether it's the same as the input.
4959 */
4960 if (resp == NULL && !getsb && (sva_mask == 0))
4961 goto errout;
4962
4963 na = ntovp->na;
4964 amap = ntovp->amap;
4965 k = 0;
4966 for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4967 k = *amap;
4968 ASSERT(nfs4_ntov_map[k].nval == k);
4969 vbit = nfs4_ntov_map[k].vbit;
4970
4971 /*
4972 * If vattr attribute but VOP_GETATTR failed, or it's
4973 * superblock attribute but VFS_STATVFS failed, skip
4974 */
4975 if (vbit) {
4976 if ((vbit & sva_mask) == 0)
4977 continue;
4978 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4979 continue;
4980 }
4981 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
4982 if (resp != NULL) {
4983 if (error)
4984 ret_error = -1; /* not all match */
4985 else /* update response bitmap */
4986 *resp |= nfs4_ntov_map[k].fbit;
4987 continue;
4988 }
4989 if (error) {
4990 ret_error = -1; /* not all match */
4991 break;
4992 }
4993 }
4994 errout:
4995 return (ret_error);
4996 }
4997
4998 /*
4999 * Decode the attribute to be set/verified. If the attr requires a sys op
5000 * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
5001 * call the sv_getit function for it, because the sys op hasn't yet been done.
5002 * Return 0 for success, error code if failed.
5003 *
5004 * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
5005 */
5006 static int
decode_fattr4_attr(nfs4_attr_cmd_t cmd,struct nfs4_svgetit_arg * sargp,int k,XDR * xdrp,bitmap4 * resp_bval,union nfs4_attr_u * nap)5007 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5008 int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5009 {
5010 int error = 0;
5011 bool_t set_later;
5012
5013 sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5014
5015 if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5016 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5017 /*
5018 * don't verify yet if a vattr or sb dependent attr,
5019 * because we don't have their sys values yet.
5020 * Will be done later.
5021 */
5022 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5023 /*
5024 * ACLs are a special case, since setting the MODE
5025 * conflicts with setting the ACL. We delay setting
5026 * the ACL until all other attributes have been set.
5027 * The ACL gets set in do_rfs4_op_setattr().
5028 */
5029 if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5030 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5031 sargp, nap);
5032 if (error) {
5033 xdr_free(nfs4_ntov_map[k].xfunc,
5034 (caddr_t)nap);
5035 }
5036 }
5037 }
5038 } else {
5039 #ifdef DEBUG
5040 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5041 "decoding attribute %d\n", k);
5042 #endif
5043 error = EINVAL;
5044 }
5045 if (!error && resp_bval && !set_later) {
5046 *resp_bval |= nfs4_ntov_map[k].fbit;
5047 }
5048
5049 return (error);
5050 }
5051
5052 /*
5053 * Set vattr based on incoming fattr4 attrs - used by setattr.
5054 * Set response mask. Ignore any values that are not writable vattr attrs.
5055 */
5056 static nfsstat4
do_rfs4_set_attrs(bitmap4 * resp,fattr4 * fattrp,struct compound_state * cs,struct nfs4_svgetit_arg * sargp,struct nfs4_ntov_table * ntovp,nfs4_attr_cmd_t cmd)5057 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5058 struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5059 nfs4_attr_cmd_t cmd)
5060 {
5061 int error = 0;
5062 int i;
5063 char *attrs = fattrp->attrlist4;
5064 uint32_t attrslen = fattrp->attrlist4_len;
5065 XDR xdr;
5066 nfsstat4 status = NFS4_OK;
5067 vnode_t *vp = cs->vp;
5068 union nfs4_attr_u *na;
5069 uint8_t *amap;
5070
5071 #ifndef lint
5072 /*
5073 * Make sure that maximum attribute number can be expressed as an
5074 * 8 bit quantity.
5075 */
5076 ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5077 #endif
5078
5079 if (vp == NULL) {
5080 if (resp)
5081 *resp = 0;
5082 return (NFS4ERR_NOFILEHANDLE);
5083 }
5084 if (cs->access == CS_ACCESS_DENIED) {
5085 if (resp)
5086 *resp = 0;
5087 return (NFS4ERR_ACCESS);
5088 }
5089
5090 sargp->op = cmd;
5091 sargp->cs = cs;
5092 sargp->flag = 0; /* may be set later */
5093 sargp->vap->va_mask = 0;
5094 sargp->rdattr_error = NFS4_OK;
5095 sargp->rdattr_error_req = FALSE;
5096 /* sargp->sbp is set by the caller */
5097
5098 xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5099
5100 na = ntovp->na;
5101 amap = ntovp->amap;
5102
5103 /*
5104 * The following loop iterates on the nfs4_ntov_map checking
5105 * if the fbit is set in the requested bitmap.
5106 * If set then we process the arguments using the
5107 * rfs4_fattr4 conversion functions to populate the setattr
5108 * vattr and va_mask. Any settable attrs that are not using vattr
5109 * will be set in this loop.
5110 */
5111 for (i = 0; i < nfs4_ntov_map_size; i++) {
5112 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5113 continue;
5114 }
5115 /*
5116 * If setattr, must be a writable attr.
5117 * If verify/nverify, must be a readable attr.
5118 */
5119 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5120 NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5121 /*
5122 * Client tries to set/verify an
5123 * unsupported attribute, tries to set
5124 * a read only attr or verify a write
5125 * only one - error!
5126 */
5127 break;
5128 }
5129 /*
5130 * Decode the attribute to set/verify
5131 */
5132 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5133 &xdr, resp ? resp : NULL, na);
5134 if (error)
5135 break;
5136 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5137 na++;
5138 (ntovp->attrcnt)++;
5139 if (nfs4_ntov_map[i].vfsstat)
5140 ntovp->vfsstat = TRUE;
5141 }
5142
5143 if (error != 0)
5144 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5145 puterrno4(error));
5146 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
5147 return (status);
5148 }
5149
/*
 * Apply the attributes in *fattrp to the current filehandle (cs->vp).
 *
 * resp:	out; bitmap of the attributes that were set.  It is cleared
 *		on entry and, before returning, masked with
 *		fattrp->attrmask.
 * fattrp:	the fattr4 (bitmap + XDR encoded values) from the client.
 * cs:		compound state; supplies the vnode and credentials.
 * stateid:	stateid from the request; only checked when the size is
 *		being set.
 *
 * Order of work: decode the attrs (do_rfs4_set_attrs), validate an
 * ACL+MODE combination, check the stateid for size changes, handle the
 * size change (NBMAND conflict check, VOP_SPACE when the caller owns the
 * file), VOP_SETATTR for what remains in va_mask, and finally the ACL,
 * which is deliberately set last.  On failure rfs4_verify_attr() is used
 * to work out which attrs did get set.
 *
 * Returns NFS4_OK or an NFSv4 error status.
 */
static nfsstat4
do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
    stateid4 *stateid)
{
	int error = 0;
	struct nfs4_svgetit_arg sarg;
	bool_t trunc;

	nfsstat4 status = NFS4_OK;
	cred_t *cr = cs->cr;
	vnode_t *vp = cs->vp;
	struct nfs4_ntov_table ntov;
	struct statvfs64 sb;
	struct vattr bva;
	struct flock64 bf;
	int in_crit = 0;	/* set while inside the NBMAND critical region */
	uint_t saved_mask = 0;	/* va_mask as it was before the size handling */
	caller_context_t ct;

	*resp = 0;
	sarg.sbp = &sb;
	sarg.is_referral = B_FALSE;
	nfs4_ntov_table_init(&ntov);
	status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
	    NFS4ATTR_SETIT);
	if (status != NFS4_OK) {
		/*
		 * failed set attrs
		 */
		goto done;
	}
	if ((sarg.vap->va_mask == 0) &&
	    (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
		/*
		 * no further work to be done
		 */
		goto done;
	}

	/*
	 * If we got a request to set the ACL and the MODE, only
	 * allow changing VSUID, VSGID, and VSVTX.  Attempting
	 * to change any other bits, along with setting an ACL,
	 * gives NFS4ERR_INVAL.
	 */
	if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
	    (fattrp->attrmask & FATTR4_MODE_MASK)) {
		vattr_t va;

		va.va_mask = AT_MODE;
		error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
		if (error) {
			status = puterrno4(error);
			goto done;
		}
		/* any differing bit outside VSUID|VSGID|VSVTX is rejected */
		if ((sarg.vap->va_mode ^ va.va_mode) &
		    ~(VSUID | VSGID | VSVTX)) {
			status = NFS4ERR_INVAL;
			goto done;
		}
	}

	/* Check stateid only if size has been set */
	if (sarg.vap->va_mask & AT_SIZE) {
		trunc = (sarg.vap->va_size == 0);
		status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
		    trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
		if (status != NFS4_OK)
			goto done;
	} else {
		/* no stateid check was made, so fill in the context by hand */
		ct.cc_sysid = 0;
		ct.cc_pid = 0;
		ct.cc_caller_id = nfs4_srv_caller_id;
		ct.cc_flags = CC_DONTBLOCK;
	}

	/* XXX start of possible race with delegations */

	/*
	 * We need to specially handle size changes because it is
	 * possible for the client to create a file with read-only
	 * modes, but with the file opened for writing. If the client
	 * then tries to set the file size, e.g. ftruncate(3C),
	 * fcntl(F_FREESP), the normal access checking done in
	 * VOP_SETATTR would prevent the client from doing it even though
	 * it should be allowed to do so.  To get around this, we do the
	 * access checking for ourselves and use VOP_SPACE which doesn't
	 * do the access checking.
	 * Also the client should not be allowed to change the file
	 * size if there is a conflicting non-blocking mandatory lock in
	 * the region of the change.
	 */
	if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
		u_offset_t offset;
		ssize_t length;

		/*
		 * ufs_setattr clears AT_SIZE from vap->va_mask, but
		 * before returning, sarg.vap->va_mask is used to
		 * generate the setattr reply bitmap.  We also clear
		 * AT_SIZE below before calling VOP_SPACE.  For both
		 * of these cases, the va_mask needs to be saved here
		 * and restored after calling VOP_SETATTR.
		 */
		saved_mask = sarg.vap->va_mask;

		/*
		 * Check any possible conflict due to NBMAND locks.
		 * Get into critical region before VOP_GETATTR, so the
		 * size attribute is valid when checking conflicts.
		 */
		if (nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
		}

		bva.va_mask = AT_UID|AT_SIZE;
		if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
			status = puterrno4(error);
			goto done;
		}

		if (in_crit) {
			/*
			 * The range checked for conflicts is the span
			 * between the old and the new end of file.
			 */
			if (sarg.vap->va_size < bva.va_size) {
				offset = sarg.vap->va_size;
				length = bva.va_size - sarg.vap->va_size;
			} else {
				offset = bva.va_size;
				length = sarg.vap->va_size - bva.va_size;
			}
			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
			    &ct)) {
				status = NFS4ERR_LOCKED;
				goto done;
			}
		}

		/*
		 * Caller owns the file: change the size with VOP_SPACE and
		 * take AT_SIZE out of the mask handed to VOP_SETATTR.
		 */
		if (crgetuid(cr) == bva.va_uid) {
			sarg.vap->va_mask &= ~AT_SIZE;
			bf.l_type = F_WRLCK;
			bf.l_whence = 0;
			bf.l_start = (off64_t)sarg.vap->va_size;
			bf.l_len = 0;
			bf.l_sysid = 0;
			bf.l_pid = 0;
			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
			    (offset_t)sarg.vap->va_size, cr, &ct);
		}
	}

	/* skipped when VOP_SPACE failed or nothing is left to set */
	if (!error && sarg.vap->va_mask != 0)
		error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);

	/* restore va_mask -- ufs_setattr clears AT_SIZE */
	if (saved_mask & AT_SIZE)
		sarg.vap->va_mask |= AT_SIZE;

	/*
	 * If an ACL was being set, it has been delayed until now,
	 * in order to set the mode (via the VOP_SETATTR() above) first.
	 */
	if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
		int i;

		/* locate the decoded ACL argument in the ntov table */
		for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
			if (ntov.amap[i] == FATTR4_ACL)
				break;
		if (i < NFS4_MAXNUM_ATTRS) {
			error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
			    NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
			if (error == 0) {
				*resp |= FATTR4_ACL_MASK;
			} else if (error == ENOTSUP) {
				/* report what was set, then ATTRNOTSUPP */
				(void) rfs4_verify_attr(&sarg, resp, &ntov);
				status = NFS4ERR_ATTRNOTSUPP;
				goto done;
			}
		} else {
			NFS4_DEBUG(rfs4_debug,
			    (CE_NOTE, "do_rfs4_op_setattr: "
			    "unable to find ACL in fattr4"));
			error = EINVAL;
		}
	}

	if (error) {
		/* check if a monitor detected a delegation conflict */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			status = NFS4ERR_DELAY;
		else
			status = puterrno4(error);

		/*
		 * Set the response bitmap when setattr failed.
		 * If VOP_SETATTR partially succeeded, test by doing a
		 * VOP_GETATTR on the object and comparing the data
		 * to the setattr arguments.
		 */
		(void) rfs4_verify_attr(&sarg, resp, &ntov);
	} else {
		/*
		 * Force modified metadata out to stable storage.
		 */
		(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
		/*
		 * Set response bitmap
		 */
		nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
	}

/* Return early and already have a NFSv4 error */
done:
	/*
	 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
	 * conversion sets both readable and writeable NFS4 attrs
	 * for AT_MTIME and AT_ATIME.  The line below masks out
	 * unrequested attrs from the setattr result bitmap.  This
	 * is placed after the done: label to catch the ATTRNOTSUP
	 * case.
	 */
	*resp &= fattrp->attrmask;

	/* leave the NBMAND critical region on every exit path */
	if (in_crit)
		nbl_end_crit(vp);

	nfs4_ntov_table_free(&ntov, &sarg);

	return (status);
}
5379
5380 /* ARGSUSED */
5381 static void
rfs4_op_setattr(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)5382 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5383 struct compound_state *cs)
5384 {
5385 SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5386 SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5387 bslabel_t *clabel;
5388
5389 DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5390 SETATTR4args *, args);
5391
5392 if (cs->vp == NULL) {
5393 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5394 goto out;
5395 }
5396
5397 /*
5398 * If there is an unshared filesystem mounted on this vnode,
5399 * do not allow to setattr on this vnode.
5400 */
5401 if (vn_ismntpt(cs->vp)) {
5402 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5403 goto out;
5404 }
5405
5406 resp->attrsset = 0;
5407
5408 if (rdonly4(cs->exi, cs->vp, req)) {
5409 *cs->statusp = resp->status = NFS4ERR_ROFS;
5410 goto out;
5411 }
5412
5413 /* check label before setting attributes */
5414 if (is_system_labeled()) {
5415 ASSERT(req->rq_label != NULL);
5416 clabel = req->rq_label;
5417 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5418 "got client label from request(1)",
5419 struct svc_req *, req);
5420 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5421 if (!do_rfs_label_check(clabel, cs->vp,
5422 EQUALITY_CHECK, cs->exi)) {
5423 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5424 goto out;
5425 }
5426 }
5427 }
5428
5429 *cs->statusp = resp->status =
5430 do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5431 &args->stateid);
5432
5433 out:
5434 DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5435 SETATTR4res *, resp);
5436 }
5437
5438 /* ARGSUSED */
5439 static void
rfs4_op_verify(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)5440 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5441 struct compound_state *cs)
5442 {
5443 /*
5444 * verify and nverify are exactly the same, except that nverify
5445 * succeeds when some argument changed, and verify succeeds when
5446 * when none changed.
5447 */
5448
5449 VERIFY4args *args = &argop->nfs_argop4_u.opverify;
5450 VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5451
5452 int error;
5453 struct nfs4_svgetit_arg sarg;
5454 struct statvfs64 sb;
5455 struct nfs4_ntov_table ntov;
5456
5457 DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5458 VERIFY4args *, args);
5459
5460 if (cs->vp == NULL) {
5461 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5462 goto out;
5463 }
5464
5465 sarg.sbp = &sb;
5466 sarg.is_referral = B_FALSE;
5467 nfs4_ntov_table_init(&ntov);
5468 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5469 &sarg, &ntov, NFS4ATTR_VERIT);
5470 if (resp->status != NFS4_OK) {
5471 /*
5472 * do_rfs4_set_attrs will try to verify systemwide attrs,
5473 * so could return -1 for "no match".
5474 */
5475 if (resp->status == -1)
5476 resp->status = NFS4ERR_NOT_SAME;
5477 goto done;
5478 }
5479 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5480 switch (error) {
5481 case 0:
5482 resp->status = NFS4_OK;
5483 break;
5484 case -1:
5485 resp->status = NFS4ERR_NOT_SAME;
5486 break;
5487 default:
5488 resp->status = puterrno4(error);
5489 break;
5490 }
5491 done:
5492 *cs->statusp = resp->status;
5493 nfs4_ntov_table_free(&ntov, &sarg);
5494 out:
5495 DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5496 VERIFY4res *, resp);
5497 }
5498
5499 /* ARGSUSED */
5500 static void
rfs4_op_nverify(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)5501 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5502 struct compound_state *cs)
5503 {
5504 /*
5505 * verify and nverify are exactly the same, except that nverify
5506 * succeeds when some argument changed, and verify succeeds when
5507 * when none changed.
5508 */
5509
5510 NVERIFY4args *args = &argop->nfs_argop4_u.opnverify;
5511 NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5512
5513 int error;
5514 struct nfs4_svgetit_arg sarg;
5515 struct statvfs64 sb;
5516 struct nfs4_ntov_table ntov;
5517
5518 DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5519 NVERIFY4args *, args);
5520
5521 if (cs->vp == NULL) {
5522 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5523 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5524 NVERIFY4res *, resp);
5525 return;
5526 }
5527 sarg.sbp = &sb;
5528 sarg.is_referral = B_FALSE;
5529 nfs4_ntov_table_init(&ntov);
5530 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5531 &sarg, &ntov, NFS4ATTR_VERIT);
5532 if (resp->status != NFS4_OK) {
5533 /*
5534 * do_rfs4_set_attrs will try to verify systemwide attrs,
5535 * so could return -1 for "no match".
5536 */
5537 if (resp->status == -1)
5538 resp->status = NFS4_OK;
5539 goto done;
5540 }
5541 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5542 switch (error) {
5543 case 0:
5544 resp->status = NFS4ERR_SAME;
5545 break;
5546 case -1:
5547 resp->status = NFS4_OK;
5548 break;
5549 default:
5550 resp->status = puterrno4(error);
5551 break;
5552 }
5553 done:
5554 *cs->statusp = resp->status;
5555 nfs4_ntov_table_free(&ntov, &sarg);
5556
5557 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5558 NVERIFY4res *, resp);
5559 }
5560
5561 /*
5562 * XXX - This should live in an NFS header file.
5563 */
5564 #define MAX_IOVECS 12
5565
5566 /* ARGSUSED */
5567 static void
rfs4_op_write(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)5568 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5569 struct compound_state *cs)
5570 {
5571 WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5572 WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5573 int error;
5574 vnode_t *vp;
5575 struct vattr bva;
5576 u_offset_t rlimit;
5577 struct uio uio;
5578 struct iovec iov[MAX_IOVECS];
5579 struct iovec *iovp;
5580 int iovcnt;
5581 int ioflag;
5582 cred_t *savecred, *cr;
5583 bool_t *deleg = &cs->deleg;
5584 nfsstat4 stat;
5585 int in_crit = 0;
5586 caller_context_t ct;
5587
5588 DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5589 WRITE4args *, args);
5590
5591 vp = cs->vp;
5592 if (vp == NULL) {
5593 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5594 goto out;
5595 }
5596 if (cs->access == CS_ACCESS_DENIED) {
5597 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5598 goto out;
5599 }
5600
5601 cr = cs->cr;
5602
5603 if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5604 deleg, TRUE, &ct)) != NFS4_OK) {
5605 *cs->statusp = resp->status = stat;
5606 goto out;
5607 }
5608
5609 /*
5610 * We have to enter the critical region before calling VOP_RWLOCK
5611 * to avoid a deadlock with ufs.
5612 */
5613 if (nbl_need_check(vp)) {
5614 nbl_start_crit(vp, RW_READER);
5615 in_crit = 1;
5616 if (nbl_conflict(vp, NBL_WRITE,
5617 args->offset, args->data_len, 0, &ct)) {
5618 *cs->statusp = resp->status = NFS4ERR_LOCKED;
5619 goto out;
5620 }
5621 }
5622
5623 bva.va_mask = AT_MODE | AT_UID;
5624 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5625
5626 /*
5627 * If we can't get the attributes, then we can't do the
5628 * right access checking. So, we'll fail the request.
5629 */
5630 if (error) {
5631 *cs->statusp = resp->status = puterrno4(error);
5632 goto out;
5633 }
5634
5635 if (rdonly4(cs->exi, cs->vp, req)) {
5636 *cs->statusp = resp->status = NFS4ERR_ROFS;
5637 goto out;
5638 }
5639
5640 if (vp->v_type != VREG) {
5641 *cs->statusp = resp->status =
5642 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5643 goto out;
5644 }
5645
5646 if (crgetuid(cr) != bva.va_uid &&
5647 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5648 *cs->statusp = resp->status = puterrno4(error);
5649 goto out;
5650 }
5651
5652 if (MANDLOCK(vp, bva.va_mode)) {
5653 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5654 goto out;
5655 }
5656
5657 if (args->data_len == 0) {
5658 *cs->statusp = resp->status = NFS4_OK;
5659 resp->count = 0;
5660 resp->committed = args->stable;
5661 resp->writeverf = Write4verf;
5662 goto out;
5663 }
5664
5665 if (args->mblk != NULL) {
5666 mblk_t *m;
5667 uint_t bytes, round_len;
5668
5669 iovcnt = 0;
5670 bytes = 0;
5671 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5672 for (m = args->mblk;
5673 m != NULL && bytes < round_len;
5674 m = m->b_cont) {
5675 iovcnt++;
5676 bytes += MBLKL(m);
5677 }
5678 #ifdef DEBUG
5679 /* should have ended on an mblk boundary */
5680 if (bytes != round_len) {
5681 printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5682 bytes, round_len, args->data_len);
5683 printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5684 (void *)args->mblk, (void *)m);
5685 ASSERT(bytes == round_len);
5686 }
5687 #endif
5688 if (iovcnt <= MAX_IOVECS) {
5689 iovp = iov;
5690 } else {
5691 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5692 }
5693 mblk_to_iov(args->mblk, iovcnt, iovp);
5694 } else if (args->rlist != NULL) {
5695 iovcnt = 1;
5696 iovp = iov;
5697 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5698 iovp->iov_len = args->data_len;
5699 } else {
5700 iovcnt = 1;
5701 iovp = iov;
5702 iovp->iov_base = args->data_val;
5703 iovp->iov_len = args->data_len;
5704 }
5705
5706 uio.uio_iov = iovp;
5707 uio.uio_iovcnt = iovcnt;
5708
5709 uio.uio_segflg = UIO_SYSSPACE;
5710 uio.uio_extflg = UIO_COPY_DEFAULT;
5711 uio.uio_loffset = args->offset;
5712 uio.uio_resid = args->data_len;
5713 uio.uio_llimit = curproc->p_fsz_ctl;
5714 rlimit = uio.uio_llimit - args->offset;
5715 if (rlimit < (u_offset_t)uio.uio_resid)
5716 uio.uio_resid = (int)rlimit;
5717
5718 if (args->stable == UNSTABLE4)
5719 ioflag = 0;
5720 else if (args->stable == FILE_SYNC4)
5721 ioflag = FSYNC;
5722 else if (args->stable == DATA_SYNC4)
5723 ioflag = FDSYNC;
5724 else {
5725 if (iovp != iov)
5726 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5727 *cs->statusp = resp->status = NFS4ERR_INVAL;
5728 goto out;
5729 }
5730
5731 /*
5732 * We're changing creds because VM may fault and we need
5733 * the cred of the current thread to be used if quota
5734 * checking is enabled.
5735 */
5736 savecred = curthread->t_cred;
5737 curthread->t_cred = cr;
5738 error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5739 curthread->t_cred = savecred;
5740
5741 if (iovp != iov)
5742 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5743
5744 if (error) {
5745 *cs->statusp = resp->status = puterrno4(error);
5746 goto out;
5747 }
5748
5749 *cs->statusp = resp->status = NFS4_OK;
5750 resp->count = args->data_len - uio.uio_resid;
5751
5752 if (ioflag == 0)
5753 resp->committed = UNSTABLE4;
5754 else
5755 resp->committed = FILE_SYNC4;
5756
5757 resp->writeverf = Write4verf;
5758
5759 out:
5760 if (in_crit)
5761 nbl_end_crit(vp);
5762
5763 DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5764 WRITE4res *, resp);
5765 }
5766
5767
5768 /* XXX put in a header file */
5769 extern int sec_svc_getcred(struct svc_req *, cred_t *, caddr_t *, int *);
5770
5771 void
rfs4_compound(COMPOUND4args * args,COMPOUND4res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,int * rv)5772 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5773 struct svc_req *req, cred_t *cr, int *rv)
5774 {
5775 uint_t i;
5776 struct compound_state cs;
5777
5778 if (rv != NULL)
5779 *rv = 0;
5780 rfs4_init_compound_state(&cs);
5781 /*
5782 * Form a reply tag by copying over the reqeuest tag.
5783 */
5784 resp->tag.utf8string_val =
5785 kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5786 resp->tag.utf8string_len = args->tag.utf8string_len;
5787 bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5788 resp->tag.utf8string_len);
5789
5790 cs.statusp = &resp->status;
5791 cs.req = req;
5792
5793 /*
5794 * XXX for now, minorversion should be zero
5795 */
5796 if (args->minorversion != NFS4_MINORVERSION) {
5797 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5798 &cs, COMPOUND4args *, args);
5799 resp->array_len = 0;
5800 resp->array = NULL;
5801 resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5802 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5803 &cs, COMPOUND4res *, resp);
5804 return;
5805 }
5806
5807 ASSERT(exi == NULL);
5808 ASSERT(cr == NULL);
5809
5810 cr = crget();
5811 ASSERT(cr != NULL);
5812
5813 if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5814 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5815 &cs, COMPOUND4args *, args);
5816 crfree(cr);
5817 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5818 &cs, COMPOUND4res *, resp);
5819 svcerr_badcred(req->rq_xprt);
5820 if (rv != NULL)
5821 *rv = 1;
5822 return;
5823 }
5824 resp->array_len = args->array_len;
5825 resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5826 KM_SLEEP);
5827
5828 cs.basecr = cr;
5829
5830 DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5831 COMPOUND4args *, args);
5832
5833 /*
5834 * For now, NFS4 compound processing must be protected by
5835 * exported_lock because it can access more than one exportinfo
5836 * per compound and share/unshare can now change multiple
5837 * exinfo structs. The NFS2/3 code only refs 1 exportinfo
5838 * per proc (excluding public exinfo), and exi_count design
5839 * is sufficient to protect concurrent execution of NFS2/3
5840 * ops along with unexport. This lock will be removed as
5841 * part of the NFSv4 phase 2 namespace redesign work.
5842 */
5843 rw_enter(&exported_lock, RW_READER);
5844
5845 /*
5846 * If this is the first compound we've seen, we need to start all
5847 * new instances' grace periods.
5848 */
5849 if (rfs4_seen_first_compound == 0) {
5850 rfs4_grace_start_new();
5851 /*
5852 * This must be set after rfs4_grace_start_new(), otherwise
5853 * another thread could proceed past here before the former
5854 * is finished.
5855 */
5856 rfs4_seen_first_compound = 1;
5857 }
5858
5859 for (i = 0; i < args->array_len && cs.cont; i++) {
5860 nfs_argop4 *argop;
5861 nfs_resop4 *resop;
5862 uint_t op;
5863
5864 argop = &args->array[i];
5865 resop = &resp->array[i];
5866 resop->resop = argop->argop;
5867 op = (uint_t)resop->resop;
5868
5869 if (op < rfsv4disp_cnt) {
5870 /*
5871 * Count the individual ops here; NULL and COMPOUND
5872 * are counted in common_dispatch()
5873 */
5874 rfsproccnt_v4_ptr[op].value.ui64++;
5875
5876 NFS4_DEBUG(rfs4_debug > 1,
5877 (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5878 (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5879 NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5880 rfs4_op_string[op], *cs.statusp));
5881 if (*cs.statusp != NFS4_OK)
5882 cs.cont = FALSE;
5883 } else {
5884 /*
5885 * This is effectively dead code since XDR code
5886 * will have already returned BADXDR if op doesn't
5887 * decode to legal value. This only done for a
5888 * day when XDR code doesn't verify v4 opcodes.
5889 */
5890 op = OP_ILLEGAL;
5891 rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5892
5893 rfs4_op_illegal(argop, resop, req, &cs);
5894 cs.cont = FALSE;
5895 }
5896
5897 /*
5898 * If not at last op, and if we are to stop, then
5899 * compact the results array.
5900 */
5901 if ((i + 1) < args->array_len && !cs.cont) {
5902 nfs_resop4 *new_res = kmem_alloc(
5903 (i+1) * sizeof (nfs_resop4), KM_SLEEP);
5904 bcopy(resp->array,
5905 new_res, (i+1) * sizeof (nfs_resop4));
5906 kmem_free(resp->array,
5907 args->array_len * sizeof (nfs_resop4));
5908
5909 resp->array_len = i + 1;
5910 resp->array = new_res;
5911 }
5912 }
5913
5914 rw_exit(&exported_lock);
5915
5916 DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
5917 COMPOUND4res *, resp);
5918
5919 if (cs.vp)
5920 VN_RELE(cs.vp);
5921 if (cs.saved_vp)
5922 VN_RELE(cs.saved_vp);
5923 if (cs.saved_fh.nfs_fh4_val)
5924 kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5925
5926 if (cs.basecr)
5927 crfree(cs.basecr);
5928 if (cs.cr)
5929 crfree(cs.cr);
5930 /*
5931 * done with this compound request, free the label
5932 */
5933
5934 if (req->rq_label != NULL) {
5935 kmem_free(req->rq_label, sizeof (bslabel_t));
5936 req->rq_label = NULL;
5937 }
5938 }
5939
5940 /*
5941 * XXX because of what appears to be duplicate calls to rfs4_compound_free
5942 * XXX zero out the tag and array values. Need to investigate why the
5943 * XXX calls occur, but at least prevent the panic for now.
5944 */
5945 void
rfs4_compound_free(COMPOUND4res * resp)5946 rfs4_compound_free(COMPOUND4res *resp)
5947 {
5948 uint_t i;
5949
5950 if (resp->tag.utf8string_val) {
5951 UTF8STRING_FREE(resp->tag)
5952 }
5953
5954 for (i = 0; i < resp->array_len; i++) {
5955 nfs_resop4 *resop;
5956 uint_t op;
5957
5958 resop = &resp->array[i];
5959 op = (uint_t)resop->resop;
5960 if (op < rfsv4disp_cnt) {
5961 (*rfsv4disptab[op].dis_resfree)(resop);
5962 }
5963 }
5964 if (resp->array != NULL) {
5965 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5966 }
5967 }
5968
5969 /*
5970 * Process the value of the compound request rpc flags, as a bit-AND
5971 * of the individual per-op flags (idempotent, allowork, publicfh_ok)
5972 */
5973 void
rfs4_compound_flagproc(COMPOUND4args * args,int * flagp)5974 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
5975 {
5976 int i;
5977 int flag = RPC_ALL;
5978
5979 for (i = 0; flag && i < args->array_len; i++) {
5980 uint_t op;
5981
5982 op = (uint_t)args->array[i].argop;
5983
5984 if (op < rfsv4disp_cnt)
5985 flag &= rfsv4disptab[op].dis_flags;
5986 else
5987 flag = 0;
5988 }
5989 *flagp = flag;
5990 }
5991
5992 nfsstat4
rfs4_client_sysid(rfs4_client_t * cp,sysid_t * sp)5993 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
5994 {
5995 nfsstat4 e;
5996
5997 rfs4_dbe_lock(cp->rc_dbe);
5998
5999 if (cp->rc_sysidt != LM_NOSYSID) {
6000 *sp = cp->rc_sysidt;
6001 e = NFS4_OK;
6002
6003 } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6004 *sp = cp->rc_sysidt;
6005 e = NFS4_OK;
6006
6007 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6008 "rfs4_client_sysid: allocated 0x%x\n", *sp));
6009 } else
6010 e = NFS4ERR_DELAY;
6011
6012 rfs4_dbe_unlock(cp->rc_dbe);
6013 return (e);
6014 }
6015
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug-only helper: log a one-line description of a lock request
 * (operation name, lock type, start offset, length and pid) via
 * cmn_err(CE_NOTE).  A zero l_len is printed as ~0LL.
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
	char *op_name, *type_name;

	if (operation == F_GETLK)
		op_name = "F_GETLK";
	else if (operation == F_SETLK)
		op_name = "F_SETLK";
	else if (operation == F_SETLK_NBMAND)
		op_name = "F_SETLK_NBMAND";
	else
		op_name = "F_UNKNOWN";

	if (flk->l_type == F_UNLCK)
		type_name = "F_UNLCK";
	else if (flk->l_type == F_RDLCK)
		type_name = "F_RDLCK";
	else if (flk->l_type == F_WRLCK)
		type_name = "F_WRLCK";
	else
		type_name = "F_UNKNOWN";

	ASSERT(flk->l_whence == 0);
	cmn_err(CE_NOTE, "%s: %s, type = %s, off = %llx len = %llx pid = %d",
	    str, op_name, type_name, (longlong_t)flk->l_start,
	    flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid);
}

/* Call lock_print() only when the debug selector d is non-zero. */
#define	LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
#else
/* Non-DEBUG (or lint) builds: LOCK_PRINT expands to nothing. */
#define	LOCK_PRINT(d, s, t, f)
#endif
6052
6053 /*ARGSUSED*/
6054 static bool_t
creds_ok(cred_set_t cr_set,struct svc_req * req,struct compound_state * cs)6055 creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
6056 {
6057 return (TRUE);
6058 }
6059
6060 /*
6061 * Look up the pathname using the vp in cs as the directory vnode.
6062 * cs->vp will be the vnode for the file on success
6063 */
6064
6065 static nfsstat4
rfs4_lookup(component4 * component,struct svc_req * req,struct compound_state * cs)6066 rfs4_lookup(component4 *component, struct svc_req *req,
6067 struct compound_state *cs)
6068 {
6069 char *nm;
6070 uint32_t len;
6071 nfsstat4 status;
6072 struct sockaddr *ca;
6073 char *name;
6074
6075 if (cs->vp == NULL) {
6076 return (NFS4ERR_NOFILEHANDLE);
6077 }
6078 if (cs->vp->v_type != VDIR) {
6079 return (NFS4ERR_NOTDIR);
6080 }
6081
6082 if (!utf8_dir_verify(component))
6083 return (NFS4ERR_INVAL);
6084
6085 nm = utf8_to_fn(component, &len, NULL);
6086 if (nm == NULL) {
6087 return (NFS4ERR_INVAL);
6088 }
6089
6090 if (len > MAXNAMELEN) {
6091 kmem_free(nm, len);
6092 return (NFS4ERR_NAMETOOLONG);
6093 }
6094
6095 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6096 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6097 MAXPATHLEN + 1);
6098
6099 if (name == NULL) {
6100 kmem_free(nm, len);
6101 return (NFS4ERR_INVAL);
6102 }
6103
6104 status = do_rfs4_op_lookup(name, req, cs);
6105
6106 if (name != nm)
6107 kmem_free(name, MAXPATHLEN + 1);
6108
6109 kmem_free(nm, len);
6110
6111 return (status);
6112 }
6113
6114 static nfsstat4
rfs4_lookupfile(component4 * component,struct svc_req * req,struct compound_state * cs,uint32_t access,change_info4 * cinfo)6115 rfs4_lookupfile(component4 *component, struct svc_req *req,
6116 struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6117 {
6118 nfsstat4 status;
6119 vnode_t *dvp = cs->vp;
6120 vattr_t bva, ava, fva;
6121 int error;
6122
6123 /* Get "before" change value */
6124 bva.va_mask = AT_CTIME|AT_SEQ;
6125 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6126 if (error)
6127 return (puterrno4(error));
6128
6129 /* rfs4_lookup may VN_RELE directory */
6130 VN_HOLD(dvp);
6131
6132 status = rfs4_lookup(component, req, cs);
6133 if (status != NFS4_OK) {
6134 VN_RELE(dvp);
6135 return (status);
6136 }
6137
6138 /*
6139 * Get "after" change value, if it fails, simply return the
6140 * before value.
6141 */
6142 ava.va_mask = AT_CTIME|AT_SEQ;
6143 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6144 ava.va_ctime = bva.va_ctime;
6145 ava.va_seq = 0;
6146 }
6147 VN_RELE(dvp);
6148
6149 /*
6150 * Validate the file is a file
6151 */
6152 fva.va_mask = AT_TYPE|AT_MODE;
6153 error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6154 if (error)
6155 return (puterrno4(error));
6156
6157 if (fva.va_type != VREG) {
6158 if (fva.va_type == VDIR)
6159 return (NFS4ERR_ISDIR);
6160 if (fva.va_type == VLNK)
6161 return (NFS4ERR_SYMLINK);
6162 return (NFS4ERR_INVAL);
6163 }
6164
6165 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6166 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6167
6168 /*
6169 * It is undefined if VOP_LOOKUP will change va_seq, so
6170 * cinfo.atomic = TRUE only if we have
6171 * non-zero va_seq's, and they have not changed.
6172 */
6173 if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6174 cinfo->atomic = TRUE;
6175 else
6176 cinfo->atomic = FALSE;
6177
6178 /* Check for mandatory locking */
6179 cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6180 return (check_open_access(access, cs, req));
6181 }
6182
6183 static nfsstat4
create_vnode(vnode_t * dvp,char * nm,vattr_t * vap,createmode4 mode,timespec32_t * mtime,cred_t * cr,vnode_t ** vpp,bool_t * created)6184 create_vnode(vnode_t *dvp, char *nm, vattr_t *vap, createmode4 mode,
6185 timespec32_t *mtime, cred_t *cr, vnode_t **vpp, bool_t *created)
6186 {
6187 int error;
6188 nfsstat4 status = NFS4_OK;
6189 vattr_t va;
6190
6191 tryagain:
6192
6193 /*
6194 * The file open mode used is VWRITE. If the client needs
6195 * some other semantic, then it should do the access checking
6196 * itself. It would have been nice to have the file open mode
6197 * passed as part of the arguments.
6198 */
6199
6200 *created = TRUE;
6201 error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6202
6203 if (error) {
6204 *created = FALSE;
6205
6206 /*
6207 * If we got something other than file already exists
6208 * then just return this error. Otherwise, we got
6209 * EEXIST. If we were doing a GUARDED create, then
6210 * just return this error. Otherwise, we need to
6211 * make sure that this wasn't a duplicate of an
6212 * exclusive create request.
6213 *
6214 * The assumption is made that a non-exclusive create
6215 * request will never return EEXIST.
6216 */
6217
6218 if (error != EEXIST || mode == GUARDED4) {
6219 status = puterrno4(error);
6220 return (status);
6221 }
6222 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6223 NULL, NULL, NULL);
6224
6225 if (error) {
6226 /*
6227 * We couldn't find the file that we thought that
6228 * we just created. So, we'll just try creating
6229 * it again.
6230 */
6231 if (error == ENOENT)
6232 goto tryagain;
6233
6234 status = puterrno4(error);
6235 return (status);
6236 }
6237
6238 if (mode == UNCHECKED4) {
6239 /* existing object must be regular file */
6240 if ((*vpp)->v_type != VREG) {
6241 if ((*vpp)->v_type == VDIR)
6242 status = NFS4ERR_ISDIR;
6243 else if ((*vpp)->v_type == VLNK)
6244 status = NFS4ERR_SYMLINK;
6245 else
6246 status = NFS4ERR_INVAL;
6247 VN_RELE(*vpp);
6248 return (status);
6249 }
6250
6251 return (NFS4_OK);
6252 }
6253
6254 /* Check for duplicate request */
6255 ASSERT(mtime != 0);
6256 va.va_mask = AT_MTIME;
6257 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6258 if (!error) {
6259 /* We found the file */
6260 if (va.va_mtime.tv_sec != mtime->tv_sec ||
6261 va.va_mtime.tv_nsec != mtime->tv_nsec) {
6262 /* but its not our creation */
6263 VN_RELE(*vpp);
6264 return (NFS4ERR_EXIST);
6265 }
6266 *created = TRUE; /* retrans of create == created */
6267 return (NFS4_OK);
6268 }
6269 VN_RELE(*vpp);
6270 return (NFS4ERR_EXIST);
6271 }
6272
6273 return (NFS4_OK);
6274 }
6275
6276 static nfsstat4
check_open_access(uint32_t access,struct compound_state * cs,struct svc_req * req)6277 check_open_access(uint32_t access, struct compound_state *cs,
6278 struct svc_req *req)
6279 {
6280 int error;
6281 vnode_t *vp;
6282 bool_t readonly;
6283 cred_t *cr = cs->cr;
6284
6285 /* For now we don't allow mandatory locking as per V2/V3 */
6286 if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6287 return (NFS4ERR_ACCESS);
6288 }
6289
6290 vp = cs->vp;
6291 ASSERT(cr != NULL && vp->v_type == VREG);
6292
6293 /*
6294 * If the file system is exported read only and we are trying
6295 * to open for write, then return NFS4ERR_ROFS
6296 */
6297
6298 readonly = rdonly4(cs->exi, cs->vp, req);
6299
6300 if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6301 return (NFS4ERR_ROFS);
6302
6303 if (access & OPEN4_SHARE_ACCESS_READ) {
6304 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6305 (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6306 return (NFS4ERR_ACCESS);
6307 }
6308 }
6309
6310 if (access & OPEN4_SHARE_ACCESS_WRITE) {
6311 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6312 if (error)
6313 return (NFS4ERR_ACCESS);
6314 }
6315
6316 return (NFS4_OK);
6317 }
6318
6319 static nfsstat4
rfs4_createfile(OPEN4args * args,struct svc_req * req,struct compound_state * cs,change_info4 * cinfo,bitmap4 * attrset,clientid4 clientid)6320 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6321 change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6322 {
6323 struct nfs4_svgetit_arg sarg;
6324 struct nfs4_ntov_table ntov;
6325
6326 bool_t ntov_table_init = FALSE;
6327 struct statvfs64 sb;
6328 nfsstat4 status;
6329 vnode_t *vp;
6330 vattr_t bva, ava, iva, cva, *vap;
6331 vnode_t *dvp;
6332 timespec32_t *mtime;
6333 char *nm = NULL;
6334 uint_t buflen;
6335 bool_t created;
6336 bool_t setsize = FALSE;
6337 len_t reqsize;
6338 int error;
6339 bool_t trunc;
6340 caller_context_t ct;
6341 component4 *component;
6342 bslabel_t *clabel;
6343 struct sockaddr *ca;
6344 char *name = NULL;
6345
6346 sarg.sbp = &sb;
6347 sarg.is_referral = B_FALSE;
6348
6349 dvp = cs->vp;
6350
6351 /* Check if the file system is read only */
6352 if (rdonly4(cs->exi, dvp, req))
6353 return (NFS4ERR_ROFS);
6354
6355 /* check the label of including directory */
6356 if (is_system_labeled()) {
6357 ASSERT(req->rq_label != NULL);
6358 clabel = req->rq_label;
6359 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6360 "got client label from request(1)",
6361 struct svc_req *, req);
6362 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6363 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6364 cs->exi)) {
6365 return (NFS4ERR_ACCESS);
6366 }
6367 }
6368 }
6369
6370 /*
6371 * Get the last component of path name in nm. cs will reference
6372 * the including directory on success.
6373 */
6374 component = &args->open_claim4_u.file;
6375 if (!utf8_dir_verify(component))
6376 return (NFS4ERR_INVAL);
6377
6378 nm = utf8_to_fn(component, &buflen, NULL);
6379
6380 if (nm == NULL)
6381 return (NFS4ERR_RESOURCE);
6382
6383 if (buflen > MAXNAMELEN) {
6384 kmem_free(nm, buflen);
6385 return (NFS4ERR_NAMETOOLONG);
6386 }
6387
6388 bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6389 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6390 if (error) {
6391 kmem_free(nm, buflen);
6392 return (puterrno4(error));
6393 }
6394
6395 if (bva.va_type != VDIR) {
6396 kmem_free(nm, buflen);
6397 return (NFS4ERR_NOTDIR);
6398 }
6399
6400 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6401
6402 switch (args->mode) {
6403 case GUARDED4:
6404 /*FALLTHROUGH*/
6405 case UNCHECKED4:
6406 nfs4_ntov_table_init(&ntov);
6407 ntov_table_init = TRUE;
6408
6409 *attrset = 0;
6410 status = do_rfs4_set_attrs(attrset,
6411 &args->createhow4_u.createattrs,
6412 cs, &sarg, &ntov, NFS4ATTR_SETIT);
6413
6414 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6415 sarg.vap->va_type != VREG) {
6416 if (sarg.vap->va_type == VDIR)
6417 status = NFS4ERR_ISDIR;
6418 else if (sarg.vap->va_type == VLNK)
6419 status = NFS4ERR_SYMLINK;
6420 else
6421 status = NFS4ERR_INVAL;
6422 }
6423
6424 if (status != NFS4_OK) {
6425 kmem_free(nm, buflen);
6426 nfs4_ntov_table_free(&ntov, &sarg);
6427 *attrset = 0;
6428 return (status);
6429 }
6430
6431 vap = sarg.vap;
6432 vap->va_type = VREG;
6433 vap->va_mask |= AT_TYPE;
6434
6435 if ((vap->va_mask & AT_MODE) == 0) {
6436 vap->va_mask |= AT_MODE;
6437 vap->va_mode = (mode_t)0600;
6438 }
6439
6440 if (vap->va_mask & AT_SIZE) {
6441
6442 /* Disallow create with a non-zero size */
6443
6444 if ((reqsize = sarg.vap->va_size) != 0) {
6445 kmem_free(nm, buflen);
6446 nfs4_ntov_table_free(&ntov, &sarg);
6447 *attrset = 0;
6448 return (NFS4ERR_INVAL);
6449 }
6450 setsize = TRUE;
6451 }
6452 break;
6453
6454 case EXCLUSIVE4:
6455 /* prohibit EXCL create of named attributes */
6456 if (dvp->v_flag & V_XATTRDIR) {
6457 kmem_free(nm, buflen);
6458 *attrset = 0;
6459 return (NFS4ERR_INVAL);
6460 }
6461
6462 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6463 cva.va_type = VREG;
6464 /*
6465 * Ensure no time overflows. Assumes underlying
6466 * filesystem supports at least 32 bits.
6467 * Truncate nsec to usec resolution to allow valid
6468 * compares even if the underlying filesystem truncates.
6469 */
6470 mtime = (timespec32_t *)&args->createhow4_u.createverf;
6471 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6472 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6473 cva.va_mode = (mode_t)0;
6474 vap = &cva;
6475
6476 /*
6477 * For EXCL create, attrset is set to the server attr
6478 * used to cache the client's verifier.
6479 */
6480 *attrset = FATTR4_TIME_MODIFY_MASK;
6481 break;
6482 }
6483
6484 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6485 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6486 MAXPATHLEN + 1);
6487
6488 if (name == NULL) {
6489 kmem_free(nm, buflen);
6490 return (NFS4ERR_SERVERFAULT);
6491 }
6492
6493 status = create_vnode(dvp, name, vap, args->mode, mtime,
6494 cs->cr, &vp, &created);
6495 if (nm != name)
6496 kmem_free(name, MAXPATHLEN + 1);
6497 kmem_free(nm, buflen);
6498
6499 if (status != NFS4_OK) {
6500 if (ntov_table_init)
6501 nfs4_ntov_table_free(&ntov, &sarg);
6502 *attrset = 0;
6503 return (status);
6504 }
6505
6506 trunc = (setsize && !created);
6507
6508 if (args->mode != EXCLUSIVE4) {
6509 bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6510
6511 /*
6512 * True verification that object was created with correct
6513 * attrs is impossible. The attrs could have been changed
6514 * immediately after object creation. If attributes did
6515 * not verify, the only recourse for the server is to
6516 * destroy the object. Maybe if some attrs (like gid)
6517 * are set incorrectly, the object should be destroyed;
6518 * however, seems bad as a default policy. Do we really
6519 * want to destroy an object over one of the times not
6520 * verifying correctly? For these reasons, the server
6521 * currently sets bits in attrset for createattrs
6522 * that were set; however, no verification is done.
6523 *
6524 * vmask_to_nmask accounts for vattr bits set on create
6525 * [do_rfs4_set_attrs() only sets resp bits for
6526 * non-vattr/vfs bits.]
6527 * Mask off any bits we set by default so as not to return
6528 * more attrset bits than were requested in createattrs
6529 */
6530 if (created) {
6531 nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6532 *attrset &= createmask;
6533 } else {
6534 /*
6535 * We did not create the vnode (we tried but it
6536 * already existed). In this case, the only createattr
6537 * that the spec allows the server to set is size,
6538 * and even then, it can only be set if it is 0.
6539 */
6540 *attrset = 0;
6541 if (trunc)
6542 *attrset = FATTR4_SIZE_MASK;
6543 }
6544 }
6545 if (ntov_table_init)
6546 nfs4_ntov_table_free(&ntov, &sarg);
6547
6548 /*
6549 * Get the initial "after" sequence number, if it fails,
6550 * set to zero, time to before.
6551 */
6552 iva.va_mask = AT_CTIME|AT_SEQ;
6553 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6554 iva.va_seq = 0;
6555 iva.va_ctime = bva.va_ctime;
6556 }
6557
6558 /*
6559 * create_vnode attempts to create the file exclusive,
6560 * if it already exists the VOP_CREATE will fail and
6561 * may not increase va_seq. It is atomic if
6562 * we haven't changed the directory, but if it has changed
6563 * we don't know what changed it.
6564 */
6565 if (!created) {
6566 if (bva.va_seq && iva.va_seq &&
6567 bva.va_seq == iva.va_seq)
6568 cinfo->atomic = TRUE;
6569 else
6570 cinfo->atomic = FALSE;
6571 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6572 } else {
6573 /*
6574 * The entry was created, we need to sync the
6575 * directory metadata.
6576 */
6577 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6578
6579 /*
6580 * Get "after" change value, if it fails, simply return the
6581 * before value.
6582 */
6583 ava.va_mask = AT_CTIME|AT_SEQ;
6584 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6585 ava.va_ctime = bva.va_ctime;
6586 ava.va_seq = 0;
6587 }
6588
6589 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6590
6591 /*
6592 * The cinfo->atomic = TRUE only if we have
6593 * non-zero va_seq's, and it has incremented by exactly one
6594 * during the create_vnode and it didn't
6595 * change during the VOP_FSYNC.
6596 */
6597 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6598 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6599 cinfo->atomic = TRUE;
6600 else
6601 cinfo->atomic = FALSE;
6602 }
6603
6604 /* Check for mandatory locking and that the size gets set. */
6605 cva.va_mask = AT_MODE;
6606 if (setsize)
6607 cva.va_mask |= AT_SIZE;
6608
6609 /* Assume the worst */
6610 cs->mandlock = TRUE;
6611
6612 if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6613 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6614
6615 /*
6616 * Truncate the file if necessary; this would be
6617 * the case for create over an existing file.
6618 */
6619
6620 if (trunc) {
6621 int in_crit = 0;
6622 rfs4_file_t *fp;
6623 bool_t create = FALSE;
6624
6625 /*
6626 * We are writing over an existing file.
6627 * Check to see if we need to recall a delegation.
6628 */
6629 rfs4_hold_deleg_policy();
6630 if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6631 if (rfs4_check_delegated_byfp(FWRITE, fp,
6632 (reqsize == 0), FALSE, FALSE, &clientid)) {
6633 rfs4_file_rele(fp);
6634 rfs4_rele_deleg_policy();
6635 VN_RELE(vp);
6636 *attrset = 0;
6637 return (NFS4ERR_DELAY);
6638 }
6639 rfs4_file_rele(fp);
6640 }
6641 rfs4_rele_deleg_policy();
6642
6643 if (nbl_need_check(vp)) {
6644 in_crit = 1;
6645
6646 ASSERT(reqsize == 0);
6647
6648 nbl_start_crit(vp, RW_READER);
6649 if (nbl_conflict(vp, NBL_WRITE, 0,
6650 cva.va_size, 0, NULL)) {
6651 in_crit = 0;
6652 nbl_end_crit(vp);
6653 VN_RELE(vp);
6654 *attrset = 0;
6655 return (NFS4ERR_ACCESS);
6656 }
6657 }
6658 ct.cc_sysid = 0;
6659 ct.cc_pid = 0;
6660 ct.cc_caller_id = nfs4_srv_caller_id;
6661 ct.cc_flags = CC_DONTBLOCK;
6662
6663 cva.va_mask = AT_SIZE;
6664 cva.va_size = reqsize;
6665 (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6666 if (in_crit)
6667 nbl_end_crit(vp);
6668 }
6669 }
6670
6671 error = makefh4(&cs->fh, vp, cs->exi);
6672
6673 /*
6674 * Force modified data and metadata out to stable storage.
6675 */
6676 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6677
6678 if (error) {
6679 VN_RELE(vp);
6680 *attrset = 0;
6681 return (puterrno4(error));
6682 }
6683
6684 /* if parent dir is attrdir, set namedattr fh flag */
6685 if (dvp->v_flag & V_XATTRDIR)
6686 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6687
6688 if (cs->vp)
6689 VN_RELE(cs->vp);
6690
6691 cs->vp = vp;
6692
6693 /*
6694 * if we did not create the file, we will need to check
6695 * the access bits on the file
6696 */
6697
6698 if (!created) {
6699 if (setsize)
6700 args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6701 status = check_open_access(args->share_access, cs, req);
6702 if (status != NFS4_OK)
6703 *attrset = 0;
6704 }
6705 return (status);
6706 }
6707
6708 /*ARGSUSED*/
6709 static void
rfs4_do_open(struct compound_state * cs,struct svc_req * req,rfs4_openowner_t * oo,delegreq_t deleg,uint32_t access,uint32_t deny,OPEN4res * resp,int deleg_cur)6710 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6711 rfs4_openowner_t *oo, delegreq_t deleg,
6712 uint32_t access, uint32_t deny,
6713 OPEN4res *resp, int deleg_cur)
6714 {
6715 /* XXX Currently not using req */
6716 rfs4_state_t *sp;
6717 rfs4_file_t *fp;
6718 bool_t screate = TRUE;
6719 bool_t fcreate = TRUE;
6720 uint32_t open_a, share_a;
6721 uint32_t open_d, share_d;
6722 rfs4_deleg_state_t *dsp;
6723 sysid_t sysid;
6724 nfsstat4 status;
6725 caller_context_t ct;
6726 int fflags = 0;
6727 int recall = 0;
6728 int err;
6729 int first_open;
6730
6731 /* get the file struct and hold a lock on it during initial open */
6732 fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6733 if (fp == NULL) {
6734 resp->status = NFS4ERR_RESOURCE;
6735 DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6736 return;
6737 }
6738
6739 sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6740 if (sp == NULL) {
6741 resp->status = NFS4ERR_RESOURCE;
6742 DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6743 /* No need to keep any reference */
6744 rw_exit(&fp->rf_file_rwlock);
6745 rfs4_file_rele(fp);
6746 return;
6747 }
6748
6749 /* try to get the sysid before continuing */
6750 if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6751 resp->status = status;
6752 rfs4_file_rele(fp);
6753 /* Not a fully formed open; "close" it */
6754 if (screate == TRUE)
6755 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6756 rfs4_state_rele(sp);
6757 return;
6758 }
6759
6760 /* Calculate the fflags for this OPEN. */
6761 if (access & OPEN4_SHARE_ACCESS_READ)
6762 fflags |= FREAD;
6763 if (access & OPEN4_SHARE_ACCESS_WRITE)
6764 fflags |= FWRITE;
6765
6766 rfs4_dbe_lock(sp->rs_dbe);
6767
6768 /*
6769 * Calculate the new deny and access mode that this open is adding to
6770 * the file for this open owner;
6771 */
6772 open_d = (deny & ~sp->rs_open_deny);
6773 open_a = (access & ~sp->rs_open_access);
6774
6775 /*
6776 * Calculate the new share access and share deny modes that this open
6777 * is adding to the file for this open owner;
6778 */
6779 share_a = (access & ~sp->rs_share_access);
6780 share_d = (deny & ~sp->rs_share_deny);
6781
6782 first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6783
6784 /*
6785 * Check to see the client has already sent an open for this
6786 * open owner on this file with the same share/deny modes.
6787 * If so, we don't need to check for a conflict and we don't
6788 * need to add another shrlock. If not, then we need to
6789 * check for conflicts in deny and access before checking for
6790 * conflicts in delegation. We don't want to recall a
6791 * delegation based on an open that will eventually fail based
6792 * on shares modes.
6793 */
6794
6795 if (share_a || share_d) {
6796 if ((err = rfs4_share(sp, access, deny)) != 0) {
6797 rfs4_dbe_unlock(sp->rs_dbe);
6798 resp->status = err;
6799
6800 rfs4_file_rele(fp);
6801 /* Not a fully formed open; "close" it */
6802 if (screate == TRUE)
6803 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6804 rfs4_state_rele(sp);
6805 return;
6806 }
6807 }
6808
6809 rfs4_dbe_lock(fp->rf_dbe);
6810
6811 /*
6812 * Check to see if this file is delegated and if so, if a
6813 * recall needs to be done.
6814 */
6815 if (rfs4_check_recall(sp, access)) {
6816 rfs4_dbe_unlock(fp->rf_dbe);
6817 rfs4_dbe_unlock(sp->rs_dbe);
6818 rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
6819 delay(NFS4_DELEGATION_CONFLICT_DELAY);
6820 rfs4_dbe_lock(sp->rs_dbe);
6821
6822 /* if state closed while lock was dropped */
6823 if (sp->rs_closed) {
6824 if (share_a || share_d)
6825 (void) rfs4_unshare(sp);
6826 rfs4_dbe_unlock(sp->rs_dbe);
6827 rfs4_file_rele(fp);
6828 /* Not a fully formed open; "close" it */
6829 if (screate == TRUE)
6830 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6831 rfs4_state_rele(sp);
6832 resp->status = NFS4ERR_OLD_STATEID;
6833 return;
6834 }
6835
6836 rfs4_dbe_lock(fp->rf_dbe);
6837 /* Let's see if the delegation was returned */
6838 if (rfs4_check_recall(sp, access)) {
6839 rfs4_dbe_unlock(fp->rf_dbe);
6840 if (share_a || share_d)
6841 (void) rfs4_unshare(sp);
6842 rfs4_dbe_unlock(sp->rs_dbe);
6843 rfs4_file_rele(fp);
6844 rfs4_update_lease(sp->rs_owner->ro_client);
6845
6846 /* Not a fully formed open; "close" it */
6847 if (screate == TRUE)
6848 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6849 rfs4_state_rele(sp);
6850 resp->status = NFS4ERR_DELAY;
6851 return;
6852 }
6853 }
6854 /*
6855 * the share check passed and any delegation conflict has been
6856 * taken care of, now call vop_open.
6857 * if this is the first open then call vop_open with fflags.
6858 * if not, call vn_open_upgrade with just the upgrade flags.
6859 *
6860 * if the file has been opened already, it will have the current
6861 * access mode in the state struct. if it has no share access, then
6862 * this is a new open.
6863 *
6864 * However, if this is open with CLAIM_DLEGATE_CUR, then don't
6865 * call VOP_OPEN(), just do the open upgrade.
6866 */
6867 if (first_open && !deleg_cur) {
6868 ct.cc_sysid = sysid;
6869 ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
6870 ct.cc_caller_id = nfs4_srv_caller_id;
6871 ct.cc_flags = CC_DONTBLOCK;
6872 err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
6873 if (err) {
6874 rfs4_dbe_unlock(fp->rf_dbe);
6875 if (share_a || share_d)
6876 (void) rfs4_unshare(sp);
6877 rfs4_dbe_unlock(sp->rs_dbe);
6878 rfs4_file_rele(fp);
6879
6880 /* Not a fully formed open; "close" it */
6881 if (screate == TRUE)
6882 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6883 rfs4_state_rele(sp);
6884 /* check if a monitor detected a delegation conflict */
6885 if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
6886 resp->status = NFS4ERR_DELAY;
6887 else
6888 resp->status = NFS4ERR_SERVERFAULT;
6889 return;
6890 }
6891 } else { /* open upgrade */
6892 /*
6893 * calculate the fflags for the new mode that is being added
6894 * by this upgrade.
6895 */
6896 fflags = 0;
6897 if (open_a & OPEN4_SHARE_ACCESS_READ)
6898 fflags |= FREAD;
6899 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6900 fflags |= FWRITE;
6901 vn_open_upgrade(cs->vp, fflags);
6902 }
6903 sp->rs_open_access |= access;
6904 sp->rs_open_deny |= deny;
6905
6906 if (open_d & OPEN4_SHARE_DENY_READ)
6907 fp->rf_deny_read++;
6908 if (open_d & OPEN4_SHARE_DENY_WRITE)
6909 fp->rf_deny_write++;
6910 fp->rf_share_deny |= deny;
6911
6912 if (open_a & OPEN4_SHARE_ACCESS_READ)
6913 fp->rf_access_read++;
6914 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6915 fp->rf_access_write++;
6916 fp->rf_share_access |= access;
6917
6918 /*
6919 * Check for delegation here. if the deleg argument is not
6920 * DELEG_ANY, then this is a reclaim from a client and
6921 * we must honor the delegation requested. If necessary we can
6922 * set the recall flag.
6923 */
6924
6925 dsp = rfs4_grant_delegation(deleg, sp, &recall);
6926
6927 cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
6928
6929 next_stateid(&sp->rs_stateid);
6930
6931 resp->stateid = sp->rs_stateid.stateid;
6932
6933 rfs4_dbe_unlock(fp->rf_dbe);
6934 rfs4_dbe_unlock(sp->rs_dbe);
6935
6936 if (dsp) {
6937 rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6938 rfs4_deleg_state_rele(dsp);
6939 }
6940
6941 rfs4_file_rele(fp);
6942 rfs4_state_rele(sp);
6943
6944 resp->status = NFS4_OK;
6945 }
6946
6947 /*ARGSUSED*/
6948 static void
rfs4_do_opennull(struct compound_state * cs,struct svc_req * req,OPEN4args * args,rfs4_openowner_t * oo,OPEN4res * resp)6949 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6950 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6951 {
6952 change_info4 *cinfo = &resp->cinfo;
6953 bitmap4 *attrset = &resp->attrset;
6954
6955 if (args->opentype == OPEN4_NOCREATE)
6956 resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6957 req, cs, args->share_access, cinfo);
6958 else {
6959 /* inhibit delegation grants during exclusive create */
6960
6961 if (args->mode == EXCLUSIVE4)
6962 rfs4_disable_delegation();
6963
6964 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6965 oo->ro_client->rc_clientid);
6966 }
6967
6968 if (resp->status == NFS4_OK) {
6969
6970 /* cs->vp cs->fh now reference the desired file */
6971
6972 rfs4_do_open(cs, req, oo,
6973 oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
6974 args->share_access, args->share_deny, resp, 0);
6975
6976 /*
6977 * If rfs4_createfile set attrset, we must
6978 * clear this attrset before the response is copied.
6979 */
6980 if (resp->status != NFS4_OK && resp->attrset) {
6981 resp->attrset = 0;
6982 }
6983 }
6984 else
6985 *cs->statusp = resp->status;
6986
6987 if (args->mode == EXCLUSIVE4)
6988 rfs4_enable_delegation();
6989 }
6990
6991 /*ARGSUSED*/
6992 static void
rfs4_do_openprev(struct compound_state * cs,struct svc_req * req,OPEN4args * args,rfs4_openowner_t * oo,OPEN4res * resp)6993 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
6994 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6995 {
6996 change_info4 *cinfo = &resp->cinfo;
6997 vattr_t va;
6998 vtype_t v_type = cs->vp->v_type;
6999 int error = 0;
7000
7001 /* Verify that we have a regular file */
7002 if (v_type != VREG) {
7003 if (v_type == VDIR)
7004 resp->status = NFS4ERR_ISDIR;
7005 else if (v_type == VLNK)
7006 resp->status = NFS4ERR_SYMLINK;
7007 else
7008 resp->status = NFS4ERR_INVAL;
7009 return;
7010 }
7011
7012 va.va_mask = AT_MODE|AT_UID;
7013 error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7014 if (error) {
7015 resp->status = puterrno4(error);
7016 return;
7017 }
7018
7019 cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7020
7021 /*
7022 * Check if we have access to the file, Note the the file
7023 * could have originally been open UNCHECKED or GUARDED
7024 * with mode bits that will now fail, but there is nothing
7025 * we can really do about that except in the case that the
7026 * owner of the file is the one requesting the open.
7027 */
7028 if (crgetuid(cs->cr) != va.va_uid) {
7029 resp->status = check_open_access(args->share_access, cs, req);
7030 if (resp->status != NFS4_OK) {
7031 return;
7032 }
7033 }
7034
7035 /*
7036 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7037 */
7038 cinfo->before = 0;
7039 cinfo->after = 0;
7040 cinfo->atomic = FALSE;
7041
7042 rfs4_do_open(cs, req, oo,
7043 NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7044 args->share_access, args->share_deny, resp, 0);
7045 }
7046
7047 static void
rfs4_do_opendelcur(struct compound_state * cs,struct svc_req * req,OPEN4args * args,rfs4_openowner_t * oo,OPEN4res * resp)7048 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7049 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7050 {
7051 int error;
7052 nfsstat4 status;
7053 stateid4 stateid =
7054 args->open_claim4_u.delegate_cur_info.delegate_stateid;
7055 rfs4_deleg_state_t *dsp;
7056
7057 /*
7058 * Find the state info from the stateid and confirm that the
7059 * file is delegated. If the state openowner is the same as
7060 * the supplied openowner we're done. If not, get the file
7061 * info from the found state info. Use that file info to
7062 * create the state for this lock owner. Note solaris doen't
7063 * really need the pathname to find the file. We may want to
7064 * lookup the pathname and make sure that the vp exist and
7065 * matches the vp in the file structure. However it is
7066 * possible that the pathname nolonger exists (local process
7067 * unlinks the file), so this may not be that useful.
7068 */
7069
7070 status = rfs4_get_deleg_state(&stateid, &dsp);
7071 if (status != NFS4_OK) {
7072 resp->status = status;
7073 return;
7074 }
7075
7076 ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7077
7078 /*
7079 * New lock owner, create state. Since this was probably called
7080 * in response to a CB_RECALL we set deleg to DELEG_NONE
7081 */
7082
7083 ASSERT(cs->vp != NULL);
7084 VN_RELE(cs->vp);
7085 VN_HOLD(dsp->rds_finfo->rf_vp);
7086 cs->vp = dsp->rds_finfo->rf_vp;
7087
7088 if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7089 rfs4_deleg_state_rele(dsp);
7090 *cs->statusp = resp->status = puterrno4(error);
7091 return;
7092 }
7093
7094 /* Mark progress for delegation returns */
7095 dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7096 rfs4_deleg_state_rele(dsp);
7097 rfs4_do_open(cs, req, oo, DELEG_NONE,
7098 args->share_access, args->share_deny, resp, 1);
7099 }
7100
7101 /*ARGSUSED*/
7102 static void
rfs4_do_opendelprev(struct compound_state * cs,struct svc_req * req,OPEN4args * args,rfs4_openowner_t * oo,OPEN4res * resp)7103 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7104 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7105 {
7106 /*
7107 * Lookup the pathname, it must already exist since this file
7108 * was delegated.
7109 *
7110 * Find the file and state info for this vp and open owner pair.
7111 * check that they are in fact delegated.
7112 * check that the state access and deny modes are the same.
7113 *
7114 * Return the delgation possibly seting the recall flag.
7115 */
7116 rfs4_file_t *fp;
7117 rfs4_state_t *sp;
7118 bool_t create = FALSE;
7119 bool_t dcreate = FALSE;
7120 rfs4_deleg_state_t *dsp;
7121 nfsace4 *ace;
7122
7123 /* Note we ignore oflags */
7124 resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7125 req, cs, args->share_access, &resp->cinfo);
7126
7127 if (resp->status != NFS4_OK) {
7128 return;
7129 }
7130
7131 /* get the file struct and hold a lock on it during initial open */
7132 fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7133 if (fp == NULL) {
7134 resp->status = NFS4ERR_RESOURCE;
7135 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7136 return;
7137 }
7138
7139 sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7140 if (sp == NULL) {
7141 resp->status = NFS4ERR_SERVERFAULT;
7142 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7143 rw_exit(&fp->rf_file_rwlock);
7144 rfs4_file_rele(fp);
7145 return;
7146 }
7147
7148 rfs4_dbe_lock(sp->rs_dbe);
7149 rfs4_dbe_lock(fp->rf_dbe);
7150 if (args->share_access != sp->rs_share_access ||
7151 args->share_deny != sp->rs_share_deny ||
7152 sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7153 NFS4_DEBUG(rfs4_debug,
7154 (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7155 rfs4_dbe_unlock(fp->rf_dbe);
7156 rfs4_dbe_unlock(sp->rs_dbe);
7157 rfs4_file_rele(fp);
7158 rfs4_state_rele(sp);
7159 resp->status = NFS4ERR_SERVERFAULT;
7160 return;
7161 }
7162 rfs4_dbe_unlock(fp->rf_dbe);
7163 rfs4_dbe_unlock(sp->rs_dbe);
7164
7165 dsp = rfs4_finddeleg(sp, &dcreate);
7166 if (dsp == NULL) {
7167 rfs4_state_rele(sp);
7168 rfs4_file_rele(fp);
7169 resp->status = NFS4ERR_SERVERFAULT;
7170 return;
7171 }
7172
7173 next_stateid(&sp->rs_stateid);
7174
7175 resp->stateid = sp->rs_stateid.stateid;
7176
7177 resp->delegation.delegation_type = dsp->rds_dtype;
7178
7179 if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7180 open_read_delegation4 *rv =
7181 &resp->delegation.open_delegation4_u.read;
7182
7183 rv->stateid = dsp->rds_delegid.stateid;
7184 rv->recall = FALSE; /* no policy in place to set to TRUE */
7185 ace = &rv->permissions;
7186 } else {
7187 open_write_delegation4 *rv =
7188 &resp->delegation.open_delegation4_u.write;
7189
7190 rv->stateid = dsp->rds_delegid.stateid;
7191 rv->recall = FALSE; /* no policy in place to set to TRUE */
7192 ace = &rv->permissions;
7193 rv->space_limit.limitby = NFS_LIMIT_SIZE;
7194 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7195 }
7196
7197 /* XXX For now */
7198 ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7199 ace->flag = 0;
7200 ace->access_mask = 0;
7201 ace->who.utf8string_len = 0;
7202 ace->who.utf8string_val = 0;
7203
7204 rfs4_deleg_state_rele(dsp);
7205 rfs4_state_rele(sp);
7206 rfs4_file_rele(fp);
7207 }
7208
/*
 * Result of a sequence id check.  NFS4_CHKSEQ_OKAY is deliberately zero.
 */
typedef enum {
	/* request carries the next expected sequence id */
	NFS4_CHKSEQ_OKAY = 0,
	/* same sequence id and same operation as the last request */
	NFS4_CHKSEQ_REPLAY = 1,
	/* anything else */
	NFS4_CHKSEQ_BAD = 2
} rfs4_chkseq_t;
7214
7215 /*
7216 * Generic function for sequence number checks.
7217 */
7218 static rfs4_chkseq_t
rfs4_check_seqid(seqid4 seqid,nfs_resop4 * lastop,seqid4 rqst_seq,nfs_resop4 * resop,bool_t copyres)7219 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7220 seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7221 {
7222 /* Same sequence ids and matching operations? */
7223 if (seqid == rqst_seq && resop->resop == lastop->resop) {
7224 if (copyres == TRUE) {
7225 rfs4_free_reply(resop);
7226 rfs4_copy_reply(resop, lastop);
7227 }
7228 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7229 "Replayed SEQID %d\n", seqid));
7230 return (NFS4_CHKSEQ_REPLAY);
7231 }
7232
7233 /* If the incoming sequence is not the next expected then it is bad */
7234 if (rqst_seq != seqid + 1) {
7235 if (rqst_seq == seqid) {
7236 NFS4_DEBUG(rfs4_debug,
7237 (CE_NOTE, "BAD SEQID: Replayed sequence id "
7238 "but last op was %d current op is %d\n",
7239 lastop->resop, resop->resop));
7240 return (NFS4_CHKSEQ_BAD);
7241 }
7242 NFS4_DEBUG(rfs4_debug,
7243 (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7244 rqst_seq, seqid));
7245 return (NFS4_CHKSEQ_BAD);
7246 }
7247
7248 /* Everything okay -- next expected */
7249 return (NFS4_CHKSEQ_OKAY);
7250 }
7251
7252
7253 static rfs4_chkseq_t
rfs4_check_open_seqid(seqid4 seqid,rfs4_openowner_t * op,nfs_resop4 * resop)7254 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7255 {
7256 rfs4_chkseq_t rc;
7257
7258 rfs4_dbe_lock(op->ro_dbe);
7259 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7260 TRUE);
7261 rfs4_dbe_unlock(op->ro_dbe);
7262
7263 if (rc == NFS4_CHKSEQ_OKAY)
7264 rfs4_update_lease(op->ro_client);
7265
7266 return (rc);
7267 }
7268
7269 static rfs4_chkseq_t
rfs4_check_olo_seqid(seqid4 olo_seqid,rfs4_openowner_t * op,nfs_resop4 * resop)7270 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7271 {
7272 rfs4_chkseq_t rc;
7273
7274 rfs4_dbe_lock(op->ro_dbe);
7275 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7276 olo_seqid, resop, FALSE);
7277 rfs4_dbe_unlock(op->ro_dbe);
7278
7279 return (rc);
7280 }
7281
7282 static rfs4_chkseq_t
rfs4_check_lock_seqid(seqid4 seqid,rfs4_lo_state_t * lsp,nfs_resop4 * resop)7283 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7284 {
7285 rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7286
7287 rfs4_dbe_lock(lsp->rls_dbe);
7288 if (!lsp->rls_skip_seqid_check)
7289 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7290 resop, TRUE);
7291 rfs4_dbe_unlock(lsp->rls_dbe);
7292
7293 return (rc);
7294 }
7295
7296 static void
rfs4_op_open(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)7297 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
7298 struct svc_req *req, struct compound_state *cs)
7299 {
7300 OPEN4args *args = &argop->nfs_argop4_u.opopen;
7301 OPEN4res *resp = &resop->nfs_resop4_u.opopen;
7302 open_owner4 *owner = &args->owner;
7303 open_claim_type4 claim = args->claim;
7304 rfs4_client_t *cp;
7305 rfs4_openowner_t *oo;
7306 bool_t create;
7307 bool_t replay = FALSE;
7308 int can_reclaim;
7309
7310 DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
7311 OPEN4args *, args);
7312
7313 if (cs->vp == NULL) {
7314 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7315 goto end;
7316 }
7317
7318 /*
7319 * Need to check clientid and lease expiration first based on
7320 * error ordering and incrementing sequence id.
7321 */
7322 cp = rfs4_findclient_by_id(owner->clientid, FALSE);
7323 if (cp == NULL) {
7324 *cs->statusp = resp->status =
7325 rfs4_check_clientid(&owner->clientid, 0);
7326 goto end;
7327 }
7328
7329 if (rfs4_lease_expired(cp)) {
7330 rfs4_client_close(cp);
7331 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7332 goto end;
7333 }
7334 can_reclaim = cp->rc_can_reclaim;
7335
7336 /*
7337 * Find the open_owner for use from this point forward. Take
7338 * care in updating the sequence id based on the type of error
7339 * being returned.
7340 */
7341 retry:
7342 create = TRUE;
7343 oo = rfs4_findopenowner(owner, &create, args->seqid);
7344 if (oo == NULL) {
7345 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
7346 rfs4_client_rele(cp);
7347 goto end;
7348 }
7349
7350 /* Hold off access to the sequence space while the open is done */
7351 rfs4_sw_enter(&oo->ro_sw);
7352
7353 /*
7354 * If the open_owner existed before at the server, then check
7355 * the sequence id.
7356 */
7357 if (!create && !oo->ro_postpone_confirm) {
7358 switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
7359 case NFS4_CHKSEQ_BAD:
7360 if ((args->seqid > oo->ro_open_seqid) &&
7361 oo->ro_need_confirm) {
7362 rfs4_free_opens(oo, TRUE, FALSE);
7363 rfs4_sw_exit(&oo->ro_sw);
7364 rfs4_openowner_rele(oo);
7365 goto retry;
7366 }
7367 resp->status = NFS4ERR_BAD_SEQID;
7368 goto out;
7369 case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
7370 replay = TRUE;
7371 goto out;
7372 default:
7373 break;
7374 }
7375
7376 /*
7377 * Sequence was ok and open owner exists
7378 * check to see if we have yet to see an
7379 * open_confirm.
7380 */
7381 if (oo->ro_need_confirm) {
7382 rfs4_free_opens(oo, TRUE, FALSE);
7383 rfs4_sw_exit(&oo->ro_sw);
7384 rfs4_openowner_rele(oo);
7385 goto retry;
7386 }
7387 }
7388 /* Grace only applies to regular-type OPENs */
7389 if (rfs4_clnt_in_grace(cp) &&
7390 (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
7391 *cs->statusp = resp->status = NFS4ERR_GRACE;
7392 goto out;
7393 }
7394
7395 /*
7396 * If previous state at the server existed then can_reclaim
7397 * will be set. If not reply NFS4ERR_NO_GRACE to the
7398 * client.
7399 */
7400 if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
7401 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7402 goto out;
7403 }
7404
7405
7406 /*
7407 * Reject the open if the client has missed the grace period
7408 */
7409 if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
7410 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7411 goto out;
7412 }
7413
7414 /* Couple of up-front bookkeeping items */
7415 if (oo->ro_need_confirm) {
7416 /*
7417 * If this is a reclaim OPEN then we should not ask
7418 * for a confirmation of the open_owner per the
7419 * protocol specification.
7420 */
7421 if (claim == CLAIM_PREVIOUS)
7422 oo->ro_need_confirm = FALSE;
7423 else
7424 resp->rflags |= OPEN4_RESULT_CONFIRM;
7425 }
7426 resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
7427
7428 /*
7429 * If there is an unshared filesystem mounted on this vnode,
7430 * do not allow to open/create in this directory.
7431 */
7432 if (vn_ismntpt(cs->vp)) {
7433 *cs->statusp = resp->status = NFS4ERR_ACCESS;
7434 goto out;
7435 }
7436
7437 /*
7438 * access must READ, WRITE, or BOTH. No access is invalid.
7439 * deny can be READ, WRITE, BOTH, or NONE.
7440 * bits not defined for access/deny are invalid.
7441 */
7442 if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
7443 (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
7444 (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
7445 *cs->statusp = resp->status = NFS4ERR_INVAL;
7446 goto out;
7447 }
7448
7449
7450 /*
7451 * make sure attrset is zero before response is built.
7452 */
7453 resp->attrset = 0;
7454
7455 switch (claim) {
7456 case CLAIM_NULL:
7457 rfs4_do_opennull(cs, req, args, oo, resp);
7458 break;
7459 case CLAIM_PREVIOUS:
7460 rfs4_do_openprev(cs, req, args, oo, resp);
7461 break;
7462 case CLAIM_DELEGATE_CUR:
7463 rfs4_do_opendelcur(cs, req, args, oo, resp);
7464 break;
7465 case CLAIM_DELEGATE_PREV:
7466 rfs4_do_opendelprev(cs, req, args, oo, resp);
7467 break;
7468 default:
7469 resp->status = NFS4ERR_INVAL;
7470 break;
7471 }
7472
7473 out:
7474 rfs4_client_rele(cp);
7475
7476 /* Catch sequence id handling here to make it a little easier */
7477 switch (resp->status) {
7478 case NFS4ERR_BADXDR:
7479 case NFS4ERR_BAD_SEQID:
7480 case NFS4ERR_BAD_STATEID:
7481 case NFS4ERR_NOFILEHANDLE:
7482 case NFS4ERR_RESOURCE:
7483 case NFS4ERR_STALE_CLIENTID:
7484 case NFS4ERR_STALE_STATEID:
7485 /*
7486 * The protocol states that if any of these errors are
7487 * being returned, the sequence id should not be
7488 * incremented. Any other return requires an
7489 * increment.
7490 */
7491 break;
7492 default:
7493 /* Always update the lease in this case */
7494 rfs4_update_lease(oo->ro_client);
7495
7496 /* Regular response - copy the result */
7497 if (!replay)
7498 rfs4_update_open_resp(oo, resop, &cs->fh);
7499
7500 /*
7501 * REPLAY case: Only if the previous response was OK
7502 * do we copy the filehandle. If not OK, no
7503 * filehandle to copy.
7504 */
7505 if (replay == TRUE &&
7506 resp->status == NFS4_OK &&
7507 oo->ro_reply_fh.nfs_fh4_val) {
7508 /*
7509 * If this is a replay, we must restore the
7510 * current filehandle/vp to that of what was
7511 * returned originally. Try our best to do
7512 * it.
7513 */
7514 nfs_fh4_fmt_t *fh_fmtp =
7515 (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;
7516
7517 cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
7518 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
7519
7520 if (cs->exi == NULL) {
7521 resp->status = NFS4ERR_STALE;
7522 goto finish;
7523 }
7524
7525 VN_RELE(cs->vp);
7526
7527 cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
7528 &resp->status);
7529
7530 if (cs->vp == NULL)
7531 goto finish;
7532
7533 nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
7534 }
7535
7536 /*
7537 * If this was a replay, no need to update the
7538 * sequence id. If the open_owner was not created on
7539 * this pass, then update. The first use of an
7540 * open_owner will not bump the sequence id.
7541 */
7542 if (replay == FALSE && !create)
7543 rfs4_update_open_sequence(oo);
7544 /*
7545 * If the client is receiving an error and the
7546 * open_owner needs to be confirmed, there is no way
7547 * to notify the client of this fact ignoring the fact
7548 * that the server has no method of returning a
7549 * stateid to confirm. Therefore, the server needs to
7550 * mark this open_owner in a way as to avoid the
7551 * sequence id checking the next time the client uses
7552 * this open_owner.
7553 */
7554 if (resp->status != NFS4_OK && oo->ro_need_confirm)
7555 oo->ro_postpone_confirm = TRUE;
7556 /*
7557 * If OK response then clear the postpone flag and
7558 * reset the sequence id to keep in sync with the
7559 * client.
7560 */
7561 if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
7562 oo->ro_postpone_confirm = FALSE;
7563 oo->ro_open_seqid = args->seqid;
7564 }
7565 break;
7566 }
7567
7568 finish:
7569 *cs->statusp = resp->status;
7570
7571 rfs4_sw_exit(&oo->ro_sw);
7572 rfs4_openowner_rele(oo);
7573
7574 end:
7575 DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
7576 OPEN4res *, resp);
7577 }
7578
7579 /*ARGSUSED*/
7580 void
rfs4_op_open_confirm(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)7581 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7582 struct svc_req *req, struct compound_state *cs)
7583 {
7584 OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7585 OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7586 rfs4_state_t *sp;
7587 nfsstat4 status;
7588
7589 DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7590 OPEN_CONFIRM4args *, args);
7591
7592 if (cs->vp == NULL) {
7593 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7594 goto out;
7595 }
7596
7597 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7598 if (status != NFS4_OK) {
7599 *cs->statusp = resp->status = status;
7600 goto out;
7601 }
7602
7603 /* Ensure specified filehandle matches */
7604 if (cs->vp != sp->rs_finfo->rf_vp) {
7605 rfs4_state_rele(sp);
7606 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7607 goto out;
7608 }
7609
7610 /* hold off other access to open_owner while we tinker */
7611 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7612
7613 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7614 case NFS4_CHECK_STATEID_OKAY:
7615 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7616 resop) != 0) {
7617 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7618 break;
7619 }
7620 /*
7621 * If it is the appropriate stateid and determined to
7622 * be "OKAY" then this means that the stateid does not
7623 * need to be confirmed and the client is in error for
7624 * sending an OPEN_CONFIRM.
7625 */
7626 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7627 break;
7628 case NFS4_CHECK_STATEID_OLD:
7629 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7630 break;
7631 case NFS4_CHECK_STATEID_BAD:
7632 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7633 break;
7634 case NFS4_CHECK_STATEID_EXPIRED:
7635 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7636 break;
7637 case NFS4_CHECK_STATEID_CLOSED:
7638 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7639 break;
7640 case NFS4_CHECK_STATEID_REPLAY:
7641 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7642 resop)) {
7643 case NFS4_CHKSEQ_OKAY:
7644 /*
7645 * This is replayed stateid; if seqid matches
7646 * next expected, then client is using wrong seqid.
7647 */
7648 /* fall through */
7649 case NFS4_CHKSEQ_BAD:
7650 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7651 break;
7652 case NFS4_CHKSEQ_REPLAY:
7653 /*
7654 * Note this case is the duplicate case so
7655 * resp->status is already set.
7656 */
7657 *cs->statusp = resp->status;
7658 rfs4_update_lease(sp->rs_owner->ro_client);
7659 break;
7660 }
7661 break;
7662 case NFS4_CHECK_STATEID_UNCONFIRMED:
7663 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7664 resop) != NFS4_CHKSEQ_OKAY) {
7665 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7666 break;
7667 }
7668 *cs->statusp = resp->status = NFS4_OK;
7669
7670 next_stateid(&sp->rs_stateid);
7671 resp->open_stateid = sp->rs_stateid.stateid;
7672 sp->rs_owner->ro_need_confirm = FALSE;
7673 rfs4_update_lease(sp->rs_owner->ro_client);
7674 rfs4_update_open_sequence(sp->rs_owner);
7675 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7676 break;
7677 default:
7678 ASSERT(FALSE);
7679 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7680 break;
7681 }
7682 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7683 rfs4_state_rele(sp);
7684
7685 out:
7686 DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7687 OPEN_CONFIRM4res *, resp);
7688 }
7689
7690 /*ARGSUSED*/
7691 void
rfs4_op_open_downgrade(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)7692 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7693 struct svc_req *req, struct compound_state *cs)
7694 {
7695 OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7696 OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7697 uint32_t access = args->share_access;
7698 uint32_t deny = args->share_deny;
7699 nfsstat4 status;
7700 rfs4_state_t *sp;
7701 rfs4_file_t *fp;
7702 int fflags = 0;
7703
7704 DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7705 OPEN_DOWNGRADE4args *, args);
7706
7707 if (cs->vp == NULL) {
7708 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7709 goto out;
7710 }
7711
7712 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7713 if (status != NFS4_OK) {
7714 *cs->statusp = resp->status = status;
7715 goto out;
7716 }
7717
7718 /* Ensure specified filehandle matches */
7719 if (cs->vp != sp->rs_finfo->rf_vp) {
7720 rfs4_state_rele(sp);
7721 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7722 goto out;
7723 }
7724
7725 /* hold off other access to open_owner while we tinker */
7726 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7727
7728 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7729 case NFS4_CHECK_STATEID_OKAY:
7730 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7731 resop) != NFS4_CHKSEQ_OKAY) {
7732 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7733 goto end;
7734 }
7735 break;
7736 case NFS4_CHECK_STATEID_OLD:
7737 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7738 goto end;
7739 case NFS4_CHECK_STATEID_BAD:
7740 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7741 goto end;
7742 case NFS4_CHECK_STATEID_EXPIRED:
7743 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7744 goto end;
7745 case NFS4_CHECK_STATEID_CLOSED:
7746 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7747 goto end;
7748 case NFS4_CHECK_STATEID_UNCONFIRMED:
7749 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7750 goto end;
7751 case NFS4_CHECK_STATEID_REPLAY:
7752 /* Check the sequence id for the open owner */
7753 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7754 resop)) {
7755 case NFS4_CHKSEQ_OKAY:
7756 /*
7757 * This is replayed stateid; if seqid matches
7758 * next expected, then client is using wrong seqid.
7759 */
7760 /* fall through */
7761 case NFS4_CHKSEQ_BAD:
7762 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7763 goto end;
7764 case NFS4_CHKSEQ_REPLAY:
7765 /*
7766 * Note this case is the duplicate case so
7767 * resp->status is already set.
7768 */
7769 *cs->statusp = resp->status;
7770 rfs4_update_lease(sp->rs_owner->ro_client);
7771 goto end;
7772 }
7773 break;
7774 default:
7775 ASSERT(FALSE);
7776 break;
7777 }
7778
7779 rfs4_dbe_lock(sp->rs_dbe);
7780 /*
7781 * Check that the new access modes and deny modes are valid.
7782 * Check that no invalid bits are set.
7783 */
7784 if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7785 (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7786 *cs->statusp = resp->status = NFS4ERR_INVAL;
7787 rfs4_update_open_sequence(sp->rs_owner);
7788 rfs4_dbe_unlock(sp->rs_dbe);
7789 goto end;
7790 }
7791
7792 /*
7793 * The new modes must be a subset of the current modes and
7794 * the access must specify at least one mode. To test that
7795 * the new mode is a subset of the current modes we bitwise
7796 * AND them together and check that the result equals the new
7797 * mode. For example:
7798 * New mode, access == R and current mode, sp->rs_open_access == RW
7799 * access & sp->rs_open_access == R == access, so the new access mode
7800 * is valid. Consider access == RW, sp->rs_open_access = R
7801 * access & sp->rs_open_access == R != access, so the new access mode
7802 * is invalid.
7803 */
7804 if ((access & sp->rs_open_access) != access ||
7805 (deny & sp->rs_open_deny) != deny ||
7806 (access &
7807 (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7808 *cs->statusp = resp->status = NFS4ERR_INVAL;
7809 rfs4_update_open_sequence(sp->rs_owner);
7810 rfs4_dbe_unlock(sp->rs_dbe);
7811 goto end;
7812 }
7813
7814 /*
7815 * Release any share locks associated with this stateID.
7816 * Strictly speaking, this violates the spec because the
7817 * spec effectively requires that open downgrade be atomic.
7818 * At present, fs_shrlock does not have this capability.
7819 */
7820 (void) rfs4_unshare(sp);
7821
7822 status = rfs4_share(sp, access, deny);
7823 if (status != NFS4_OK) {
7824 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7825 rfs4_update_open_sequence(sp->rs_owner);
7826 rfs4_dbe_unlock(sp->rs_dbe);
7827 goto end;
7828 }
7829
7830 fp = sp->rs_finfo;
7831 rfs4_dbe_lock(fp->rf_dbe);
7832
7833 /*
7834 * If the current mode has deny read and the new mode
7835 * does not, decrement the number of deny read mode bits
7836 * and if it goes to zero turn off the deny read bit
7837 * on the file.
7838 */
7839 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7840 (deny & OPEN4_SHARE_DENY_READ) == 0) {
7841 fp->rf_deny_read--;
7842 if (fp->rf_deny_read == 0)
7843 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7844 }
7845
7846 /*
7847 * If the current mode has deny write and the new mode
7848 * does not, decrement the number of deny write mode bits
7849 * and if it goes to zero turn off the deny write bit
7850 * on the file.
7851 */
7852 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7853 (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7854 fp->rf_deny_write--;
7855 if (fp->rf_deny_write == 0)
7856 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7857 }
7858
7859 /*
7860 * If the current mode has access read and the new mode
7861 * does not, decrement the number of access read mode bits
7862 * and if it goes to zero turn off the access read bit
7863 * on the file. set fflags to FREAD for the call to
7864 * vn_open_downgrade().
7865 */
7866 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7867 (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7868 fp->rf_access_read--;
7869 if (fp->rf_access_read == 0)
7870 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7871 fflags |= FREAD;
7872 }
7873
7874 /*
7875 * If the current mode has access write and the new mode
7876 * does not, decrement the number of access write mode bits
7877 * and if it goes to zero turn off the access write bit
7878 * on the file. set fflags to FWRITE for the call to
7879 * vn_open_downgrade().
7880 */
7881 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7882 (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7883 fp->rf_access_write--;
7884 if (fp->rf_access_write == 0)
7885 fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7886 fflags |= FWRITE;
7887 }
7888
7889 /* Check that the file is still accessible */
7890 ASSERT(fp->rf_share_access);
7891
7892 rfs4_dbe_unlock(fp->rf_dbe);
7893
7894 /* now set the new open access and deny modes */
7895 sp->rs_open_access = access;
7896 sp->rs_open_deny = deny;
7897
7898 /*
7899 * we successfully downgraded the share lock, now we need to downgrade
7900 * the open. it is possible that the downgrade was only for a deny
7901 * mode and we have nothing else to do.
7902 */
7903 if ((fflags & (FREAD|FWRITE)) != 0)
7904 vn_open_downgrade(cs->vp, fflags);
7905
7906 /* Update the stateid */
7907 next_stateid(&sp->rs_stateid);
7908 resp->open_stateid = sp->rs_stateid.stateid;
7909
7910 rfs4_dbe_unlock(sp->rs_dbe);
7911
7912 *cs->statusp = resp->status = NFS4_OK;
7913 /* Update the lease */
7914 rfs4_update_lease(sp->rs_owner->ro_client);
7915 /* And the sequence */
7916 rfs4_update_open_sequence(sp->rs_owner);
7917 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7918
7919 end:
7920 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7921 rfs4_state_rele(sp);
7922 out:
7923 DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7924 OPEN_DOWNGRADE4res *, resp);
7925 }
7926
7927 /*
7928 * The logic behind this function is detailed in the NFSv4 RFC in the
7929 * SETCLIENTID operation description under IMPLEMENTATION. Refer to
7930 * that section for explicit guidance to server behavior for
7931 * SETCLIENTID.
7932 */
7933 void
rfs4_op_setclientid(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)7934 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7935 struct svc_req *req, struct compound_state *cs)
7936 {
7937 SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7938 SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
7939 rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
7940 rfs4_clntip_t *ci;
7941 bool_t create;
7942 char *addr, *netid;
7943 int len;
7944
7945 DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
7946 SETCLIENTID4args *, args);
7947 retry:
7948 newcp = cp_confirmed = cp_unconfirmed = NULL;
7949
7950 /*
7951 * Save the caller's IP address
7952 */
7953 args->client.cl_addr =
7954 (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
7955
7956 /*
7957 * Record if it is a Solaris client that cannot handle referrals.
7958 */
7959 if (strstr(args->client.id_val, "Solaris") &&
7960 !strstr(args->client.id_val, "+referrals")) {
7961 /* Add a "yes, it's downrev" record */
7962 create = TRUE;
7963 ci = rfs4_find_clntip(args->client.cl_addr, &create);
7964 ASSERT(ci != NULL);
7965 rfs4_dbe_rele(ci->ri_dbe);
7966 } else {
7967 /* Remove any previous record */
7968 rfs4_invalidate_clntip(args->client.cl_addr);
7969 }
7970
7971 /*
7972 * In search of an EXISTING client matching the incoming
7973 * request to establish a new client identifier at the server
7974 */
7975 create = TRUE;
7976 cp = rfs4_findclient(&args->client, &create, NULL);
7977
7978 /* Should never happen */
7979 ASSERT(cp != NULL);
7980
7981 if (cp == NULL) {
7982 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
7983 goto out;
7984 }
7985
7986 /*
7987 * Easiest case. Client identifier is newly created and is
7988 * unconfirmed. Also note that for this case, no other
7989 * entries exist for the client identifier. Nothing else to
7990 * check. Just setup the response and respond.
7991 */
7992 if (create) {
7993 *cs->statusp = res->status = NFS4_OK;
7994 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
7995 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
7996 cp->rc_confirm_verf;
7997 /* Setup callback information; CB_NULL confirmation later */
7998 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
7999
8000 rfs4_client_rele(cp);
8001 goto out;
8002 }
8003
8004 /*
8005 * An existing, confirmed client may exist but it may not have
8006 * been active for at least one lease period. If so, then
8007 * "close" the client and create a new client identifier
8008 */
8009 if (rfs4_lease_expired(cp)) {
8010 rfs4_client_close(cp);
8011 goto retry;
8012 }
8013
8014 if (cp->rc_need_confirm == TRUE)
8015 cp_unconfirmed = cp;
8016 else
8017 cp_confirmed = cp;
8018
8019 cp = NULL;
8020
8021 /*
8022 * We have a confirmed client, now check for an
8023 * unconfimred entry
8024 */
8025 if (cp_confirmed) {
8026 /* If creds don't match then client identifier is inuse */
8027 if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8028 rfs4_cbinfo_t *cbp;
8029 /*
8030 * Some one else has established this client
8031 * id. Try and say * who they are. We will use
8032 * the call back address supplied by * the
8033 * first client.
8034 */
8035 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8036
8037 addr = netid = NULL;
8038
8039 cbp = &cp_confirmed->rc_cbinfo;
8040 if (cbp->cb_callback.cb_location.r_addr &&
8041 cbp->cb_callback.cb_location.r_netid) {
8042 cb_client4 *cbcp = &cbp->cb_callback;
8043
8044 len = strlen(cbcp->cb_location.r_addr)+1;
8045 addr = kmem_alloc(len, KM_SLEEP);
8046 bcopy(cbcp->cb_location.r_addr, addr, len);
8047 len = strlen(cbcp->cb_location.r_netid)+1;
8048 netid = kmem_alloc(len, KM_SLEEP);
8049 bcopy(cbcp->cb_location.r_netid, netid, len);
8050 }
8051
8052 res->SETCLIENTID4res_u.client_using.r_addr = addr;
8053 res->SETCLIENTID4res_u.client_using.r_netid = netid;
8054
8055 rfs4_client_rele(cp_confirmed);
8056 }
8057
8058 /*
8059 * Confirmed, creds match, and verifier matches; must
8060 * be an update of the callback info
8061 */
8062 if (cp_confirmed->rc_nfs_client.verifier ==
8063 args->client.verifier) {
8064 /* Setup callback information */
8065 rfs4_client_setcb(cp_confirmed, &args->callback,
8066 args->callback_ident);
8067
8068 /* everything okay -- move ahead */
8069 *cs->statusp = res->status = NFS4_OK;
8070 res->SETCLIENTID4res_u.resok4.clientid =
8071 cp_confirmed->rc_clientid;
8072
8073 /* update the confirm_verifier and return it */
8074 rfs4_client_scv_next(cp_confirmed);
8075 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8076 cp_confirmed->rc_confirm_verf;
8077
8078 rfs4_client_rele(cp_confirmed);
8079 goto out;
8080 }
8081
8082 /*
8083 * Creds match but the verifier doesn't. Must search
8084 * for an unconfirmed client that would be replaced by
8085 * this request.
8086 */
8087 create = FALSE;
8088 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8089 cp_confirmed);
8090 }
8091
8092 /*
8093 * At this point, we have taken care of the brand new client
8094 * struct, INUSE case, update of an existing, and confirmed
8095 * client struct.
8096 */
8097
8098 /*
8099 * check to see if things have changed while we originally
8100 * picked up the client struct. If they have, then return and
8101 * retry the processing of this SETCLIENTID request.
8102 */
8103 if (cp_unconfirmed) {
8104 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8105 if (!cp_unconfirmed->rc_need_confirm) {
8106 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8107 rfs4_client_rele(cp_unconfirmed);
8108 if (cp_confirmed)
8109 rfs4_client_rele(cp_confirmed);
8110 goto retry;
8111 }
8112 /* do away with the old unconfirmed one */
8113 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8114 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8115 rfs4_client_rele(cp_unconfirmed);
8116 cp_unconfirmed = NULL;
8117 }
8118
8119 /*
8120 * This search will temporarily hide the confirmed client
8121 * struct while a new client struct is created as the
8122 * unconfirmed one.
8123 */
8124 create = TRUE;
8125 newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8126
8127 ASSERT(newcp != NULL);
8128
8129 if (newcp == NULL) {
8130 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8131 rfs4_client_rele(cp_confirmed);
8132 goto out;
8133 }
8134
8135 /*
8136 * If one was not created, then a similar request must be in
8137 * process so release and start over with this one
8138 */
8139 if (create != TRUE) {
8140 rfs4_client_rele(newcp);
8141 if (cp_confirmed)
8142 rfs4_client_rele(cp_confirmed);
8143 goto retry;
8144 }
8145
8146 *cs->statusp = res->status = NFS4_OK;
8147 res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8148 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8149 newcp->rc_confirm_verf;
8150 /* Setup callback information; CB_NULL confirmation later */
8151 rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8152
8153 newcp->rc_cp_confirmed = cp_confirmed;
8154
8155 rfs4_client_rele(newcp);
8156
8157 out:
8158 DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8159 SETCLIENTID4res *, res);
8160 }
8161
8162 /*ARGSUSED*/
8163 void
rfs4_op_setclientid_confirm(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)8164 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8165 struct svc_req *req, struct compound_state *cs)
8166 {
8167 SETCLIENTID_CONFIRM4args *args =
8168 &argop->nfs_argop4_u.opsetclientid_confirm;
8169 SETCLIENTID_CONFIRM4res *res =
8170 &resop->nfs_resop4_u.opsetclientid_confirm;
8171 rfs4_client_t *cp, *cptoclose = NULL;
8172
8173 DTRACE_NFSV4_2(op__setclientid__confirm__start,
8174 struct compound_state *, cs,
8175 SETCLIENTID_CONFIRM4args *, args);
8176
8177 *cs->statusp = res->status = NFS4_OK;
8178
8179 cp = rfs4_findclient_by_id(args->clientid, TRUE);
8180
8181 if (cp == NULL) {
8182 *cs->statusp = res->status =
8183 rfs4_check_clientid(&args->clientid, 1);
8184 goto out;
8185 }
8186
8187 if (!creds_ok(cp, req, cs)) {
8188 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8189 rfs4_client_rele(cp);
8190 goto out;
8191 }
8192
8193 /* If the verifier doesn't match, the record doesn't match */
8194 if (cp->rc_confirm_verf != args->setclientid_confirm) {
8195 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8196 rfs4_client_rele(cp);
8197 goto out;
8198 }
8199
8200 rfs4_dbe_lock(cp->rc_dbe);
8201 cp->rc_need_confirm = FALSE;
8202 if (cp->rc_cp_confirmed) {
8203 cptoclose = cp->rc_cp_confirmed;
8204 cptoclose->rc_ss_remove = 1;
8205 cp->rc_cp_confirmed = NULL;
8206 }
8207
8208 /*
8209 * Update the client's associated server instance, if it's changed
8210 * since the client was created.
8211 */
8212 if (rfs4_servinst(cp) != rfs4_cur_servinst)
8213 rfs4_servinst_assign(cp, rfs4_cur_servinst);
8214
8215 /*
8216 * Record clientid in stable storage.
8217 * Must be done after server instance has been assigned.
8218 */
8219 rfs4_ss_clid(cp);
8220
8221 rfs4_dbe_unlock(cp->rc_dbe);
8222
8223 if (cptoclose)
8224 /* don't need to rele, client_close does it */
8225 rfs4_client_close(cptoclose);
8226
8227 /* If needed, initiate CB_NULL call for callback path */
8228 rfs4_deleg_cb_check(cp);
8229 rfs4_update_lease(cp);
8230
8231 /*
8232 * Check to see if client can perform reclaims
8233 */
8234 rfs4_ss_chkclid(cp);
8235
8236 rfs4_client_rele(cp);
8237
8238 out:
8239 DTRACE_NFSV4_2(op__setclientid__confirm__done,
8240 struct compound_state *, cs,
8241 SETCLIENTID_CONFIRM4 *, res);
8242 }
8243
8244
8245 /*ARGSUSED*/
8246 void
rfs4_op_close(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)8247 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8248 struct svc_req *req, struct compound_state *cs)
8249 {
8250 CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8251 CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8252 rfs4_state_t *sp;
8253 nfsstat4 status;
8254
8255 DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8256 CLOSE4args *, args);
8257
8258 if (cs->vp == NULL) {
8259 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8260 goto out;
8261 }
8262
8263 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8264 if (status != NFS4_OK) {
8265 *cs->statusp = resp->status = status;
8266 goto out;
8267 }
8268
8269 /* Ensure specified filehandle matches */
8270 if (cs->vp != sp->rs_finfo->rf_vp) {
8271 rfs4_state_rele(sp);
8272 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8273 goto out;
8274 }
8275
8276 /* hold off other access to open_owner while we tinker */
8277 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8278
8279 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8280 case NFS4_CHECK_STATEID_OKAY:
8281 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8282 resop) != NFS4_CHKSEQ_OKAY) {
8283 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8284 goto end;
8285 }
8286 break;
8287 case NFS4_CHECK_STATEID_OLD:
8288 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8289 goto end;
8290 case NFS4_CHECK_STATEID_BAD:
8291 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8292 goto end;
8293 case NFS4_CHECK_STATEID_EXPIRED:
8294 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8295 goto end;
8296 case NFS4_CHECK_STATEID_CLOSED:
8297 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8298 goto end;
8299 case NFS4_CHECK_STATEID_UNCONFIRMED:
8300 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8301 goto end;
8302 case NFS4_CHECK_STATEID_REPLAY:
8303 /* Check the sequence id for the open owner */
8304 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8305 resop)) {
8306 case NFS4_CHKSEQ_OKAY:
8307 /*
8308 * This is replayed stateid; if seqid matches
8309 * next expected, then client is using wrong seqid.
8310 */
8311 /* FALL THROUGH */
8312 case NFS4_CHKSEQ_BAD:
8313 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8314 goto end;
8315 case NFS4_CHKSEQ_REPLAY:
8316 /*
8317 * Note this case is the duplicate case so
8318 * resp->status is already set.
8319 */
8320 *cs->statusp = resp->status;
8321 rfs4_update_lease(sp->rs_owner->ro_client);
8322 goto end;
8323 }
8324 break;
8325 default:
8326 ASSERT(FALSE);
8327 break;
8328 }
8329
8330 rfs4_dbe_lock(sp->rs_dbe);
8331
8332 /* Update the stateid. */
8333 next_stateid(&sp->rs_stateid);
8334 resp->open_stateid = sp->rs_stateid.stateid;
8335
8336 rfs4_dbe_unlock(sp->rs_dbe);
8337
8338 rfs4_update_lease(sp->rs_owner->ro_client);
8339 rfs4_update_open_sequence(sp->rs_owner);
8340 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8341
8342 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8343
8344 *cs->statusp = resp->status = status;
8345
8346 end:
8347 rfs4_sw_exit(&sp->rs_owner->ro_sw);
8348 rfs4_state_rele(sp);
8349 out:
8350 DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8351 CLOSE4res *, resp);
8352 }
8353
8354 /*
8355 * Manage the counts on the file struct and close all file locks
8356 */
8357 /*ARGSUSED*/
8358 void
rfs4_release_share_lock_state(rfs4_state_t * sp,cred_t * cr,bool_t close_of_client)8359 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8360 bool_t close_of_client)
8361 {
8362 rfs4_file_t *fp = sp->rs_finfo;
8363 rfs4_lo_state_t *lsp;
8364 int fflags = 0;
8365
8366 /*
8367 * If this call is part of the larger closing down of client
8368 * state then it is just easier to release all locks
8369 * associated with this client instead of going through each
8370 * individual file and cleaning locks there.
8371 */
8372 if (close_of_client) {
8373 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8374 !list_is_empty(&sp->rs_lostatelist) &&
8375 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8376 /* Is the PxFS kernel module loaded? */
8377 if (lm_remove_file_locks != NULL) {
8378 int new_sysid;
8379
8380 /* Encode the cluster nodeid in new sysid */
8381 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8382 lm_set_nlmid_flk(&new_sysid);
8383
8384 /*
8385 * This PxFS routine removes file locks for a
8386 * client over all nodes of a cluster.
8387 */
8388 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8389 "lm_remove_file_locks(sysid=0x%x)\n",
8390 new_sysid));
8391 (*lm_remove_file_locks)(new_sysid);
8392 } else {
8393 struct flock64 flk;
8394
8395 /* Release all locks for this client */
8396 flk.l_type = F_UNLKSYS;
8397 flk.l_whence = 0;
8398 flk.l_start = 0;
8399 flk.l_len = 0;
8400 flk.l_sysid =
8401 sp->rs_owner->ro_client->rc_sysidt;
8402 flk.l_pid = 0;
8403 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8404 &flk, F_REMOTELOCK | FREAD | FWRITE,
8405 (u_offset_t)0, NULL, CRED(), NULL);
8406 }
8407
8408 sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8409 }
8410 }
8411
8412 /*
8413 * Release all locks on this file by this lock owner or at
8414 * least mark the locks as having been released
8415 */
8416 for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8417 lsp = list_next(&sp->rs_lostatelist, lsp)) {
8418 lsp->rls_locks_cleaned = TRUE;
8419
8420 /* Was this already taken care of above? */
8421 if (!close_of_client &&
8422 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8423 (void) cleanlocks(sp->rs_finfo->rf_vp,
8424 lsp->rls_locker->rl_pid,
8425 lsp->rls_locker->rl_client->rc_sysidt);
8426 }
8427
8428 /*
8429 * Release any shrlocks associated with this open state ID.
8430 * This must be done before the rfs4_state gets marked closed.
8431 */
8432 if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8433 (void) rfs4_unshare(sp);
8434
8435 if (sp->rs_open_access) {
8436 rfs4_dbe_lock(fp->rf_dbe);
8437
8438 /*
8439 * Decrement the count for each access and deny bit that this
8440 * state has contributed to the file.
8441 * If the file counts go to zero
8442 * clear the appropriate bit in the appropriate mask.
8443 */
8444 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8445 fp->rf_access_read--;
8446 fflags |= FREAD;
8447 if (fp->rf_access_read == 0)
8448 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8449 }
8450 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8451 fp->rf_access_write--;
8452 fflags |= FWRITE;
8453 if (fp->rf_access_write == 0)
8454 fp->rf_share_access &=
8455 ~OPEN4_SHARE_ACCESS_WRITE;
8456 }
8457 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8458 fp->rf_deny_read--;
8459 if (fp->rf_deny_read == 0)
8460 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8461 }
8462 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8463 fp->rf_deny_write--;
8464 if (fp->rf_deny_write == 0)
8465 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8466 }
8467
8468 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8469
8470 rfs4_dbe_unlock(fp->rf_dbe);
8471
8472 sp->rs_open_access = 0;
8473 sp->rs_open_deny = 0;
8474 }
8475 }
8476
8477 /*
8478 * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8479 */
8480 static nfsstat4
lock_denied(LOCK4denied * dp,struct flock64 * flk)8481 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8482 {
8483 rfs4_lockowner_t *lo;
8484 rfs4_client_t *cp;
8485 uint32_t len;
8486
8487 lo = rfs4_findlockowner_by_pid(flk->l_pid);
8488 if (lo != NULL) {
8489 cp = lo->rl_client;
8490 if (rfs4_lease_expired(cp)) {
8491 rfs4_lockowner_rele(lo);
8492 rfs4_dbe_hold(cp->rc_dbe);
8493 rfs4_client_close(cp);
8494 return (NFS4ERR_EXPIRED);
8495 }
8496 dp->owner.clientid = lo->rl_owner.clientid;
8497 len = lo->rl_owner.owner_len;
8498 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8499 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8500 dp->owner.owner_len = len;
8501 rfs4_lockowner_rele(lo);
8502 goto finish;
8503 }
8504
8505 /*
8506 * Its not a NFS4 lock. We take advantage that the upper 32 bits
8507 * of the client id contain the boot time for a NFS4 lock. So we
8508 * fabricate and identity by setting clientid to the sysid, and
8509 * the lock owner to the pid.
8510 */
8511 dp->owner.clientid = flk->l_sysid;
8512 len = sizeof (pid_t);
8513 dp->owner.owner_len = len;
8514 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8515 bcopy(&flk->l_pid, dp->owner.owner_val, len);
8516 finish:
8517 dp->offset = flk->l_start;
8518 dp->length = flk->l_len;
8519
8520 if (flk->l_type == F_RDLCK)
8521 dp->locktype = READ_LT;
8522 else if (flk->l_type == F_WRLCK)
8523 dp->locktype = WRITE_LT;
8524 else
8525 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8526
8527 return (NFS4_OK);
8528 }
8529
8530 static int
setlock(vnode_t * vp,struct flock64 * flock,int flag,cred_t * cred)8531 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8532 {
8533 int error;
8534 struct flock64 flk;
8535 int i;
8536 clock_t delaytime;
8537 int cmd;
8538
8539 cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8540 retry:
8541 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8542
8543 for (i = 0; i < rfs4_maxlock_tries; i++) {
8544 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8545 error = VOP_FRLOCK(vp, cmd,
8546 flock, flag, (u_offset_t)0, NULL, cred, NULL);
8547
8548 if (error != EAGAIN && error != EACCES)
8549 break;
8550
8551 if (i < rfs4_maxlock_tries - 1) {
8552 delay(delaytime);
8553 delaytime *= 2;
8554 }
8555 }
8556
8557 if (error == EAGAIN || error == EACCES) {
8558 /* Get the owner of the lock */
8559 flk = *flock;
8560 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8561 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag,
8562 (u_offset_t)0, NULL, cred, NULL) == 0) {
8563 if (flk.l_type == F_UNLCK) {
8564 /* No longer locked, retry */
8565 goto retry;
8566 }
8567 *flock = flk;
8568 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8569 F_GETLK, &flk);
8570 }
8571 }
8572
8573 return (error);
8574 }
8575
8576 /*ARGSUSED*/
8577 static nfsstat4
rfs4_do_lock(rfs4_lo_state_t * lsp,nfs_lock_type4 locktype,offset4 offset,length4 length,cred_t * cred,nfs_resop4 * resop)8578 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8579 offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8580 {
8581 nfsstat4 status;
8582 rfs4_lockowner_t *lo = lsp->rls_locker;
8583 rfs4_state_t *sp = lsp->rls_state;
8584 struct flock64 flock;
8585 int16_t ltype;
8586 int flag;
8587 int error;
8588 sysid_t sysid;
8589 LOCK4res *lres;
8590
8591 if (rfs4_lease_expired(lo->rl_client)) {
8592 return (NFS4ERR_EXPIRED);
8593 }
8594
8595 if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8596 return (status);
8597
8598 /* Check for zero length. To lock to end of file use all ones for V4 */
8599 if (length == 0)
8600 return (NFS4ERR_INVAL);
8601 else if (length == (length4)(~0))
8602 length = 0; /* Posix to end of file */
8603
8604 retry:
8605 rfs4_dbe_lock(sp->rs_dbe);
8606 if (sp->rs_closed) {
8607 rfs4_dbe_unlock(sp->rs_dbe);
8608 return (NFS4ERR_OLD_STATEID);
8609 }
8610
8611 if (resop->resop != OP_LOCKU) {
8612 switch (locktype) {
8613 case READ_LT:
8614 case READW_LT:
8615 if ((sp->rs_share_access
8616 & OPEN4_SHARE_ACCESS_READ) == 0) {
8617 rfs4_dbe_unlock(sp->rs_dbe);
8618
8619 return (NFS4ERR_OPENMODE);
8620 }
8621 ltype = F_RDLCK;
8622 break;
8623 case WRITE_LT:
8624 case WRITEW_LT:
8625 if ((sp->rs_share_access
8626 & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8627 rfs4_dbe_unlock(sp->rs_dbe);
8628
8629 return (NFS4ERR_OPENMODE);
8630 }
8631 ltype = F_WRLCK;
8632 break;
8633 }
8634 } else
8635 ltype = F_UNLCK;
8636
8637 flock.l_type = ltype;
8638 flock.l_whence = 0; /* SEEK_SET */
8639 flock.l_start = offset;
8640 flock.l_len = length;
8641 flock.l_sysid = sysid;
8642 flock.l_pid = lsp->rls_locker->rl_pid;
8643
8644 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8645 if (flock.l_len < 0 || flock.l_start < 0) {
8646 rfs4_dbe_unlock(sp->rs_dbe);
8647 return (NFS4ERR_INVAL);
8648 }
8649
8650 /*
8651 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8652 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8653 */
8654 flag = (int)sp->rs_share_access | F_REMOTELOCK;
8655
8656 error = setlock(sp->rs_finfo->rf_vp, &flock, flag, cred);
8657 if (error == 0) {
8658 rfs4_dbe_lock(lsp->rls_dbe);
8659 next_stateid(&lsp->rls_lockid);
8660 rfs4_dbe_unlock(lsp->rls_dbe);
8661 }
8662
8663 rfs4_dbe_unlock(sp->rs_dbe);
8664
8665 /*
8666 * N.B. We map error values to nfsv4 errors. This is differrent
8667 * than puterrno4 routine.
8668 */
8669 switch (error) {
8670 case 0:
8671 status = NFS4_OK;
8672 break;
8673 case EAGAIN:
8674 case EACCES: /* Old value */
8675 /* Can only get here if op is OP_LOCK */
8676 ASSERT(resop->resop == OP_LOCK);
8677 lres = &resop->nfs_resop4_u.oplock;
8678 status = NFS4ERR_DENIED;
8679 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8680 == NFS4ERR_EXPIRED)
8681 goto retry;
8682 break;
8683 case ENOLCK:
8684 status = NFS4ERR_DELAY;
8685 break;
8686 case EOVERFLOW:
8687 status = NFS4ERR_INVAL;
8688 break;
8689 case EINVAL:
8690 status = NFS4ERR_NOTSUPP;
8691 break;
8692 default:
8693 status = NFS4ERR_SERVERFAULT;
8694 break;
8695 }
8696
8697 return (status);
8698 }
8699
8700 /*ARGSUSED*/
8701 void
rfs4_op_lock(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)8702 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8703 struct svc_req *req, struct compound_state *cs)
8704 {
8705 LOCK4args *args = &argop->nfs_argop4_u.oplock;
8706 LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8707 nfsstat4 status;
8708 stateid4 *stateid;
8709 rfs4_lockowner_t *lo;
8710 rfs4_client_t *cp;
8711 rfs4_state_t *sp = NULL;
8712 rfs4_lo_state_t *lsp = NULL;
8713 bool_t ls_sw_held = FALSE;
8714 bool_t create = TRUE;
8715 bool_t lcreate = TRUE;
8716 bool_t dup_lock = FALSE;
8717 int rc;
8718
8719 DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8720 LOCK4args *, args);
8721
8722 if (cs->vp == NULL) {
8723 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8724 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8725 cs, LOCK4res *, resp);
8726 return;
8727 }
8728
8729 if (args->locker.new_lock_owner) {
8730 /* Create a new lockowner for this instance */
8731 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8732
8733 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8734
8735 stateid = &olo->open_stateid;
8736 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8737 if (status != NFS4_OK) {
8738 NFS4_DEBUG(rfs4_debug,
8739 (CE_NOTE, "Get state failed in lock %d", status));
8740 *cs->statusp = resp->status = status;
8741 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8742 cs, LOCK4res *, resp);
8743 return;
8744 }
8745
8746 /* Ensure specified filehandle matches */
8747 if (cs->vp != sp->rs_finfo->rf_vp) {
8748 rfs4_state_rele(sp);
8749 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8750 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8751 cs, LOCK4res *, resp);
8752 return;
8753 }
8754
8755 /* hold off other access to open_owner while we tinker */
8756 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8757
8758 switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8759 case NFS4_CHECK_STATEID_OLD:
8760 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8761 goto end;
8762 case NFS4_CHECK_STATEID_BAD:
8763 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8764 goto end;
8765 case NFS4_CHECK_STATEID_EXPIRED:
8766 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8767 goto end;
8768 case NFS4_CHECK_STATEID_UNCONFIRMED:
8769 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8770 goto end;
8771 case NFS4_CHECK_STATEID_CLOSED:
8772 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8773 goto end;
8774 case NFS4_CHECK_STATEID_OKAY:
8775 case NFS4_CHECK_STATEID_REPLAY:
8776 switch (rfs4_check_olo_seqid(olo->open_seqid,
8777 sp->rs_owner, resop)) {
8778 case NFS4_CHKSEQ_OKAY:
8779 if (rc == NFS4_CHECK_STATEID_OKAY)
8780 break;
8781 /*
8782 * This is replayed stateid; if seqid
8783 * matches next expected, then client
8784 * is using wrong seqid.
8785 */
8786 /* FALLTHROUGH */
8787 case NFS4_CHKSEQ_BAD:
8788 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8789 goto end;
8790 case NFS4_CHKSEQ_REPLAY:
8791 /* This is a duplicate LOCK request */
8792 dup_lock = TRUE;
8793
8794 /*
8795 * For a duplicate we do not want to
8796 * create a new lockowner as it should
8797 * already exist.
8798 * Turn off the lockowner create flag.
8799 */
8800 lcreate = FALSE;
8801 }
8802 break;
8803 }
8804
8805 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8806 if (lo == NULL) {
8807 NFS4_DEBUG(rfs4_debug,
8808 (CE_NOTE, "rfs4_op_lock: no lock owner"));
8809 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
8810 goto end;
8811 }
8812
8813 lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8814 if (lsp == NULL) {
8815 rfs4_update_lease(sp->rs_owner->ro_client);
8816 /*
8817 * Only update theh open_seqid if this is not
8818 * a duplicate request
8819 */
8820 if (dup_lock == FALSE) {
8821 rfs4_update_open_sequence(sp->rs_owner);
8822 }
8823
8824 NFS4_DEBUG(rfs4_debug,
8825 (CE_NOTE, "rfs4_op_lock: no state"));
8826 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8827 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8828 rfs4_lockowner_rele(lo);
8829 goto end;
8830 }
8831
8832 /*
8833 * This is the new_lock_owner branch and the client is
8834 * supposed to be associating a new lock_owner with
8835 * the open file at this point. If we find that a
8836 * lock_owner/state association already exists and a
8837 * successful LOCK request was returned to the client,
8838 * an error is returned to the client since this is
8839 * not appropriate. The client should be using the
8840 * existing lock_owner branch.
8841 */
8842 if (dup_lock == FALSE && create == FALSE) {
8843 if (lsp->rls_lock_completed == TRUE) {
8844 *cs->statusp =
8845 resp->status = NFS4ERR_BAD_SEQID;
8846 rfs4_lockowner_rele(lo);
8847 goto end;
8848 }
8849 }
8850
8851 rfs4_update_lease(sp->rs_owner->ro_client);
8852
8853 /*
8854 * Only update theh open_seqid if this is not
8855 * a duplicate request
8856 */
8857 if (dup_lock == FALSE) {
8858 rfs4_update_open_sequence(sp->rs_owner);
8859 }
8860
8861 /*
8862 * If this is a duplicate lock request, just copy the
8863 * previously saved reply and return.
8864 */
8865 if (dup_lock == TRUE) {
8866 /* verify that lock_seqid's match */
8867 if (lsp->rls_seqid != olo->lock_seqid) {
8868 NFS4_DEBUG(rfs4_debug,
8869 (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
8870 "lsp->seqid=%d old->seqid=%d",
8871 lsp->rls_seqid, olo->lock_seqid));
8872 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8873 } else {
8874 rfs4_copy_reply(resop, &lsp->rls_reply);
8875 /*
8876 * Make sure to copy the just
8877 * retrieved reply status into the
8878 * overall compound status
8879 */
8880 *cs->statusp = resp->status;
8881 }
8882 rfs4_lockowner_rele(lo);
8883 goto end;
8884 }
8885
8886 rfs4_dbe_lock(lsp->rls_dbe);
8887
8888 /* Make sure to update the lock sequence id */
8889 lsp->rls_seqid = olo->lock_seqid;
8890
8891 NFS4_DEBUG(rfs4_debug,
8892 (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
8893
8894 /*
8895 * This is used to signify the newly created lockowner
8896 * stateid and its sequence number. The checks for
8897 * sequence number and increment don't occur on the
8898 * very first lock request for a lockowner.
8899 */
8900 lsp->rls_skip_seqid_check = TRUE;
8901
8902 /* hold off other access to lsp while we tinker */
8903 rfs4_sw_enter(&lsp->rls_sw);
8904 ls_sw_held = TRUE;
8905
8906 rfs4_dbe_unlock(lsp->rls_dbe);
8907
8908 rfs4_lockowner_rele(lo);
8909 } else {
8910 stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
8911 /* get lsp and hold the lock on the underlying file struct */
8912 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
8913 != NFS4_OK) {
8914 *cs->statusp = resp->status = status;
8915 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8916 cs, LOCK4res *, resp);
8917 return;
8918 }
8919 create = FALSE; /* We didn't create lsp */
8920
8921 /* Ensure specified filehandle matches */
8922 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
8923 rfs4_lo_state_rele(lsp, TRUE);
8924 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8925 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8926 cs, LOCK4res *, resp);
8927 return;
8928 }
8929
8930 /* hold off other access to lsp while we tinker */
8931 rfs4_sw_enter(&lsp->rls_sw);
8932 ls_sw_held = TRUE;
8933
8934 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
8935 /*
8936 * The stateid looks like it was okay (expected to be
8937 * the next one)
8938 */
8939 case NFS4_CHECK_STATEID_OKAY:
8940 /*
8941 * The sequence id is now checked. Determine
8942 * if this is a replay or if it is in the
8943 * expected (next) sequence. In the case of a
8944 * replay, there are two replay conditions
8945 * that may occur. The first is the normal
8946 * condition where a LOCK is done with a
8947 * NFS4_OK response and the stateid is
8948 * updated. That case is handled below when
8949 * the stateid is identified as a REPLAY. The
8950 * second is the case where an error is
8951 * returned, like NFS4ERR_DENIED, and the
8952 * sequence number is updated but the stateid
8953 * is not updated. This second case is dealt
8954 * with here. So it may seem odd that the
8955 * stateid is okay but the sequence id is a
8956 * replay but it is okay.
8957 */
8958 switch (rfs4_check_lock_seqid(
8959 args->locker.locker4_u.lock_owner.lock_seqid,
8960 lsp, resop)) {
8961 case NFS4_CHKSEQ_REPLAY:
8962 if (resp->status != NFS4_OK) {
8963 /*
8964 * Here is our replay and need
8965 * to verify that the last
8966 * response was an error.
8967 */
8968 *cs->statusp = resp->status;
8969 goto end;
8970 }
8971 /*
8972 * This is done since the sequence id
8973 * looked like a replay but it didn't
8974 * pass our check so a BAD_SEQID is
8975 * returned as a result.
8976 */
8977 /*FALLTHROUGH*/
8978 case NFS4_CHKSEQ_BAD:
8979 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8980 goto end;
8981 case NFS4_CHKSEQ_OKAY:
8982 /* Everything looks okay move ahead */
8983 break;
8984 }
8985 break;
8986 case NFS4_CHECK_STATEID_OLD:
8987 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8988 goto end;
8989 case NFS4_CHECK_STATEID_BAD:
8990 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8991 goto end;
8992 case NFS4_CHECK_STATEID_EXPIRED:
8993 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8994 goto end;
8995 case NFS4_CHECK_STATEID_CLOSED:
8996 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8997 goto end;
8998 case NFS4_CHECK_STATEID_REPLAY:
8999 switch (rfs4_check_lock_seqid(
9000 args->locker.locker4_u.lock_owner.lock_seqid,
9001 lsp, resop)) {
9002 case NFS4_CHKSEQ_OKAY:
9003 /*
9004 * This is a replayed stateid; if
9005 * seqid matches the next expected,
9006 * then client is using wrong seqid.
9007 */
9008 case NFS4_CHKSEQ_BAD:
9009 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9010 goto end;
9011 case NFS4_CHKSEQ_REPLAY:
9012 rfs4_update_lease(lsp->rls_locker->rl_client);
9013 *cs->statusp = status = resp->status;
9014 goto end;
9015 }
9016 break;
9017 default:
9018 ASSERT(FALSE);
9019 break;
9020 }
9021
9022 rfs4_update_lock_sequence(lsp);
9023 rfs4_update_lease(lsp->rls_locker->rl_client);
9024 }
9025
9026 /*
9027 * NFS4 only allows locking on regular files, so
9028 * verify type of object.
9029 */
9030 if (cs->vp->v_type != VREG) {
9031 if (cs->vp->v_type == VDIR)
9032 status = NFS4ERR_ISDIR;
9033 else
9034 status = NFS4ERR_INVAL;
9035 goto out;
9036 }
9037
9038 cp = lsp->rls_state->rs_owner->ro_client;
9039
9040 if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9041 status = NFS4ERR_GRACE;
9042 goto out;
9043 }
9044
9045 if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9046 status = NFS4ERR_NO_GRACE;
9047 goto out;
9048 }
9049
9050 if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9051 status = NFS4ERR_NO_GRACE;
9052 goto out;
9053 }
9054
9055 if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9056 cs->deleg = TRUE;
9057
9058 status = rfs4_do_lock(lsp, args->locktype,
9059 args->offset, args->length, cs->cr, resop);
9060
9061 out:
9062 lsp->rls_skip_seqid_check = FALSE;
9063
9064 *cs->statusp = resp->status = status;
9065
9066 if (status == NFS4_OK) {
9067 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9068 lsp->rls_lock_completed = TRUE;
9069 }
9070 /*
9071 * Only update the "OPEN" response here if this was a new
9072 * lock_owner
9073 */
9074 if (sp)
9075 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9076
9077 rfs4_update_lock_resp(lsp, resop);
9078
9079 end:
9080 if (lsp) {
9081 if (ls_sw_held)
9082 rfs4_sw_exit(&lsp->rls_sw);
9083 /*
9084 * If an sp obtained, then the lsp does not represent
9085 * a lock on the file struct.
9086 */
9087 if (sp != NULL)
9088 rfs4_lo_state_rele(lsp, FALSE);
9089 else
9090 rfs4_lo_state_rele(lsp, TRUE);
9091 }
9092 if (sp) {
9093 rfs4_sw_exit(&sp->rs_owner->ro_sw);
9094 rfs4_state_rele(sp);
9095 }
9096
9097 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9098 LOCK4res *, resp);
9099 }
9100
9101 /* free function for LOCK/LOCKT */
9102 static void
lock_denied_free(nfs_resop4 * resop)9103 lock_denied_free(nfs_resop4 *resop)
9104 {
9105 LOCK4denied *dp = NULL;
9106
9107 switch (resop->resop) {
9108 case OP_LOCK:
9109 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9110 dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9111 break;
9112 case OP_LOCKT:
9113 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9114 dp = &resop->nfs_resop4_u.oplockt.denied;
9115 break;
9116 default:
9117 break;
9118 }
9119
9120 if (dp)
9121 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9122 }
9123
9124 /*ARGSUSED*/
9125 void
rfs4_op_locku(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)9126 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9127 struct svc_req *req, struct compound_state *cs)
9128 {
9129 LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9130 LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9131 nfsstat4 status;
9132 stateid4 *stateid = &args->lock_stateid;
9133 rfs4_lo_state_t *lsp;
9134
9135 DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9136 LOCKU4args *, args);
9137
9138 if (cs->vp == NULL) {
9139 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9140 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9141 LOCKU4res *, resp);
9142 return;
9143 }
9144
9145 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9146 *cs->statusp = resp->status = status;
9147 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9148 LOCKU4res *, resp);
9149 return;
9150 }
9151
9152 /* Ensure specified filehandle matches */
9153 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9154 rfs4_lo_state_rele(lsp, TRUE);
9155 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9156 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9157 LOCKU4res *, resp);
9158 return;
9159 }
9160
9161 /* hold off other access to lsp while we tinker */
9162 rfs4_sw_enter(&lsp->rls_sw);
9163
9164 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9165 case NFS4_CHECK_STATEID_OKAY:
9166 if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9167 != NFS4_CHKSEQ_OKAY) {
9168 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9169 goto end;
9170 }
9171 break;
9172 case NFS4_CHECK_STATEID_OLD:
9173 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9174 goto end;
9175 case NFS4_CHECK_STATEID_BAD:
9176 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9177 goto end;
9178 case NFS4_CHECK_STATEID_EXPIRED:
9179 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9180 goto end;
9181 case NFS4_CHECK_STATEID_CLOSED:
9182 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9183 goto end;
9184 case NFS4_CHECK_STATEID_REPLAY:
9185 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9186 case NFS4_CHKSEQ_OKAY:
9187 /*
9188 * This is a replayed stateid; if
9189 * seqid matches the next expected,
9190 * then client is using wrong seqid.
9191 */
9192 case NFS4_CHKSEQ_BAD:
9193 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9194 goto end;
9195 case NFS4_CHKSEQ_REPLAY:
9196 rfs4_update_lease(lsp->rls_locker->rl_client);
9197 *cs->statusp = status = resp->status;
9198 goto end;
9199 }
9200 break;
9201 default:
9202 ASSERT(FALSE);
9203 break;
9204 }
9205
9206 rfs4_update_lock_sequence(lsp);
9207 rfs4_update_lease(lsp->rls_locker->rl_client);
9208
9209 /*
9210 * NFS4 only allows locking on regular files, so
9211 * verify type of object.
9212 */
9213 if (cs->vp->v_type != VREG) {
9214 if (cs->vp->v_type == VDIR)
9215 status = NFS4ERR_ISDIR;
9216 else
9217 status = NFS4ERR_INVAL;
9218 goto out;
9219 }
9220
9221 if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9222 status = NFS4ERR_GRACE;
9223 goto out;
9224 }
9225
9226 status = rfs4_do_lock(lsp, args->locktype,
9227 args->offset, args->length, cs->cr, resop);
9228
9229 out:
9230 *cs->statusp = resp->status = status;
9231
9232 if (status == NFS4_OK)
9233 resp->lock_stateid = lsp->rls_lockid.stateid;
9234
9235 rfs4_update_lock_resp(lsp, resop);
9236
9237 end:
9238 rfs4_sw_exit(&lsp->rls_sw);
9239 rfs4_lo_state_rele(lsp, TRUE);
9240
9241 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9242 LOCKU4res *, resp);
9243 }
9244
9245 /*
9246 * LOCKT is a best effort routine, the client can not be guaranteed that
9247 * the status return is still in effect by the time the reply is received.
9248 * They are numerous race conditions in this routine, but we are not required
9249 * and can not be accurate.
9250 */
9251 /*ARGSUSED*/
9252 void
rfs4_op_lockt(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)9253 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9254 struct svc_req *req, struct compound_state *cs)
9255 {
9256 LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9257 LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9258 rfs4_lockowner_t *lo;
9259 rfs4_client_t *cp;
9260 bool_t create = FALSE;
9261 struct flock64 flk;
9262 int error;
9263 int flag = FREAD | FWRITE;
9264 int ltype;
9265 length4 posix_length;
9266 sysid_t sysid;
9267 pid_t pid;
9268
9269 DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9270 LOCKT4args *, args);
9271
9272 if (cs->vp == NULL) {
9273 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9274 goto out;
9275 }
9276
9277 /*
9278 * NFS4 only allows locking on regular files, so
9279 * verify type of object.
9280 */
9281 if (cs->vp->v_type != VREG) {
9282 if (cs->vp->v_type == VDIR)
9283 *cs->statusp = resp->status = NFS4ERR_ISDIR;
9284 else
9285 *cs->statusp = resp->status = NFS4ERR_INVAL;
9286 goto out;
9287 }
9288
9289 /*
9290 * Check out the clientid to ensure the server knows about it
9291 * so that we correctly inform the client of a server reboot.
9292 */
9293 if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9294 == NULL) {
9295 *cs->statusp = resp->status =
9296 rfs4_check_clientid(&args->owner.clientid, 0);
9297 goto out;
9298 }
9299 if (rfs4_lease_expired(cp)) {
9300 rfs4_client_close(cp);
9301 /*
9302 * Protocol doesn't allow returning NFS4ERR_STALE as
9303 * other operations do on this check so STALE_CLIENTID
9304 * is returned instead
9305 */
9306 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9307 goto out;
9308 }
9309
9310 if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9311 *cs->statusp = resp->status = NFS4ERR_GRACE;
9312 rfs4_client_rele(cp);
9313 goto out;
9314 }
9315 rfs4_client_rele(cp);
9316
9317 resp->status = NFS4_OK;
9318
9319 switch (args->locktype) {
9320 case READ_LT:
9321 case READW_LT:
9322 ltype = F_RDLCK;
9323 break;
9324 case WRITE_LT:
9325 case WRITEW_LT:
9326 ltype = F_WRLCK;
9327 break;
9328 }
9329
9330 posix_length = args->length;
9331 /* Check for zero length. To lock to end of file use all ones for V4 */
9332 if (posix_length == 0) {
9333 *cs->statusp = resp->status = NFS4ERR_INVAL;
9334 goto out;
9335 } else if (posix_length == (length4)(~0)) {
9336 posix_length = 0; /* Posix to end of file */
9337 }
9338
9339 /* Find or create a lockowner */
9340 lo = rfs4_findlockowner(&args->owner, &create);
9341
9342 if (lo) {
9343 pid = lo->rl_pid;
9344 if ((resp->status =
9345 rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9346 goto err;
9347 } else {
9348 pid = 0;
9349 sysid = lockt_sysid;
9350 }
9351 retry:
9352 flk.l_type = ltype;
9353 flk.l_whence = 0; /* SEEK_SET */
9354 flk.l_start = args->offset;
9355 flk.l_len = posix_length;
9356 flk.l_sysid = sysid;
9357 flk.l_pid = pid;
9358 flag |= F_REMOTELOCK;
9359
9360 LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9361
9362 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9363 if (flk.l_len < 0 || flk.l_start < 0) {
9364 resp->status = NFS4ERR_INVAL;
9365 goto err;
9366 }
9367 error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9368 NULL, cs->cr, NULL);
9369
9370 /*
9371 * N.B. We map error values to nfsv4 errors. This is differrent
9372 * than puterrno4 routine.
9373 */
9374 switch (error) {
9375 case 0:
9376 if (flk.l_type == F_UNLCK)
9377 resp->status = NFS4_OK;
9378 else {
9379 if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9380 goto retry;
9381 resp->status = NFS4ERR_DENIED;
9382 }
9383 break;
9384 case EOVERFLOW:
9385 resp->status = NFS4ERR_INVAL;
9386 break;
9387 case EINVAL:
9388 resp->status = NFS4ERR_NOTSUPP;
9389 break;
9390 default:
9391 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9392 error);
9393 resp->status = NFS4ERR_SERVERFAULT;
9394 break;
9395 }
9396
9397 err:
9398 if (lo)
9399 rfs4_lockowner_rele(lo);
9400 *cs->statusp = resp->status;
9401 out:
9402 DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9403 LOCKT4res *, resp);
9404 }
9405
9406 int
rfs4_share(rfs4_state_t * sp,uint32_t access,uint32_t deny)9407 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9408 {
9409 int err;
9410 int cmd;
9411 vnode_t *vp;
9412 struct shrlock shr;
9413 struct shr_locowner shr_loco;
9414 int fflags = 0;
9415
9416 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9417 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9418
9419 if (sp->rs_closed)
9420 return (NFS4ERR_OLD_STATEID);
9421
9422 vp = sp->rs_finfo->rf_vp;
9423 ASSERT(vp);
9424
9425 shr.s_access = shr.s_deny = 0;
9426
9427 if (access & OPEN4_SHARE_ACCESS_READ) {
9428 fflags |= FREAD;
9429 shr.s_access |= F_RDACC;
9430 }
9431 if (access & OPEN4_SHARE_ACCESS_WRITE) {
9432 fflags |= FWRITE;
9433 shr.s_access |= F_WRACC;
9434 }
9435 ASSERT(shr.s_access);
9436
9437 if (deny & OPEN4_SHARE_DENY_READ)
9438 shr.s_deny |= F_RDDNY;
9439 if (deny & OPEN4_SHARE_DENY_WRITE)
9440 shr.s_deny |= F_WRDNY;
9441
9442 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9443 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9444 shr_loco.sl_pid = shr.s_pid;
9445 shr_loco.sl_id = shr.s_sysid;
9446 shr.s_owner = (caddr_t)&shr_loco;
9447 shr.s_own_len = sizeof (shr_loco);
9448
9449 cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9450
9451 err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9452 if (err != 0) {
9453 if (err == EAGAIN)
9454 err = NFS4ERR_SHARE_DENIED;
9455 else
9456 err = puterrno4(err);
9457 return (err);
9458 }
9459
9460 sp->rs_share_access |= access;
9461 sp->rs_share_deny |= deny;
9462
9463 return (0);
9464 }
9465
9466 int
rfs4_unshare(rfs4_state_t * sp)9467 rfs4_unshare(rfs4_state_t *sp)
9468 {
9469 int err;
9470 struct shrlock shr;
9471 struct shr_locowner shr_loco;
9472
9473 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9474
9475 if (sp->rs_closed || sp->rs_share_access == 0)
9476 return (0);
9477
9478 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9479 ASSERT(sp->rs_finfo->rf_vp);
9480
9481 shr.s_access = shr.s_deny = 0;
9482 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9483 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9484 shr_loco.sl_pid = shr.s_pid;
9485 shr_loco.sl_id = shr.s_sysid;
9486 shr.s_owner = (caddr_t)&shr_loco;
9487 shr.s_own_len = sizeof (shr_loco);
9488
9489 err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9490 NULL);
9491 if (err != 0) {
9492 err = puterrno4(err);
9493 return (err);
9494 }
9495
9496 sp->rs_share_access = 0;
9497 sp->rs_share_deny = 0;
9498
9499 return (0);
9500
9501 }
9502
9503 static int
rdma_setup_read_data4(READ4args * args,READ4res * rok)9504 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9505 {
9506 struct clist *wcl;
9507 count4 count = rok->data_len;
9508 int wlist_len;
9509
9510 wcl = args->wlist;
9511 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9512 return (FALSE);
9513 }
9514 wcl = args->wlist;
9515 rok->wlist_len = wlist_len;
9516 rok->wlist = wcl;
9517 return (TRUE);
9518 }
9519
/*
 * Tunable to disable server referrals.  When non-zero,
 * vn_is_nfs_reparse() returns B_FALSE without examining the vnode.
 */
int rfs4_no_referrals = 0;
9522
9523 /*
9524 * Find an NFS record in reparse point data.
9525 * Returns 0 for success and <0 or an errno value on failure.
9526 */
9527 int
vn_find_nfs_record(vnode_t * vp,nvlist_t ** nvlp,char ** svcp,char ** datap)9528 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9529 {
9530 int err;
9531 char *stype, *val;
9532 nvlist_t *nvl;
9533 nvpair_t *curr;
9534
9535 if ((nvl = reparse_init()) == NULL)
9536 return (-1);
9537
9538 if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9539 reparse_free(nvl);
9540 return (err);
9541 }
9542
9543 curr = NULL;
9544 while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9545 if ((stype = nvpair_name(curr)) == NULL) {
9546 reparse_free(nvl);
9547 return (-2);
9548 }
9549 if (strncasecmp(stype, "NFS", 3) == 0)
9550 break;
9551 }
9552
9553 if ((curr == NULL) ||
9554 (nvpair_value_string(curr, &val))) {
9555 reparse_free(nvl);
9556 return (-3);
9557 }
9558 *nvlp = nvl;
9559 *svcp = stype;
9560 *datap = val;
9561 return (0);
9562 }
9563
9564 int
vn_is_nfs_reparse(vnode_t * vp,cred_t * cr)9565 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9566 {
9567 nvlist_t *nvl;
9568 char *s, *d;
9569
9570 if (rfs4_no_referrals != 0)
9571 return (B_FALSE);
9572
9573 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9574 return (B_FALSE);
9575
9576 if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9577 return (B_FALSE);
9578
9579 reparse_free(nvl);
9580
9581 return (B_TRUE);
9582 }
9583
9584 /*
9585 * There is a user-level copy of this routine in ref_subr.c.
9586 * Changes should be kept in sync.
9587 */
9588 static int
nfs4_create_components(char * path,component4 * comp4)9589 nfs4_create_components(char *path, component4 *comp4)
9590 {
9591 int slen, plen, ncomp;
9592 char *ori_path, *nxtc, buf[MAXNAMELEN];
9593
9594 if (path == NULL)
9595 return (0);
9596
9597 plen = strlen(path) + 1; /* include the terminator */
9598 ori_path = path;
9599 ncomp = 0;
9600
9601 /* count number of components in the path */
9602 for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9603 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9604 if ((slen = nxtc - path) == 0) {
9605 path = nxtc + 1;
9606 continue;
9607 }
9608
9609 if (comp4 != NULL) {
9610 bcopy(path, buf, slen);
9611 buf[slen] = '\0';
9612 (void) str_to_utf8(buf, &comp4[ncomp]);
9613 }
9614
9615 ncomp++; /* 1 valid component */
9616 path = nxtc + 1;
9617 }
9618 if (*nxtc == '\0' || *nxtc == '\n')
9619 break;
9620 }
9621
9622 return (ncomp);
9623 }
9624
9625 /*
9626 * There is a user-level copy of this routine in ref_subr.c.
9627 * Changes should be kept in sync.
9628 */
9629 static int
make_pathname4(char * path,pathname4 * pathname)9630 make_pathname4(char *path, pathname4 *pathname)
9631 {
9632 int ncomp;
9633 component4 *comp4;
9634
9635 if (pathname == NULL)
9636 return (0);
9637
9638 if (path == NULL) {
9639 pathname->pathname4_val = NULL;
9640 pathname->pathname4_len = 0;
9641 return (0);
9642 }
9643
9644 /* count number of components to alloc buffer */
9645 if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9646 pathname->pathname4_val = NULL;
9647 pathname->pathname4_len = 0;
9648 return (0);
9649 }
9650 comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9651
9652 /* copy components into allocated buffer */
9653 ncomp = nfs4_create_components(path, comp4);
9654
9655 pathname->pathname4_val = comp4;
9656 pathname->pathname4_len = ncomp;
9657
9658 return (ncomp);
9659 }
9660
9661 #define xdr_fs_locations4 xdr_fattr4_fs_locations
9662
9663 fs_locations4 *
fetch_referral(vnode_t * vp,cred_t * cr)9664 fetch_referral(vnode_t *vp, cred_t *cr)
9665 {
9666 nvlist_t *nvl;
9667 char *stype, *sdata;
9668 fs_locations4 *result;
9669 char buf[1024];
9670 size_t bufsize;
9671 XDR xdr;
9672 int err;
9673
9674 /*
9675 * Check attrs to ensure it's a reparse point
9676 */
9677 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9678 return (NULL);
9679
9680 /*
9681 * Look for an NFS record and get the type and data
9682 */
9683 if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9684 return (NULL);
9685
9686 /*
9687 * With the type and data, upcall to get the referral
9688 */
9689 bufsize = sizeof (buf);
9690 bzero(buf, sizeof (buf));
9691 err = reparse_kderef((const char *)stype, (const char *)sdata,
9692 buf, &bufsize);
9693 reparse_free(nvl);
9694
9695 DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9696 char *, stype, char *, sdata, char *, buf, int, err);
9697 if (err) {
9698 cmn_err(CE_NOTE,
9699 "reparsed daemon not running: unable to get referral (%d)",
9700 err);
9701 return (NULL);
9702 }
9703
9704 /*
9705 * We get an XDR'ed record back from the kderef call
9706 */
9707 xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9708 result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9709 err = xdr_fs_locations4(&xdr, result);
9710 XDR_DESTROY(&xdr);
9711 if (err != TRUE) {
9712 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9713 int, err);
9714 return (NULL);
9715 }
9716
9717 /*
9718 * Look at path to recover fs_root, ignoring the leading '/'
9719 */
9720 (void) make_pathname4(vp->v_path, &result->fs_root);
9721
9722 return (result);
9723 }
9724
9725 char *
build_symlink(vnode_t * vp,cred_t * cr,size_t * strsz)9726 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9727 {
9728 fs_locations4 *fsl;
9729 fs_location4 *fs;
9730 char *server, *path, *symbuf;
9731 static char *prefix = "/net/";
9732 int i, size, npaths;
9733 uint_t len;
9734
9735 /* Get the referral */
9736 if ((fsl = fetch_referral(vp, cr)) == NULL)
9737 return (NULL);
9738
9739 /* Deal with only the first location and first server */
9740 fs = &fsl->locations_val[0];
9741 server = utf8_to_str(&fs->server_val[0], &len, NULL);
9742 if (server == NULL) {
9743 rfs4_free_fs_locations4(fsl);
9744 kmem_free(fsl, sizeof (fs_locations4));
9745 return (NULL);
9746 }
9747
9748 /* Figure out size for "/net/" + host + /path/path/path + NULL */
9749 size = strlen(prefix) + len;
9750 for (i = 0; i < fs->rootpath.pathname4_len; i++)
9751 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9752
9753 /* Allocate the symlink buffer and fill it */
9754 symbuf = kmem_zalloc(size, KM_SLEEP);
9755 (void) strcat(symbuf, prefix);
9756 (void) strcat(symbuf, server);
9757 kmem_free(server, len);
9758
9759 npaths = 0;
9760 for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9761 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
9762 if (path == NULL)
9763 continue;
9764 (void) strcat(symbuf, "/");
9765 (void) strcat(symbuf, path);
9766 npaths++;
9767 kmem_free(path, len);
9768 }
9769
9770 rfs4_free_fs_locations4(fsl);
9771 kmem_free(fsl, sizeof (fs_locations4));
9772
9773 if (strsz != NULL)
9774 *strsz = size;
9775 return (symbuf);
9776 }
9777
9778 /*
9779 * Check to see if we have a downrev Solaris client, so that we
9780 * can send it a symlink instead of a referral.
9781 */
9782 int
client_is_downrev(struct svc_req * req)9783 client_is_downrev(struct svc_req *req)
9784 {
9785 struct sockaddr *ca;
9786 rfs4_clntip_t *ci;
9787 bool_t create = FALSE;
9788 int is_downrev;
9789
9790 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9791 ASSERT(ca);
9792 ci = rfs4_find_clntip(ca, &create);
9793 if (ci == NULL)
9794 return (0);
9795 is_downrev = ci->ri_no_referrals;
9796 rfs4_dbe_rele(ci->ri_dbe);
9797 return (is_downrev);
9798 }
9799