1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <sys/types.h>
27 #include <sys/sysmacros.h>
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/fcntl.h>
31 #include <sys/vfs.h>
32 #include <sys/vnode.h>
33 #include <sys/share.h>
34 #include <sys/cmn_err.h>
35 #include <sys/kmem.h>
36 #include <sys/debug.h>
37 #include <sys/t_lock.h>
38 #include <sys/errno.h>
39 #include <sys/nbmlock.h>
40
/*
 * Debug switch: when non-zero on a DEBUG kernel, add_share() prints the
 * vnode's share reservation list after each successful insertion.
 */
int share_debug = 0;

#ifdef DEBUG
/* Debug-only helpers that print share reservations via printf(). */
static void print_shares(struct vnode *);
static void print_share(struct shrlock *);
#endif

/* Forward declaration; the definition appears later in this file. */
static int isreadonly(struct vnode *);
49
/*
 * Add the share reservation shr to vp.
 *
 * The request's access and deny bits are validated first.  Then, with
 * vp->v_lock held, the request is checked against every reservation
 * already on vp->v_shrlocks.  If no conflict is found, a private copy
 * of shr (including a copy of the owner bytes) is pushed onto the head
 * of the list.
 *
 * Returns 0 on success, EINVAL if the access/deny bits are not valid,
 * or EAGAIN if the request conflicts with an existing reservation.
 */
int
add_share(struct vnode *vp, struct shrlock *shr)
{
	struct shrlocklist *shrl;

	/*
	 * An access of zero is not legal, however some older clients
	 * generate it anyways.  Allow the request only if it is
	 * coming from a remote system.  Be generous in what you
	 * accept and strict in what you send.
	 */
	if ((shr->s_access == 0) && (GETSYSID(shr->s_sysid) == 0)) {
		return (EINVAL);
	}

	/*
	 * Sanity check to make sure we have valid options.
	 * There is known overlap but it doesn't hurt to be careful.
	 */
	if (shr->s_access & ~(F_RDACC|F_WRACC|F_RWACC|F_RMACC|F_MDACC)) {
		return (EINVAL);
	}
	if (shr->s_deny & ~(F_NODNY|F_RDDNY|F_WRDNY|F_RWDNY|F_COMPAT|
	    F_MANDDNY|F_RMDNY)) {
		return (EINVAL);
	}

	/*
	 * Walk the existing reservations.  New entries are inserted at the
	 * head of the list (see the end of this function), so an entry
	 * whose next pointer is NULL is the oldest ("first") reservation.
	 *
	 * Inside the loop, "break" grants the request without looking at
	 * the remaining entries, "continue" defers the decision to the
	 * next entry, and the EAGAIN returns reject the request.
	 */
	mutex_enter(&vp->v_lock);
	for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
		/*
		 * If the share owner matches previous request
		 * do special handling.  "Matches" means identical sysid,
		 * pid, owner length and owner bytes.
		 */
		if ((shrl->shr->s_sysid == shr->s_sysid) &&
		    (shrl->shr->s_pid == shr->s_pid) &&
		    (shrl->shr->s_own_len == shr->s_own_len) &&
		    bcmp(shrl->shr->s_owner, shr->s_owner,
		    shr->s_own_len) == 0) {

			/*
			 * If the existing request is F_COMPAT and
			 * is the first share then allow any F_COMPAT
			 * from the same process.  Trick: If the existing
			 * F_COMPAT is write access then it must have
			 * the same owner as the first.
			 */
			if ((shrl->shr->s_deny & F_COMPAT) &&
			    (shr->s_deny & F_COMPAT) &&
			    ((shrl->next == NULL) ||
			    (shrl->shr->s_access & F_WRACC)))
				break;
		}

		/*
		 * If a first share has been done in compatibility mode
		 * handle the special cases.  Every path through this
		 * block either grants (break) or rejects (EAGAIN).
		 */
		if ((shrl->shr->s_deny & F_COMPAT) && (shrl->next == NULL)) {

			if (!(shr->s_deny & F_COMPAT)) {
				/*
				 * The new request is not compat.  Fail if
				 * it wants write access, or wants to deny
				 * read, or the first share has write
				 * access.
				 */
				if ((shr->s_access & F_WRACC) ||
				    (shr->s_deny & F_RDDNY) ||
				    (shrl->shr->s_access & F_WRACC)) {
					mutex_exit(&vp->v_lock);
					return (EAGAIN);
				}
				/*
				 * If read only file allow, this may allow
				 * a deny write but that is meaningless on
				 * a read only file.
				 */
				if (isreadonly(vp))
					break;
				mutex_exit(&vp->v_lock);
				return (EAGAIN);
			}
			/*
			 * This is a compat request and read access
			 * and the first was also read access
			 * we always allow it, otherwise we reject because
			 * we have handled the only valid write case above.
			 */
			if ((shr->s_access == F_RDACC) &&
			    (shrl->shr->s_access == F_RDACC))
				break;
			mutex_exit(&vp->v_lock);
			return (EAGAIN);
		}

		/*
		 * If we are trying to share in compatibility mode
		 * and the current share is compat (and not the first)
		 * we don't know enough.
		 */
		if ((shrl->shr->s_deny & F_COMPAT) && (shr->s_deny & F_COMPAT))
			continue;

		/*
		 * If this is a compat we check for what can't succeed.
		 */
		if (shr->s_deny & F_COMPAT) {
			/*
			 * If we want write access or
			 * if anyone is denying read or
			 * if anyone has write access we fail
			 */
			if ((shr->s_access & F_WRACC) ||
			    (shrl->shr->s_deny & F_RDDNY) ||
			    (shrl->shr->s_access & F_WRACC)) {
				mutex_exit(&vp->v_lock);
				return (EAGAIN);
			}
			/*
			 * If the first was opened with only read access
			 * and is a read only file we allow.
			 */
			if (shrl->next == NULL) {
				if ((shrl->shr->s_access == F_RDACC) &&
				    isreadonly(vp)) {
					break;
				}
				mutex_exit(&vp->v_lock);
				return (EAGAIN);
			}
			/*
			 * We still can't determine our fate so continue
			 */
			continue;
		}

		/*
		 * Simple bitwise test, if we are trying to access what
		 * someone else is denying or we are trying to deny
		 * what someone else is accessing we fail.
		 */
		if ((shr->s_access & shrl->shr->s_deny) ||
		    (shr->s_deny & shrl->shr->s_access)) {
			mutex_exit(&vp->v_lock);
			return (EAGAIN);
		}
	}

	/*
	 * No conflict: build a private copy of the request.  Note that
	 * these KM_SLEEP allocations are made with vp->v_lock still held.
	 */
	shrl = kmem_alloc(sizeof (struct shrlocklist), KM_SLEEP);
	shrl->shr = kmem_alloc(sizeof (struct shrlock), KM_SLEEP);
	shrl->shr->s_access = shr->s_access;
	shrl->shr->s_deny = shr->s_deny;

	/*
	 * Make sure no other deny modes are also set with F_COMPAT
	 */
	if (shrl->shr->s_deny & F_COMPAT)
		shrl->shr->s_deny = F_COMPAT;
	shrl->shr->s_sysid = shr->s_sysid;		/* XXX ref cnt? */
	shrl->shr->s_pid = shr->s_pid;
	shrl->shr->s_own_len = shr->s_own_len;
	/* The owner bytes are copied, not borrowed from the caller. */
	shrl->shr->s_owner = kmem_alloc(shr->s_own_len, KM_SLEEP);
	bcopy(shr->s_owner, shrl->shr->s_owner, shr->s_own_len);
	/* Push onto the head of the vnode's reservation list. */
	shrl->next = vp->v_shrlocks;
	vp->v_shrlocks = shrl;
#ifdef DEBUG
	if (share_debug)
		print_shares(vp);
#endif

	mutex_exit(&vp->v_lock);

	return (0);
}
226
227 /*
228 * nlmid sysid pid
229 * ===== ===== ===
230 * !=0 !=0 =0 in cluster; NLM lock
231 * !=0 =0 =0 in cluster; special case for NLM lock
232 * !=0 =0 !=0 in cluster; PXFS local lock
233 * !=0 !=0 !=0 cannot happen
234 * =0 !=0 =0 not in cluster; NLM lock
235 * =0 =0 !=0 not in cluster; local lock
236 * =0 =0 =0 cannot happen
237 * =0 !=0 !=0 cannot happen
238 */
239 static int
is_match_for_del(struct shrlock * shr,struct shrlock * element)240 is_match_for_del(struct shrlock *shr, struct shrlock *element)
241 {
242 int nlmid1, nlmid2;
243 int result = 0;
244
245 nlmid1 = GETNLMID(shr->s_sysid);
246 nlmid2 = GETNLMID(element->s_sysid);
247
248 if (nlmid1 != 0) { /* in a cluster */
249 if (GETSYSID(shr->s_sysid) != 0 && shr->s_pid == 0) {
250 /*
251 * Lock obtained through nlm server. Just need to
252 * compare whole sysids. pid will always = 0.
253 */
254 result = shr->s_sysid == element->s_sysid;
255 } else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid == 0) {
256 /*
257 * This is a special case. The NLM server wishes to
258 * delete all share locks obtained through nlmid1.
259 */
260 result = (nlmid1 == nlmid2);
261 } else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid != 0) {
262 /*
263 * Lock obtained locally through PXFS. Match nlmids
264 * and pids.
265 */
266 result = (nlmid1 == nlmid2 &&
267 shr->s_pid == element->s_pid);
268 }
269 } else { /* not in a cluster */
270 result = ((shr->s_sysid == 0 &&
271 shr->s_pid == element->s_pid) ||
272 (shr->s_sysid != 0 &&
273 shr->s_sysid == element->s_sysid));
274 }
275 return (result);
276 }
277
278 /*
279 * Delete the given share reservation. Returns 0 if okay, EINVAL if the
280 * share could not be found. If the share reservation is an NBMAND share
281 * reservation, signal anyone waiting for the share to go away (e.g.,
282 * blocking lock requests).
283 */
284
285 int
del_share(struct vnode * vp,struct shrlock * shr)286 del_share(struct vnode *vp, struct shrlock *shr)
287 {
288 struct shrlocklist *shrl;
289 struct shrlocklist **shrlp;
290 int found = 0;
291 int is_nbmand = 0;
292
293 mutex_enter(&vp->v_lock);
294 /*
295 * Delete the shares with the matching sysid and owner
296 * But if own_len == 0 and sysid == 0 delete all with matching pid
297 * But if own_len == 0 delete all with matching sysid.
298 */
299 shrlp = &vp->v_shrlocks;
300 while (*shrlp) {
301 if ((shr->s_own_len == (*shrlp)->shr->s_own_len &&
302 (bcmp(shr->s_owner, (*shrlp)->shr->s_owner,
303 shr->s_own_len) == 0)) ||
304
305 (shr->s_own_len == 0 &&
306 is_match_for_del(shr, (*shrlp)->shr))) {
307
308 shrl = *shrlp;
309 *shrlp = shrl->next;
310
311 if (shrl->shr->s_deny & F_MANDDNY)
312 is_nbmand = 1;
313
314 /* XXX deref sysid */
315 kmem_free(shrl->shr->s_owner, shrl->shr->s_own_len);
316 kmem_free(shrl->shr, sizeof (struct shrlock));
317 kmem_free(shrl, sizeof (struct shrlocklist));
318 found++;
319 continue;
320 }
321 shrlp = &(*shrlp)->next;
322 }
323
324 if (is_nbmand)
325 cv_broadcast(&vp->v_cv);
326
327 mutex_exit(&vp->v_lock);
328 return (found ? 0 : EINVAL);
329 }
330
331 /*
332 * Clean up all local share reservations that the given process has with
333 * the given file.
334 */
335 void
cleanshares(struct vnode * vp,pid_t pid)336 cleanshares(struct vnode *vp, pid_t pid)
337 {
338 struct shrlock shr;
339
340 if (vp->v_shrlocks == NULL)
341 return;
342
343 shr.s_access = 0;
344 shr.s_deny = 0;
345 shr.s_pid = pid;
346 shr.s_sysid = 0;
347 shr.s_own_len = 0;
348 shr.s_owner = NULL;
349
350 (void) del_share(vp, &shr);
351 }
352
/*
 * Matching rule used by shr_has_remote_shares(): sysid1 is the sysid
 * being asked about, sysid2 is the sysid recorded in a reservation.
 * Returns non-zero when the reservation counts as a match.
 */
static int
is_match_for_has_remote(int32_t sysid1, int32_t sysid2)
{
	if (GETNLMID(sysid1) == 0) {
		/*
		 * Not in a cluster.  A zero query matches any non-zero
		 * (remote) sysid; otherwise the sysids must be equal.
		 */
		if (sysid1 == 0)
			return (sysid2 != 0);
		return (sysid1 == sysid2);
	}

	/* In a cluster. */
	if (GETSYSID(sysid1) == 0) {
		/*
		 * Special case: the NLM server identified by the nlmid
		 * in sysid1 wishes to find out if it has obtained any
		 * share locks on the vnode.
		 */
		return (GETNLMID(sysid1) == GETNLMID(sysid2));
	}

	/*
	 * Lock obtained through the NLM server: just compare the whole
	 * sysids.
	 */
	return (sysid1 == sysid2);
}
379
380
381 /*
382 * Determine whether there are any shares for the given vnode
383 * with a remote sysid. Returns zero if not, non-zero if there are.
384 * If sysid is non-zero then determine if this sysid has a share.
385 *
386 * Note that the return value from this function is potentially invalid
387 * once it has been returned. The caller is responsible for providing its
388 * own synchronization mechanism to ensure that the return value is useful.
389 */
390 int
shr_has_remote_shares(vnode_t * vp,int32_t sysid)391 shr_has_remote_shares(vnode_t *vp, int32_t sysid)
392 {
393 struct shrlocklist *shrl;
394 int result = 0;
395
396 mutex_enter(&vp->v_lock);
397 shrl = vp->v_shrlocks;
398 while (shrl) {
399 if (is_match_for_has_remote(sysid, shrl->shr->s_sysid)) {
400
401 result = 1;
402 break;
403 }
404 shrl = shrl->next;
405 }
406 mutex_exit(&vp->v_lock);
407 return (result);
408 }
409
410 static int
isreadonly(struct vnode * vp)411 isreadonly(struct vnode *vp)
412 {
413 return (vp->v_type != VCHR && vp->v_type != VBLK &&
414 vp->v_type != VFIFO && vn_is_readonly(vp));
415 }
416
417 #ifdef DEBUG
418 static void
print_shares(struct vnode * vp)419 print_shares(struct vnode *vp)
420 {
421 struct shrlocklist *shrl;
422
423 if (vp->v_shrlocks == NULL) {
424 printf("<NULL>\n");
425 return;
426 }
427
428 shrl = vp->v_shrlocks;
429 while (shrl) {
430 print_share(shrl->shr);
431 shrl = shrl->next;
432 }
433 }
434
435 static void
print_share(struct shrlock * shr)436 print_share(struct shrlock *shr)
437 {
438 int i;
439
440 if (shr == NULL) {
441 printf("<NULL>\n");
442 return;
443 }
444
445 printf(" access(%d): ", shr->s_access);
446 if (shr->s_access & F_RDACC)
447 printf("R");
448 if (shr->s_access & F_WRACC)
449 printf("W");
450 if ((shr->s_access & (F_RDACC|F_WRACC)) == 0)
451 printf("N");
452 printf("\n");
453 printf(" deny: ");
454 if (shr->s_deny & F_COMPAT)
455 printf("C");
456 if (shr->s_deny & F_RDDNY)
457 printf("R");
458 if (shr->s_deny & F_WRDNY)
459 printf("W");
460 if (shr->s_deny == F_NODNY)
461 printf("N");
462 printf("\n");
463 printf(" sysid: %d\n", shr->s_sysid);
464 printf(" pid: %d\n", shr->s_pid);
465 printf(" owner: [%d]", shr->s_own_len);
466 printf("'");
467 for (i = 0; i < shr->s_own_len; i++)
468 printf("%02x", (unsigned)shr->s_owner[i]);
469 printf("'\n");
470 }
471 #endif
472
473 /*
474 * Return non-zero if the given I/O request conflicts with a registered
475 * share reservation.
476 *
477 * A process is identified by the tuple (sysid, pid). When the caller
478 * context is passed to nbl_share_conflict, the sysid and pid in the
479 * caller context are used. Otherwise the sysid is zero, and the pid is
480 * taken from the current process.
481 *
482 * Conflict Algorithm:
483 * 1. An op request of NBL_READ will fail if a different
484 * process has a mandatory share reservation with deny read.
485 *
486 * 2. An op request of NBL_WRITE will fail if a different
487 * process has a mandatory share reservation with deny write.
488 *
489 * 3. An op request of NBL_READWRITE will fail if a different
490 * process has a mandatory share reservation with deny read
491 * or deny write.
492 *
493 * 4. An op request of NBL_REMOVE will fail if there is
494 * a mandatory share reservation with an access of read,
495 * write, or remove. (Anything other than meta data access).
496 *
497 * 5. An op request of NBL_RENAME will fail if there is
498 * a mandatory share reservation with:
499 * a) access write or access remove
500 * or
501 * b) access read and deny remove
502 *
503 * Otherwise there is no conflict and the op request succeeds.
504 *
505 * This behavior is required for interoperability between
506 * the nfs server, cifs server, and local access.
507 * This behavior can result in non-posix semantics.
508 *
509 * When mandatory share reservations are enabled, a process
510 * should call nbl_share_conflict to determine if the
511 * desired operation would conflict with an existing share
512 * reservation.
513 *
514 * The call to nbl_share_conflict may be skipped if the
515 * process has an existing share reservation and the operation
516 * is being performed in the context of that existing share
517 * reservation.
518 */
519 int
nbl_share_conflict(vnode_t * vp,nbl_op_t op,caller_context_t * ct)520 nbl_share_conflict(vnode_t *vp, nbl_op_t op, caller_context_t *ct)
521 {
522 struct shrlocklist *shrl;
523 int conflict = 0;
524 pid_t pid;
525 int sysid;
526
527 ASSERT(nbl_in_crit(vp));
528
529 if (ct == NULL) {
530 pid = curproc->p_pid;
531 sysid = 0;
532 } else {
533 pid = ct->cc_pid;
534 sysid = ct->cc_sysid;
535 }
536
537 mutex_enter(&vp->v_lock);
538 for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
539 if (!(shrl->shr->s_deny & F_MANDDNY))
540 continue;
541 /*
542 * NBL_READ, NBL_WRITE, and NBL_READWRITE need to
543 * check if the share reservation being examined
544 * belongs to the current process.
545 * NBL_REMOVE and NBL_RENAME do not.
546 * This behavior is required by the conflict
547 * algorithm described above.
548 */
549 switch (op) {
550 case NBL_READ:
551 if ((shrl->shr->s_deny & F_RDDNY) &&
552 (shrl->shr->s_sysid != sysid ||
553 shrl->shr->s_pid != pid))
554 conflict = 1;
555 break;
556 case NBL_WRITE:
557 if ((shrl->shr->s_deny & F_WRDNY) &&
558 (shrl->shr->s_sysid != sysid ||
559 shrl->shr->s_pid != pid))
560 conflict = 1;
561 break;
562 case NBL_READWRITE:
563 if ((shrl->shr->s_deny & F_RWDNY) &&
564 (shrl->shr->s_sysid != sysid ||
565 shrl->shr->s_pid != pid))
566 conflict = 1;
567 break;
568 case NBL_REMOVE:
569 if (shrl->shr->s_access & (F_RWACC|F_RMACC))
570 conflict = 1;
571 break;
572 case NBL_RENAME:
573 if (shrl->shr->s_access & (F_WRACC|F_RMACC))
574 conflict = 1;
575
576 else if ((shrl->shr->s_access & F_RDACC) &&
577 (shrl->shr->s_deny & F_RMDNY))
578 conflict = 1;
579 break;
580 #ifdef DEBUG
581 default:
582 cmn_err(CE_PANIC,
583 "nbl_share_conflict: bogus op (%d)",
584 op);
585 break;
586 #endif
587 }
588 if (conflict)
589 break;
590 }
591
592 mutex_exit(&vp->v_lock);
593 return (conflict);
594 }
595
596 /*
597 * Determine if the given process has a NBMAND share reservation on the
598 * given vnode. Returns 1 if the process has such a share reservation,
599 * returns 0 otherwise.
600 */
601 int
proc_has_nbmand_share_on_vp(vnode_t * vp,pid_t pid)602 proc_has_nbmand_share_on_vp(vnode_t *vp, pid_t pid)
603 {
604 struct shrlocklist *shrl;
605
606 /*
607 * Any NBMAND share reservation on the vp for this process?
608 */
609 mutex_enter(&vp->v_lock);
610 for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
611 if (shrl->shr->s_sysid == 0 &&
612 (shrl->shr->s_deny & F_MANDDNY) &&
613 (shrl->shr->s_pid == pid)) {
614 mutex_exit(&vp->v_lock);
615 return (1);
616 }
617 }
618 mutex_exit(&vp->v_lock);
619
620 return (0);
621 }
622