1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 #include <sys/types.h>
27 #include <sys/t_lock.h>
28 #include <sys/param.h>
29 #include <sys/time.h>
30 #include <sys/systm.h>
31 #include <sys/sysmacros.h>
32 #include <sys/resource.h>
33 #include <sys/signal.h>
34 #include <sys/cred.h>
35 #include <sys/user.h>
36 #include <sys/buf.h>
37 #include <sys/vfs.h>
38 #include <sys/vfs_opreg.h>
39 #include <sys/stat.h>
40 #include <sys/vnode.h>
41 #include <sys/mode.h>
42 #include <sys/proc.h>
43 #include <sys/disp.h>
44 #include <sys/file.h>
45 #include <sys/fcntl.h>
46 #include <sys/flock.h>
47 #include <sys/kmem.h>
48 #include <sys/uio.h>
49 #include <sys/dnlc.h>
50 #include <sys/conf.h>
51 #include <sys/errno.h>
52 #include <sys/mman.h>
53 #include <sys/fbuf.h>
54 #include <sys/pathname.h>
55 #include <sys/debug.h>
56 #include <sys/vmsystm.h>
57 #include <sys/cmn_err.h>
58 #include <sys/dirent.h>
59 #include <sys/errno.h>
60 #include <sys/modctl.h>
61 #include <sys/statvfs.h>
62 #include <sys/mount.h>
63 #include <sys/sunddi.h>
64 #include <sys/bootconf.h>
65 #include <sys/policy.h>
66
67 #include <vm/hat.h>
68 #include <vm/page.h>
69 #include <vm/pvn.h>
70 #include <vm/as.h>
71 #include <vm/seg.h>
72 #include <vm/seg_map.h>
73 #include <vm/seg_kmem.h>
74 #include <vm/seg_vn.h>
75 #include <vm/rm.h>
76 #include <vm/page.h>
77 #include <sys/swap.h>
78
79 #include <fs/fs_subr.h>
80
81 #include <sys/fs/udf_volume.h>
82 #include <sys/fs/udf_inode.h>
83
84 static int32_t udf_open(struct vnode **,
85 int32_t, struct cred *, caller_context_t *);
86 static int32_t udf_close(struct vnode *,
87 int32_t, int32_t, offset_t, struct cred *, caller_context_t *);
88 static int32_t udf_read(struct vnode *,
89 struct uio *, int32_t, struct cred *, caller_context_t *);
90 static int32_t udf_write(struct vnode *,
91 struct uio *, int32_t, struct cred *, caller_context_t *);
92 static int32_t udf_ioctl(struct vnode *,
93 int32_t, intptr_t, int32_t, struct cred *, int32_t *,
94 caller_context_t *);
95 static int32_t udf_getattr(struct vnode *,
96 struct vattr *, int32_t, struct cred *, caller_context_t *);
97 static int32_t udf_setattr(struct vnode *,
98 struct vattr *, int32_t, struct cred *, caller_context_t *);
99 static int32_t udf_access(struct vnode *,
100 int32_t, int32_t, struct cred *, caller_context_t *);
101 static int32_t udf_lookup(struct vnode *,
102 char *, struct vnode **, struct pathname *,
103 int32_t, struct vnode *, struct cred *,
104 caller_context_t *, int *, pathname_t *);
105 static int32_t udf_create(struct vnode *,
106 char *, struct vattr *, enum vcexcl,
107 int32_t, struct vnode **, struct cred *, int32_t,
108 caller_context_t *, vsecattr_t *);
109 static int32_t udf_remove(struct vnode *,
110 char *, struct cred *, caller_context_t *, int);
111 static int32_t udf_link(struct vnode *,
112 struct vnode *, char *, struct cred *, caller_context_t *, int);
113 static int32_t udf_rename(struct vnode *,
114 char *, struct vnode *, char *, struct cred *, caller_context_t *, int);
115 static int32_t udf_mkdir(struct vnode *,
116 char *, struct vattr *, struct vnode **, struct cred *,
117 caller_context_t *, int, vsecattr_t *);
118 static int32_t udf_rmdir(struct vnode *,
119 char *, struct vnode *, struct cred *, caller_context_t *, int);
120 static int32_t udf_readdir(struct vnode *,
121 struct uio *, struct cred *, int32_t *, caller_context_t *, int);
122 static int32_t udf_symlink(struct vnode *,
123 char *, struct vattr *, char *, struct cred *, caller_context_t *, int);
124 static int32_t udf_readlink(struct vnode *,
125 struct uio *, struct cred *, caller_context_t *);
126 static int32_t udf_fsync(struct vnode *,
127 int32_t, struct cred *, caller_context_t *);
128 static void udf_inactive(struct vnode *,
129 struct cred *, caller_context_t *);
130 static int32_t udf_fid(struct vnode *, struct fid *, caller_context_t *);
131 static int udf_rwlock(struct vnode *, int32_t, caller_context_t *);
132 static void udf_rwunlock(struct vnode *, int32_t, caller_context_t *);
133 static int32_t udf_seek(struct vnode *, offset_t, offset_t *,
134 caller_context_t *);
135 static int32_t udf_frlock(struct vnode *, int32_t,
136 struct flock64 *, int32_t, offset_t, struct flk_callback *, cred_t *,
137 caller_context_t *);
138 static int32_t udf_space(struct vnode *, int32_t,
139 struct flock64 *, int32_t, offset_t, cred_t *, caller_context_t *);
140 static int32_t udf_getpage(struct vnode *, offset_t,
141 size_t, uint32_t *, struct page **, size_t,
142 struct seg *, caddr_t, enum seg_rw, struct cred *, caller_context_t *);
143 static int32_t udf_putpage(struct vnode *, offset_t,
144 size_t, int32_t, struct cred *, caller_context_t *);
145 static int32_t udf_map(struct vnode *, offset_t, struct as *,
146 caddr_t *, size_t, uint8_t, uint8_t, uint32_t, struct cred *,
147 caller_context_t *);
148 static int32_t udf_addmap(struct vnode *, offset_t, struct as *,
149 caddr_t, size_t, uint8_t, uint8_t, uint32_t, struct cred *,
150 caller_context_t *);
151 static int32_t udf_delmap(struct vnode *, offset_t, struct as *,
152 caddr_t, size_t, uint32_t, uint32_t, uint32_t, struct cred *,
153 caller_context_t *);
154 static int32_t udf_l_pathconf(struct vnode *, int32_t,
155 ulong_t *, struct cred *, caller_context_t *);
156 static int32_t udf_pageio(struct vnode *, struct page *,
157 u_offset_t, size_t, int32_t, struct cred *, caller_context_t *);
158
159 int32_t ud_getpage_miss(struct vnode *, u_offset_t,
160 size_t, struct seg *, caddr_t, page_t *pl[],
161 size_t, enum seg_rw, int32_t);
162 void ud_getpage_ra(struct vnode *, u_offset_t, struct seg *, caddr_t);
163 int32_t ud_putpages(struct vnode *, offset_t, size_t, int32_t, struct cred *);
164 int32_t ud_page_fill(struct ud_inode *, page_t *,
165 u_offset_t, uint32_t, u_offset_t *);
166 int32_t ud_iodone(struct buf *);
167 int32_t ud_rdip(struct ud_inode *, struct uio *, int32_t, cred_t *);
168 int32_t ud_wrip(struct ud_inode *, struct uio *, int32_t, cred_t *);
169 int32_t ud_multi_strat(struct ud_inode *, page_t *, struct buf *, u_offset_t);
170 int32_t ud_slave_done(struct buf *);
171
172 /*
173 * Structures to control multiple IO operations to get or put pages
174 * that are backed by discontiguous blocks. The master struct is
175 * a dummy that holds the original bp from pageio_setup. The
176 * slave struct holds the working bp's to do the actual IO. Once
177 	 * all the slave IOs complete, the master is processed as if a single
178 * IO op has completed.
179 */
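/* running counter handed out as mm_index; presumably just a debug aid */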
180 uint32_t master_index = 0;
181 typedef struct mio_master {
182 kmutex_t mm_mutex; /* protect the fields below */
183 int32_t mm_size;
184 buf_t *mm_bp; /* original bp */
185 int32_t mm_resid; /* bytes remaining to transfer */
186 int32_t mm_error; /* accumulated error from slaves */
187 int32_t mm_index; /* XXX debugging */
188 } mio_master_t;
189
190 typedef struct mio_slave {
191 buf_t ms_buf; /* working buffer for this IO chunk */
192 mio_master_t *ms_ptr; /* pointer to master */
193 } mio_slave_t;
194
195 struct vnodeops *udf_vnodeops;
196
197 const fs_operation_def_t udf_vnodeops_template[] = {
198 VOPNAME_OPEN, { .vop_open = udf_open },
199 VOPNAME_CLOSE, { .vop_close = udf_close },
200 VOPNAME_READ, { .vop_read = udf_read },
201 VOPNAME_WRITE, { .vop_write = udf_write },
202 VOPNAME_IOCTL, { .vop_ioctl = udf_ioctl },
203 VOPNAME_GETATTR, { .vop_getattr = udf_getattr },
204 VOPNAME_SETATTR, { .vop_setattr = udf_setattr },
205 VOPNAME_ACCESS, { .vop_access = udf_access },
206 VOPNAME_LOOKUP, { .vop_lookup = udf_lookup },
207 VOPNAME_CREATE, { .vop_create = udf_create },
208 VOPNAME_REMOVE, { .vop_remove = udf_remove },
209 VOPNAME_LINK, { .vop_link = udf_link },
210 VOPNAME_RENAME, { .vop_rename = udf_rename },
211 VOPNAME_MKDIR, { .vop_mkdir = udf_mkdir },
212 VOPNAME_RMDIR, { .vop_rmdir = udf_rmdir },
213 VOPNAME_READDIR, { .vop_readdir = udf_readdir },
214 VOPNAME_SYMLINK, { .vop_symlink = udf_symlink },
215 VOPNAME_READLINK, { .vop_readlink = udf_readlink },
216 VOPNAME_FSYNC, { .vop_fsync = udf_fsync },
217 VOPNAME_INACTIVE, { .vop_inactive = udf_inactive },
218 VOPNAME_FID, { .vop_fid = udf_fid },
219 VOPNAME_RWLOCK, { .vop_rwlock = udf_rwlock },
220 VOPNAME_RWUNLOCK, { .vop_rwunlock = udf_rwunlock },
221 VOPNAME_SEEK, { .vop_seek = udf_seek },
222 VOPNAME_FRLOCK, { .vop_frlock = udf_frlock },
223 VOPNAME_SPACE, { .vop_space = udf_space },
224 VOPNAME_GETPAGE, { .vop_getpage = udf_getpage },
225 VOPNAME_PUTPAGE, { .vop_putpage = udf_putpage },
226 VOPNAME_MAP, { .vop_map = udf_map },
227 VOPNAME_ADDMAP, { .vop_addmap = udf_addmap },
228 VOPNAME_DELMAP, { .vop_delmap = udf_delmap },
229 VOPNAME_PATHCONF, { .vop_pathconf = udf_l_pathconf },
230 VOPNAME_PAGEIO, { .vop_pageio = udf_pageio },
231 VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support },
232 NULL, NULL
233 };
234
235 /* ARGSUSED */
236 static int32_t
237 udf_open(
238 struct vnode **vpp,
239 int32_t flag,
240 struct cred *cr,
241 caller_context_t *ct)
242 {
243 ud_printf("udf_open\n");
244
245 return (0);
246 }
247
248 /* ARGSUSED */
249 static int32_t
250 udf_close(
251 struct vnode *vp,
252 int32_t flag,
253 int32_t count,
254 offset_t offset,
255 struct cred *cr,
256 caller_context_t *ct)
257 {
258 struct ud_inode *ip = VTOI(vp);
259
260 ud_printf("udf_close\n");
261
262 ITIMES(ip);
263
264 cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
265 cleanshares(vp, ttoproc(curthread)->p_pid);
266
267 /*
268 * Push partially filled cluster at last close.
269 * ``last close'' is approximated because the dnlc
270 * may have a hold on the vnode.
271 */
272 if (vp->v_count <= 2 && vp->v_type != VBAD) {
273 struct ud_inode *ip = VTOI(vp);
274 if (ip->i_delaylen) {
275 (void) ud_putpages(vp, ip->i_delayoff, ip->i_delaylen,
276 B_ASYNC | B_FREE, cr);
277 ip->i_delaylen = 0;
278 }
279 }
280
281 return (0);
282 }
283
284 /* ARGSUSED */
285 static int32_t
286 udf_read(
287 struct vnode *vp,
288 struct uio *uiop,
289 int32_t ioflag,
290 struct cred *cr,
291 caller_context_t *ct)
292 {
293 struct ud_inode *ip = VTOI(vp);
294 int32_t error;
295
296 ud_printf("udf_read\n");
297
298 #ifdef __lock_lint
299 rw_enter(&ip->i_rwlock, RW_READER);
300 #endif
301
302 ASSERT(RW_READ_HELD(&ip->i_rwlock));
303
304 if (MANDLOCK(vp, ip->i_char)) {
305 /*
306 * udf_getattr ends up being called by chklock
307 */
308 error = chklock(vp, FREAD, uiop->uio_loffset,
309 uiop->uio_resid, uiop->uio_fmode, ct);
310 if (error) {
311 goto end;
312 }
313 }
314
315 rw_enter(&ip->i_contents, RW_READER);
316 error = ud_rdip(ip, uiop, ioflag, cr);
317 rw_exit(&ip->i_contents);
318
319 end:
320 #ifdef __lock_lint
321 rw_exit(&ip->i_rwlock);
322 #endif
323
324 return (error);
325 }
326
327
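/*
 * Write throttling tunables, used in udf_write() below and, presumably,
 * in ud_wrip() and the write completion path: while more than ud_HW
 * bytes of writes are outstanding on an inode, new writers block until
 * the backlog drains (ud_LW is the matching low-water mark);
 * ud_throttles counts how many times a writer had to wait, and clearing
 * ud_WRITES disables throttling altogether.
 */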
328 int32_t ud_WRITES = 1;
329 int32_t ud_HW = 96 * 1024;
330 int32_t ud_LW = 64 * 1024;
331 int32_t ud_throttles = 0;
332
333 /* ARGSUSED */
334 static int32_t
335 udf_write(
336 struct vnode *vp,
337 struct uio *uiop,
338 int32_t ioflag,
339 struct cred *cr,
340 caller_context_t *ct)
341 {
342 struct ud_inode *ip = VTOI(vp);
343 int32_t error = 0;
344
345 ud_printf("udf_write\n");
346
347 #ifdef __lock_lint
348 rw_enter(&ip->i_rwlock, RW_WRITER);
349 #endif
350
351 ASSERT(RW_WRITE_HELD(&ip->i_rwlock));
352
353 if (MANDLOCK(vp, ip->i_char)) {
354 /*
355 * ud_getattr ends up being called by chklock
356 */
357 error = chklock(vp, FWRITE, uiop->uio_loffset,
358 uiop->uio_resid, uiop->uio_fmode, ct);
359 if (error) {
360 goto end;
361 }
362 }
363 /*
364 * Throttle writes.
365 */
366 mutex_enter(&ip->i_tlock);
367 if (ud_WRITES && (ip->i_writes > ud_HW)) {
368 while (ip->i_writes > ud_HW) {
369 ud_throttles++;
370 cv_wait(&ip->i_wrcv, &ip->i_tlock);
371 }
372 }
373 mutex_exit(&ip->i_tlock);
374
375 /*
376 * Write to the file
377 */
378 rw_enter(&ip->i_contents, RW_WRITER);
379 if ((ioflag & FAPPEND) != 0 && (ip->i_type == VREG)) {
380 /*
381 * In append mode start at end of file.
382 */
383 uiop->uio_loffset = ip->i_size;
384 }
385 error = ud_wrip(ip, uiop, ioflag, cr);
386 rw_exit(&ip->i_contents);
387
388 end:
389 #ifdef __lock_lint
390 rw_exit(&ip->i_rwlock);
391 #endif
392
393 return (error);
394 }
395
396 /* ARGSUSED */
397 static int32_t
398 udf_ioctl(
399 struct vnode *vp,
400 int32_t cmd,
401 intptr_t arg,
402 int32_t flag,
403 struct cred *cr,
404 int32_t *rvalp,
405 caller_context_t *ct)
406 {
407 return (ENOTTY);
408 }
409
410 /* ARGSUSED */
411 static int32_t
412 udf_getattr(
413 struct vnode *vp,
414 struct vattr *vap,
415 int32_t flags,
416 struct cred *cr,
417 caller_context_t *ct)
418 {
419 struct ud_inode *ip = VTOI(vp);
420
421 ud_printf("udf_getattr\n");
422
423 if (vap->va_mask == AT_SIZE) {
424 /*
425 	 * For performance, if only the size is requested, don't bother
426 * with anything else.
427 */
428 vap->va_size = ip->i_size;
429 return (0);
430 }
431
432 rw_enter(&ip->i_contents, RW_READER);
433
434 vap->va_type = vp->v_type;
435 vap->va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char;
436
437 vap->va_uid = ip->i_uid;
438 vap->va_gid = ip->i_gid;
439 vap->va_fsid = ip->i_dev;
440 vap->va_nodeid = ip->i_icb_lbano;
441 vap->va_nlink = ip->i_nlink;
442 vap->va_size = ip->i_size;
443 vap->va_seq = ip->i_seq;
444 if (vp->v_type == VCHR || vp->v_type == VBLK) {
445 vap->va_rdev = ip->i_rdev;
446 } else {
447 vap->va_rdev = 0;
448 }
449
450 mutex_enter(&ip->i_tlock);
451 ITIMES_NOLOCK(ip); /* mark correct time in inode */
452 vap->va_atime.tv_sec = (time_t)ip->i_atime.tv_sec;
453 vap->va_atime.tv_nsec = ip->i_atime.tv_nsec;
454 vap->va_mtime.tv_sec = (time_t)ip->i_mtime.tv_sec;
455 vap->va_mtime.tv_nsec = ip->i_mtime.tv_nsec;
456 vap->va_ctime.tv_sec = (time_t)ip->i_ctime.tv_sec;
457 vap->va_ctime.tv_nsec = ip->i_ctime.tv_nsec;
458 mutex_exit(&ip->i_tlock);
459
460 switch (ip->i_type) {
461 case VBLK:
462 vap->va_blksize = MAXBSIZE;
463 break;
464 case VCHR:
465 vap->va_blksize = MAXBSIZE;
466 break;
467 default:
468 vap->va_blksize = ip->i_udf->udf_lbsize;
469 break;
470 }
471 vap->va_nblocks = ip->i_lbr << ip->i_udf->udf_l2d_shift;
472
473 rw_exit(&ip->i_contents);
474
475 return (0);
476 }
477
478 static int
479 ud_iaccess_vmode(void *ip, int mode, struct cred *cr)
480 {
481 return (ud_iaccess(ip, UD_UPERM2DPERM(mode), cr, 0));
482 }
483
484 /*ARGSUSED4*/
485 static int32_t
486 udf_setattr(
487 struct vnode *vp,
488 struct vattr *vap,
489 int32_t flags,
490 struct cred *cr,
491 caller_context_t *ct)
492 {
493 int32_t error = 0;
494 uint32_t mask = vap->va_mask;
495 struct ud_inode *ip;
496 timestruc_t now;
497 struct vattr ovap;
498
499 ud_printf("udf_setattr\n");
500
501 ip = VTOI(vp);
502
503 /*
504 	 * no updates are allowed to strategy type 4096 files
505 */
506 if (ip->i_astrat == STRAT_TYPE4096) {
507 return (EINVAL);
508 }
509
510 /*
511 * Cannot set these attributes
512 */
513 if (mask & AT_NOSET) {
514 return (EINVAL);
515 }
516
517 rw_enter(&ip->i_rwlock, RW_WRITER);
518 rw_enter(&ip->i_contents, RW_WRITER);
519
520 ovap.va_uid = ip->i_uid;
521 ovap.va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char;
522 error = secpolicy_vnode_setattr(cr, vp, vap, &ovap, flags,
523 ud_iaccess_vmode, ip);
524 if (error)
525 goto update_inode;
526
527 mask = vap->va_mask;
528 /*
529 * Change file access modes.
530 */
531 if (mask & AT_MODE) {
532 ip->i_perm = VA2UD_PERM(vap->va_mode);
533 ip->i_char = vap->va_mode & (VSUID | VSGID | VSVTX);
534 mutex_enter(&ip->i_tlock);
535 ip->i_flag |= ICHG;
536 mutex_exit(&ip->i_tlock);
537 }
538 if (mask & (AT_UID|AT_GID)) {
539 if (mask & AT_UID) {
540 ip->i_uid = vap->va_uid;
541 }
542 if (mask & AT_GID) {
543 ip->i_gid = vap->va_gid;
544 }
545 mutex_enter(&ip->i_tlock);
546 ip->i_flag |= ICHG;
547 mutex_exit(&ip->i_tlock);
548 }
549 /*
550 * Truncate file. Must have write permission and not be a directory.
551 */
552 if (mask & AT_SIZE) {
553 if (vp->v_type == VDIR) {
554 error = EISDIR;
555 goto update_inode;
556 }
557 if (error = ud_iaccess(ip, IWRITE, cr, 0)) {
558 goto update_inode;
559 }
560 if (vap->va_size > MAXOFFSET_T) {
561 error = EFBIG;
562 goto update_inode;
563 }
564 if (error = ud_itrunc(ip, vap->va_size, 0, cr)) {
565 goto update_inode;
566 }
567 }
568 /*
569 * Change file access or modified times.
570 */
571 if (mask & (AT_ATIME|AT_MTIME)) {
572 mutex_enter(&ip->i_tlock);
573 if (mask & AT_ATIME) {
574 ip->i_atime.tv_sec = vap->va_atime.tv_sec;
575 ip->i_atime.tv_nsec = vap->va_atime.tv_nsec;
576 ip->i_flag &= ~IACC;
577 }
578 if (mask & AT_MTIME) {
579 ip->i_mtime.tv_sec = vap->va_mtime.tv_sec;
580 ip->i_mtime.tv_nsec = vap->va_mtime.tv_nsec;
581 gethrestime(&now);
582 ip->i_ctime.tv_sec = now.tv_sec;
583 ip->i_ctime.tv_nsec = now.tv_nsec;
584 ip->i_flag &= ~(IUPD|ICHG);
585 ip->i_flag |= IMODTIME;
586 }
587 ip->i_flag |= IMOD;
588 mutex_exit(&ip->i_tlock);
589 }
590
591 update_inode:
592 if (curthread->t_flag & T_DONTPEND) {
593 ud_iupdat(ip, 1);
594 } else {
595 ITIMES_NOLOCK(ip);
596 }
597 rw_exit(&ip->i_contents);
598 rw_exit(&ip->i_rwlock);
599
600 return (error);
601 }
602
603 /* ARGSUSED */
604 static int32_t
605 udf_access(
606 struct vnode *vp,
607 int32_t mode,
608 int32_t flags,
609 struct cred *cr,
610 caller_context_t *ct)
611 {
612 struct ud_inode *ip = VTOI(vp);
613
614 ud_printf("udf_access\n");
615
616 if (ip->i_udf == NULL) {
617 return (EIO);
618 }
619
620 return (ud_iaccess(ip, UD_UPERM2DPERM(mode), cr, 1));
621 }
622
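/*
 * When set, udf_lookup() below marks a sticky (ISVTX), non-executable,
 * non-directory node with VISSWAP, the traditional "sticky hack"
 * treatment for swap-like files; it is left as a tunable so the
 * behavior can be disabled.
 */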
623 int32_t udfs_stickyhack = 1;
624
625 /* ARGSUSED */
626 static int32_t
627 udf_lookup(
628 struct vnode *dvp,
629 char *nm,
630 struct vnode **vpp,
631 struct pathname *pnp,
632 int32_t flags,
633 struct vnode *rdir,
634 struct cred *cr,
635 caller_context_t *ct,
636 int *direntflags,
637 pathname_t *realpnp)
638 {
639 int32_t error;
640 struct vnode *vp;
641 struct ud_inode *ip, *xip;
642
643 ud_printf("udf_lookup\n");
644 /*
645 * Null component name is a synonym for directory being searched.
646 */
647 if (*nm == '\0') {
648 VN_HOLD(dvp);
649 *vpp = dvp;
650 error = 0;
651 goto out;
652 }
653
654 /*
655 * Fast path: Check the directory name lookup cache.
656 */
657 ip = VTOI(dvp);
658 if (vp = dnlc_lookup(dvp, nm)) {
659 /*
660 * Check accessibility of directory.
661 */
662 if ((error = ud_iaccess(ip, IEXEC, cr, 1)) != 0) {
663 VN_RELE(vp);
664 }
665 xip = VTOI(vp);
666 } else {
667 error = ud_dirlook(ip, nm, &xip, cr, 1);
668 ITIMES(ip);
669 }
670
671 if (error == 0) {
672 ip = xip;
673 *vpp = ITOV(ip);
674 if ((ip->i_type != VDIR) &&
675 (ip->i_char & ISVTX) &&
676 ((ip->i_perm & IEXEC) == 0) &&
677 udfs_stickyhack) {
678 mutex_enter(&(*vpp)->v_lock);
679 (*vpp)->v_flag |= VISSWAP;
680 mutex_exit(&(*vpp)->v_lock);
681 }
682 ITIMES(ip);
683 /*
684 * If vnode is a device return special vnode instead.
685 */
686 if (IS_DEVVP(*vpp)) {
687 struct vnode *newvp;
688 newvp = specvp(*vpp, (*vpp)->v_rdev,
689 (*vpp)->v_type, cr);
690 VN_RELE(*vpp);
691 if (newvp == NULL) {
692 error = ENOSYS;
693 } else {
694 *vpp = newvp;
695 }
696 }
697 }
698 out:
699 return (error);
700 }
701
702 /* ARGSUSED */
703 static int32_t
704 udf_create(
705 struct vnode *dvp,
706 char *name,
707 struct vattr *vap,
708 enum vcexcl excl,
709 int32_t mode,
710 struct vnode **vpp,
711 struct cred *cr,
712 int32_t flag,
713 caller_context_t *ct,
714 vsecattr_t *vsecp)
715 {
716 int32_t error;
717 struct ud_inode *ip = VTOI(dvp), *xip;
718
719 ud_printf("udf_create\n");
720
721 if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr) != 0)
722 vap->va_mode &= ~VSVTX;
723
724 if (*name == '\0') {
725 /*
726 * Null component name refers to the directory itself.
727 */
728 VN_HOLD(dvp);
729 ITIMES(ip);
730 error = EEXIST;
731 } else {
732 xip = NULL;
733 rw_enter(&ip->i_rwlock, RW_WRITER);
734 error = ud_direnter(ip, name, DE_CREATE,
735 (struct ud_inode *)0, (struct ud_inode *)0,
736 vap, &xip, cr, ct);
737 rw_exit(&ip->i_rwlock);
738 ITIMES(ip);
739 ip = xip;
740 }
741 #ifdef __lock_lint
742 rw_enter(&ip->i_contents, RW_WRITER);
743 #else
744 if (ip != NULL) {
745 rw_enter(&ip->i_contents, RW_WRITER);
746 }
747 #endif
748
749 /*
750 * If the file already exists and this is a non-exclusive create,
751 * check permissions and allow access for non-directories.
752 * Read-only create of an existing directory is also allowed.
753 * We fail an exclusive create of anything which already exists.
754 */
755 if (error == EEXIST) {
756 if (excl == NONEXCL) {
757 if ((ip->i_type == VDIR) && (mode & VWRITE)) {
758 error = EISDIR;
759 } else if (mode) {
760 error = ud_iaccess(ip,
761 UD_UPERM2DPERM(mode), cr, 0);
762 } else {
763 error = 0;
764 }
765 }
766 if (error) {
767 rw_exit(&ip->i_contents);
768 VN_RELE(ITOV(ip));
769 goto out;
770 } else if ((ip->i_type == VREG) &&
771 (vap->va_mask & AT_SIZE) && vap->va_size == 0) {
772 /*
773 * Truncate regular files, if requested by caller.
774 * Grab i_rwlock to make sure no one else is
775 * currently writing to the file (we promised
776 * bmap we would do this).
777 * Must get the locks in the correct order.
778 */
779 if (ip->i_size == 0) {
780 ip->i_flag |= ICHG | IUPD;
781 } else {
782 rw_exit(&ip->i_contents);
783 rw_enter(&ip->i_rwlock, RW_WRITER);
784 rw_enter(&ip->i_contents, RW_WRITER);
785 (void) ud_itrunc(ip, 0, 0, cr);
786 rw_exit(&ip->i_rwlock);
787 }
788 vnevent_create(ITOV(ip), ct);
789 }
790 }
791
792 if (error == 0) {
793 *vpp = ITOV(ip);
794 ITIMES(ip);
795 }
796 #ifdef __lock_lint
797 rw_exit(&ip->i_contents);
798 #else
799 if (ip != NULL) {
800 rw_exit(&ip->i_contents);
801 }
802 #endif
803 if (error) {
804 goto out;
805 }
806
807 /*
808 * If vnode is a device return special vnode instead.
809 */
810 if (!error && IS_DEVVP(*vpp)) {
811 struct vnode *newvp;
812
813 newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
814 VN_RELE(*vpp);
815 if (newvp == NULL) {
816 error = ENOSYS;
817 goto out;
818 }
819 *vpp = newvp;
820 }
821 out:
822 return (error);
823 }
824
825 /* ARGSUSED */
826 static int32_t
827 udf_remove(
828 struct vnode *vp,
829 char *nm,
830 struct cred *cr,
831 caller_context_t *ct,
832 int flags)
833 {
834 int32_t error;
835 struct ud_inode *ip = VTOI(vp);
836
837 ud_printf("udf_remove\n");
838
839 rw_enter(&ip->i_rwlock, RW_WRITER);
840 error = ud_dirremove(ip, nm,
841 (struct ud_inode *)0, (struct vnode *)0, DR_REMOVE, cr, ct);
842 rw_exit(&ip->i_rwlock);
843 ITIMES(ip);
844
845 return (error);
846 }
847
848 /* ARGSUSED */
849 static int32_t
850 udf_link(
851 struct vnode *tdvp,
852 struct vnode *svp,
853 char *tnm,
854 struct cred *cr,
855 caller_context_t *ct,
856 int flags)
857 {
858 int32_t error;
859 struct vnode *realvp;
860 struct ud_inode *sip;
861 struct ud_inode *tdp;
862
863 ud_printf("udf_link\n");
864 if (VOP_REALVP(svp, &realvp, ct) == 0) {
865 svp = realvp;
866 }
867
868 /*
869 * Do not allow links to directories
870 */
871 if (svp->v_type == VDIR) {
872 return (EPERM);
873 }
874
875 sip = VTOI(svp);
876
877 if (sip->i_uid != crgetuid(cr) && secpolicy_basic_link(cr) != 0)
878 return (EPERM);
879
880 tdp = VTOI(tdvp);
881
882 rw_enter(&tdp->i_rwlock, RW_WRITER);
883 error = ud_direnter(tdp, tnm, DE_LINK, (struct ud_inode *)0,
884 sip, (struct vattr *)0, (struct ud_inode **)0, cr, ct);
885 rw_exit(&tdp->i_rwlock);
886 ITIMES(sip);
887 ITIMES(tdp);
888
889 if (error == 0) {
890 vnevent_link(svp, ct);
891 }
892
893 return (error);
894 }
895
896 /* ARGSUSED */
897 static int32_t
898 udf_rename(
899 struct vnode *sdvp,
900 char *snm,
901 struct vnode *tdvp,
902 char *tnm,
903 struct cred *cr,
904 caller_context_t *ct,
905 int flags)
906 {
907 int32_t error = 0;
908 struct udf_vfs *udf_vfsp;
909 struct ud_inode *sip; /* source inode */
910 struct ud_inode *sdp, *tdp; /* source and target parent inode */
911 struct vnode *realvp;
912
913 ud_printf("udf_rename\n");
914
915 if (VOP_REALVP(tdvp, &realvp, ct) == 0) {
916 tdvp = realvp;
917 }
918
919 sdp = VTOI(sdvp);
920 tdp = VTOI(tdvp);
921
922 udf_vfsp = sdp->i_udf;
923
924 mutex_enter(&udf_vfsp->udf_rename_lck);
925 /*
926 * Look up inode of file we're supposed to rename.
927 */
928 if (error = ud_dirlook(sdp, snm, &sip, cr, 0)) {
929 mutex_exit(&udf_vfsp->udf_rename_lck);
930 return (error);
931 }
932 /*
933 * be sure this is not a directory with another file system mounted
934 	 * over it. If it is, just give up the locks and return
935 	 * EBUSY.
936 */
937 if (vn_mountedvfs(ITOV(sip)) != NULL) {
938 error = EBUSY;
939 goto errout;
940 }
941 /*
942 * Make sure we can delete the source entry. This requires
943 * write permission on the containing directory. If that
944 * directory is "sticky" it further requires (except for
945 * privileged users) that the user own the directory or the
946 * source entry, or else have permission to write the source
947 * entry.
948 */
949 rw_enter(&sdp->i_contents, RW_READER);
950 rw_enter(&sip->i_contents, RW_READER);
951 if ((error = ud_iaccess(sdp, IWRITE, cr, 0)) != 0 ||
952 (error = ud_sticky_remove_access(sdp, sip, cr)) != 0) {
953 rw_exit(&sip->i_contents);
954 rw_exit(&sdp->i_contents);
955 ITIMES(sip);
956 goto errout;
957 }
958
959 /*
960 * Check for renaming '.' or '..' or alias of '.'
961 */
962 if ((strcmp(snm, ".") == 0) ||
963 (strcmp(snm, "..") == 0) ||
964 (sdp == sip)) {
965 error = EINVAL;
966 rw_exit(&sip->i_contents);
967 rw_exit(&sdp->i_contents);
968 goto errout;
969 }
970 rw_exit(&sip->i_contents);
971 rw_exit(&sdp->i_contents);
972
973
974 /*
975 * Link source to the target.
976 */
977 rw_enter(&tdp->i_rwlock, RW_WRITER);
978 if (error = ud_direnter(tdp, tnm, DE_RENAME, sdp, sip,
979 (struct vattr *)0, (struct ud_inode **)0, cr, ct)) {
980 /*
981 * ESAME isn't really an error; it indicates that the
982 * operation should not be done because the source and target
983 * are the same file, but that no error should be reported.
984 */
985 if (error == ESAME) {
986 error = 0;
987 }
988 rw_exit(&tdp->i_rwlock);
989 goto errout;
990 }
991 vnevent_rename_src(ITOV(sip), sdvp, snm, ct);
992 rw_exit(&tdp->i_rwlock);
993
994 rw_enter(&sdp->i_rwlock, RW_WRITER);
995 /*
996 * Unlink the source.
997 * Remove the source entry. ud_dirremove() checks that the entry
998 * still reflects sip, and returns an error if it doesn't.
999 * If the entry has changed just forget about it. Release
1000 * the source inode.
1001 */
1002 if ((error = ud_dirremove(sdp, snm, sip, (struct vnode *)0,
1003 DR_RENAME, cr, ct)) == ENOENT) {
1004 error = 0;
1005 }
1006 rw_exit(&sdp->i_rwlock);
1007 errout:
1008 ITIMES(sdp);
1009 ITIMES(tdp);
1010 VN_RELE(ITOV(sip));
1011 mutex_exit(&udf_vfsp->udf_rename_lck);
1012
1013 return (error);
1014 }
1015
1016 /* ARGSUSED */
1017 static int32_t
1018 udf_mkdir(
1019 struct vnode *dvp,
1020 char *dirname,
1021 struct vattr *vap,
1022 struct vnode **vpp,
1023 struct cred *cr,
1024 caller_context_t *ct,
1025 int flags,
1026 vsecattr_t *vsecp)
1027 {
1028 int32_t error;
1029 struct ud_inode *ip;
1030 struct ud_inode *xip;
1031
1032 ASSERT((vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));
1033
1034 ud_printf("udf_mkdir\n");
1035
1036 ip = VTOI(dvp);
1037 rw_enter(&ip->i_rwlock, RW_WRITER);
1038 error = ud_direnter(ip, dirname, DE_MKDIR,
1039 (struct ud_inode *)0, (struct ud_inode *)0, vap, &xip, cr, ct);
1040 rw_exit(&ip->i_rwlock);
1041 ITIMES(ip);
1042 if (error == 0) {
1043 ip = xip;
1044 *vpp = ITOV(ip);
1045 ITIMES(ip);
1046 } else if (error == EEXIST) {
1047 ITIMES(xip);
1048 VN_RELE(ITOV(xip));
1049 }
1050
1051 return (error);
1052 }
1053
1054 /* ARGSUSED */
1055 static int32_t
1056 udf_rmdir(
1057 struct vnode *vp,
1058 char *nm,
1059 struct vnode *cdir,
1060 struct cred *cr,
1061 caller_context_t *ct,
1062 int flags)
1063 {
1064 int32_t error;
1065 struct ud_inode *ip = VTOI(vp);
1066
1067 ud_printf("udf_rmdir\n");
1068
1069 rw_enter(&ip->i_rwlock, RW_WRITER);
1070 error = ud_dirremove(ip, nm, (struct ud_inode *)0, cdir, DR_RMDIR,
1071 cr, ct);
1072 rw_exit(&ip->i_rwlock);
1073 ITIMES(ip);
1074
1075 return (error);
1076 }
1077
1078 /* ARGSUSED */
1079 static int32_t
1080 udf_readdir(
1081 struct vnode *vp,
1082 struct uio *uiop,
1083 struct cred *cr,
1084 int32_t *eofp,
1085 caller_context_t *ct,
1086 int flags)
1087 {
1088 struct ud_inode *ip;
1089 struct dirent64 *nd;
1090 struct udf_vfs *udf_vfsp;
1091 int32_t error = 0, len, outcount = 0;
1092 uint32_t dirsiz, offset;
1093 uint32_t bufsize, ndlen, dummy;
1094 caddr_t outbuf;
1095 caddr_t outb, end_outb;
1096 struct iovec *iovp;
1097
1098 uint8_t *dname;
1099 int32_t length;
1100
1101 uint8_t *buf = NULL;
1102
1103 struct fbuf *fbp = NULL;
1104 struct file_id *fid;
1105 uint8_t *name;
1106
1107
1108 ud_printf("udf_readdir\n");
1109
1110 ip = VTOI(vp);
1111 udf_vfsp = ip->i_udf;
1112
1113 dirsiz = ip->i_size;
1114 if ((uiop->uio_offset >= dirsiz) ||
1115 (ip->i_nlink <= 0)) {
1116 if (eofp) {
1117 *eofp = 1;
1118 }
1119 return (0);
1120 }
1121
1122 offset = uiop->uio_offset;
1123 iovp = uiop->uio_iov;
1124 bufsize = iovp->iov_len;
1125
1126 outb = outbuf = (char *)kmem_alloc((uint32_t)bufsize, KM_SLEEP);
1127 end_outb = outb + bufsize;
1128 nd = (struct dirent64 *)outbuf;
1129
1130 dname = (uint8_t *)kmem_zalloc(1024, KM_SLEEP);
1131 buf = (uint8_t *)kmem_zalloc(udf_vfsp->udf_lbsize, KM_SLEEP);
1132
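	/*
	 * The "." entry is synthesized at offset 0 and reports a d_off of
	 * 0x10; when a caller resumes from that cookie, map 0x10 back to
	 * on-media offset 0 before walking the directory's FIDs.
	 */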
1133 if (offset == 0) {
1134 len = DIRENT64_RECLEN(1);
1135 if (((caddr_t)nd + len) >= end_outb) {
1136 error = EINVAL;
1137 goto end;
1138 }
1139 nd->d_ino = ip->i_icb_lbano;
1140 nd->d_reclen = (uint16_t)len;
1141 nd->d_off = 0x10;
1142 nd->d_name[0] = '.';
1143 bzero(&nd->d_name[1], DIRENT64_NAMELEN(len) - 1);
1144 nd = (struct dirent64 *)((char *)nd + nd->d_reclen);
1145 outcount++;
1146 } else if (offset == 0x10) {
1147 offset = 0;
1148 }
1149
1150 while (offset < dirsiz) {
1151 error = ud_get_next_fid(ip, &fbp,
1152 offset, &fid, &name, buf);
1153 if (error != 0) {
1154 break;
1155 }
1156
1157 if ((fid->fid_flags & FID_DELETED) == 0) {
1158 if (fid->fid_flags & FID_PARENT) {
1159
1160 len = DIRENT64_RECLEN(2);
1161 if (((caddr_t)nd + len) >= end_outb) {
1162 error = EINVAL;
1163 break;
1164 }
1165
1166 nd->d_ino = ip->i_icb_lbano;
1167 nd->d_reclen = (uint16_t)len;
1168 nd->d_off = offset + FID_LEN(fid);
1169 nd->d_name[0] = '.';
1170 nd->d_name[1] = '.';
1171 bzero(&nd->d_name[2],
1172 DIRENT64_NAMELEN(len) - 2);
1173 nd = (struct dirent64 *)
1174 ((char *)nd + nd->d_reclen);
1175 } else {
1176 if ((error = ud_uncompress(fid->fid_idlen,
1177 &length, name, dname)) != 0) {
1178 break;
1179 }
1180 if (length == 0) {
1181 offset += FID_LEN(fid);
1182 continue;
1183 }
1184 len = DIRENT64_RECLEN(length);
1185 if (((caddr_t)nd + len) >= end_outb) {
1186 if (!outcount) {
1187 error = EINVAL;
1188 }
1189 break;
1190 }
1191 (void) strncpy(nd->d_name,
1192 (caddr_t)dname, length);
1193 bzero(&nd->d_name[length],
1194 DIRENT64_NAMELEN(len) - length);
1195 nd->d_ino = ud_xlate_to_daddr(udf_vfsp,
1196 SWAP_16(fid->fid_icb.lad_ext_prn),
1197 SWAP_32(fid->fid_icb.lad_ext_loc), 1,
1198 &dummy);
1199 nd->d_reclen = (uint16_t)len;
1200 nd->d_off = offset + FID_LEN(fid);
1201 nd = (struct dirent64 *)
1202 ((char *)nd + nd->d_reclen);
1203 }
1204 outcount++;
1205 }
1206
1207 offset += FID_LEN(fid);
1208 }
1209
1210 end:
1211 if (fbp != NULL) {
1212 fbrelse(fbp, S_OTHER);
1213 }
1214 ndlen = ((char *)nd - outbuf);
1215 /*
1216 * In case of error do not call uiomove.
1217 * Return the error to the caller.
1218 */
1219 if ((error == 0) && (ndlen != 0)) {
1220 error = uiomove(outbuf, (long)ndlen, UIO_READ, uiop);
1221 uiop->uio_offset = offset;
1222 }
1223 kmem_free((caddr_t)buf, udf_vfsp->udf_lbsize);
1224 kmem_free((caddr_t)dname, 1024);
1225 kmem_free(outbuf, (uint32_t)bufsize);
1226 if (eofp && error == 0) {
1227 *eofp = (uiop->uio_offset >= dirsiz);
1228 }
1229 return (error);
1230 }
1231
1232 /* ARGSUSED */
1233 static int32_t
1234 udf_symlink(
1235 struct vnode *dvp,
1236 char *linkname,
1237 struct vattr *vap,
1238 char *target,
1239 struct cred *cr,
1240 caller_context_t *ct,
1241 int flags)
1242 {
1243 int32_t error = 0, outlen;
1244 uint32_t ioflag = 0;
1245 struct ud_inode *ip, *dip = VTOI(dvp);
1246
1247 struct path_comp *pc;
1248 int8_t *dname = NULL, *uname = NULL, *sp;
1249
1250 ud_printf("udf_symlink\n");
1251
1252 ip = (struct ud_inode *)0;
1253 vap->va_type = VLNK;
1254 vap->va_rdev = 0;
1255
1256 rw_enter(&dip->i_rwlock, RW_WRITER);
1257 error = ud_direnter(dip, linkname, DE_CREATE,
1258 (struct ud_inode *)0, (struct ud_inode *)0, vap, &ip, cr, ct);
1259 rw_exit(&dip->i_rwlock);
1260 if (error == 0) {
1261 dname = kmem_zalloc(1024, KM_SLEEP);
1262 uname = kmem_zalloc(PAGESIZE, KM_SLEEP);
1263
1264 pc = (struct path_comp *)uname;
1265 /*
1266 * If the first character in target is "/"
1267 * then skip it and create entry for it
1268 */
1269 if (*target == '/') {
1270 pc->pc_type = 2;
1271 pc->pc_len = 0;
1272 pc = (struct path_comp *)(((char *)pc) + 4);
1273 while (*target == '/') {
1274 target++;
1275 }
1276 }
1277
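		/*
		 * Walk the remaining path one component at a time, emitting
		 * a path_comp of type 4 for ".", type 3 for "..", and type 5
		 * (with a compressed identifier) for anything else.
		 */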
1278 		while (*target != '\0') {
1279 sp = target;
1280 while ((*target != '/') && (*target != '\0')) {
1281 target ++;
1282 }
1283 /*
1284 * We got the next component of the
1285 * path name. Create path_comp of
1286 * appropriate type
1287 */
1288 if (((target - sp) == 1) && (*sp == '.')) {
1289 /*
1290 * Dot entry.
1291 */
1292 pc->pc_type = 4;
1293 pc = (struct path_comp *)(((char *)pc) + 4);
1294 } else if (((target - sp) == 2) &&
1295 (*sp == '.') && ((*(sp + 1)) == '.')) {
1296 /*
1297 * DotDot entry.
1298 */
1299 pc->pc_type = 3;
1300 pc = (struct path_comp *)(((char *)pc) + 4);
1301 } else {
1302 /*
1303 * convert the user given name
1304 * into appropriate form to be put
1305 * on the media
1306 */
1307 outlen = 1024; /* set to size of dname */
1308 if (error = ud_compress(target - sp, &outlen,
1309 (uint8_t *)sp, (uint8_t *)dname)) {
1310 break;
1311 }
1312 pc->pc_type = 5;
1313 /* LINTED */
1314 pc->pc_len = outlen;
1315 dname[outlen] = '\0';
1316 (void) strcpy((char *)pc->pc_id, dname);
1317 pc = (struct path_comp *)
1318 (((char *)pc) + 4 + outlen);
1319 }
1320 while (*target == '/') {
1321 target++;
1322 }
1323 			if (*target == '\0') {
1324 break;
1325 }
1326 }
1327
1328 rw_enter(&ip->i_contents, RW_WRITER);
1329 if (error == 0) {
1330 ioflag = FWRITE;
1331 if (curthread->t_flag & T_DONTPEND) {
1332 ioflag |= FDSYNC;
1333 }
1334 error = ud_rdwri(UIO_WRITE, ioflag, ip,
1335 uname, ((int8_t *)pc) - uname,
1336 (offset_t)0, UIO_SYSSPACE, (int32_t *)0, cr);
1337 }
1338 if (error) {
1339 ud_idrop(ip);
1340 rw_exit(&ip->i_contents);
1341 rw_enter(&dip->i_rwlock, RW_WRITER);
1342 (void) ud_dirremove(dip, linkname, (struct ud_inode *)0,
1343 (struct vnode *)0, DR_REMOVE, cr, ct);
1344 rw_exit(&dip->i_rwlock);
1345 goto update_inode;
1346 }
1347 rw_exit(&ip->i_contents);
1348 }
1349
1350 if ((error == 0) || (error == EEXIST)) {
1351 VN_RELE(ITOV(ip));
1352 }
1353
1354 update_inode:
1355 ITIMES(VTOI(dvp));
1356 if (uname != NULL) {
1357 kmem_free(uname, PAGESIZE);
1358 }
1359 if (dname != NULL) {
1360 kmem_free(dname, 1024);
1361 }
1362
1363 return (error);
1364 }
1365
1366 /* ARGSUSED */
1367 static int32_t
1368 udf_readlink(
1369 struct vnode *vp,
1370 struct uio *uiop,
1371 struct cred *cr,
1372 caller_context_t *ct)
1373 {
1374 int32_t error = 0, off, id_len, size, len;
1375 int8_t *dname = NULL, *uname = NULL;
1376 struct ud_inode *ip;
1377 struct fbuf *fbp = NULL;
1378 struct path_comp *pc;
1379
1380 ud_printf("udf_readlink\n");
1381
1382 if (vp->v_type != VLNK) {
1383 return (EINVAL);
1384 }
1385
1386 ip = VTOI(vp);
1387 size = ip->i_size;
1388 if (size > PAGESIZE) {
1389 return (EIO);
1390 }
1391
1392 if (size == 0) {
1393 return (0);
1394 }
1395
1396 dname = kmem_zalloc(1024, KM_SLEEP);
1397 uname = kmem_zalloc(PAGESIZE, KM_SLEEP);
1398
1399 rw_enter(&ip->i_contents, RW_READER);
1400
1401 if ((error = fbread(vp, 0, size, S_READ, &fbp)) != 0) {
1402 goto end;
1403 }
1404
1405 off = 0;
1406
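	/*
	 * Reassemble the path from the recorded path components: type 2 is
	 * the root, type 3 is "..", type 4 is ".", and type 5 is a named
	 * (compressed) component; type 1 is treated as relative to the
	 * mount point.
	 */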
1407 while (off < size) {
1408 pc = (struct path_comp *)(fbp->fb_addr + off);
1409 switch (pc->pc_type) {
1410 case 1 :
1411 (void) strcpy(uname, ip->i_udf->udf_fsmnt);
1412 (void) strcat(uname, "/");
1413 break;
1414 case 2 :
1415 if (pc->pc_len != 0) {
1416 goto end;
1417 }
1418 uname[0] = '/';
1419 uname[1] = '\0';
1420 break;
1421 case 3 :
1422 (void) strcat(uname, "../");
1423 break;
1424 case 4 :
1425 (void) strcat(uname, "./");
1426 break;
1427 case 5 :
1428 if ((error = ud_uncompress(pc->pc_len, &id_len,
1429 pc->pc_id, (uint8_t *)dname)) != 0) {
1430 break;
1431 }
1432 dname[id_len] = '\0';
1433 (void) strcat(uname, dname);
1434 (void) strcat(uname, "/");
1435 break;
1436 default :
1437 error = EINVAL;
1438 goto end;
1439 }
1440 off += 4 + pc->pc_len;
1441 }
1442 len = strlen(uname) - 1;
1443 if (uname[len] == '/') {
1444 if (len == 0) {
1445 /*
1446 * special case link to /
1447 */
1448 len = 1;
1449 } else {
1450 uname[len] = '\0';
1451 }
1452 }
1453
1454 error = uiomove(uname, len, UIO_READ, uiop);
1455
1456 ITIMES(ip);
1457
1458 end:
1459 if (fbp != NULL) {
1460 fbrelse(fbp, S_OTHER);
1461 }
1462 rw_exit(&ip->i_contents);
1463 if (uname != NULL) {
1464 kmem_free(uname, PAGESIZE);
1465 }
1466 if (dname != NULL) {
1467 kmem_free(dname, 1024);
1468 }
1469 return (error);
1470 }
1471
1472 /* ARGSUSED */
1473 static int32_t
1474 udf_fsync(
1475 struct vnode *vp,
1476 int32_t syncflag,
1477 struct cred *cr,
1478 caller_context_t *ct)
1479 {
1480 int32_t error = 0;
1481 struct ud_inode *ip = VTOI(vp);
1482
1483 ud_printf("udf_fsync\n");
1484
1485 rw_enter(&ip->i_contents, RW_WRITER);
1486 if (!(IS_SWAPVP(vp))) {
1487 error = ud_syncip(ip, 0, I_SYNC); /* Do synchronous writes */
1488 }
1489 if (error == 0) {
1490 error = ud_sync_indir(ip);
1491 }
1492 ITIMES(ip); /* XXX: is this necessary ??? */
1493 rw_exit(&ip->i_contents);
1494
1495 return (error);
1496 }
1497
1498 /* ARGSUSED */
1499 static void
1500 udf_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct)
1501 {
1502 ud_printf("udf_iinactive\n");
1503
1504 ud_iinactive(VTOI(vp), cr);
1505 }
1506
1507 /* ARGSUSED */
1508 static int32_t
1509 udf_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
1510 {
1511 struct udf_fid *udfidp;
1512 struct ud_inode *ip = VTOI(vp);
1513
1514 ud_printf("udf_fid\n");
1515
1516 if (fidp->fid_len < (sizeof (struct udf_fid) - sizeof (uint16_t))) {
1517 fidp->fid_len = sizeof (struct udf_fid) - sizeof (uint16_t);
1518 return (ENOSPC);
1519 }
1520
1521 udfidp = (struct udf_fid *)fidp;
1522 bzero((char *)udfidp, sizeof (struct udf_fid));
1523 rw_enter(&ip->i_contents, RW_READER);
1524 udfidp->udfid_len = sizeof (struct udf_fid) - sizeof (uint16_t);
1525 udfidp->udfid_uinq_lo = ip->i_uniqid & 0xffffffff;
1526 udfidp->udfid_prn = ip->i_icb_prn;
1527 udfidp->udfid_icb_lbn = ip->i_icb_block;
1528 rw_exit(&ip->i_contents);
1529
1530 return (0);
1531 }
1532
1533 /* ARGSUSED2 */
1534 static int
1535 udf_rwlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp)
1536 {
1537 struct ud_inode *ip = VTOI(vp);
1538
1539 ud_printf("udf_rwlock\n");
1540
1541 if (write_lock) {
1542 rw_enter(&ip->i_rwlock, RW_WRITER);
1543 } else {
1544 rw_enter(&ip->i_rwlock, RW_READER);
1545 }
1546 #ifdef __lock_lint
1547 rw_exit(&ip->i_rwlock);
1548 #endif
1549 return (write_lock);
1550 }
1551
1552 /* ARGSUSED */
1553 static void
1554 udf_rwunlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp)
1555 {
1556 struct ud_inode *ip = VTOI(vp);
1557
1558 ud_printf("udf_rwunlock\n");
1559
1560 #ifdef __lock_lint
1561 rw_enter(&ip->i_rwlock, RW_WRITER);
1562 #endif
1563
1564 rw_exit(&ip->i_rwlock);
1565
1566 }
1567
1568 /* ARGSUSED */
1569 static int32_t
1570 udf_seek(struct vnode *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
1571 {
1572 return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
1573 }
1574
1575 static int32_t
1576 udf_frlock(
1577 struct vnode *vp,
1578 int32_t cmd,
1579 struct flock64 *bfp,
1580 int32_t flag,
1581 offset_t offset,
1582 struct flk_callback *flk_cbp,
1583 cred_t *cr,
1584 caller_context_t *ct)
1585 {
1586 struct ud_inode *ip = VTOI(vp);
1587
1588 ud_printf("udf_frlock\n");
1589
1590 /*
1591 * If file is being mapped, disallow frlock.
1592 * XXX I am not holding tlock while checking i_mapcnt because the
1593 * current locking strategy drops all locks before calling fs_frlock.
1594 	 * So, mapcnt could change before we enter fs_frlock, making it
1595 * meaningless to have held tlock in the first place.
1596 */
1597 if ((ip->i_mapcnt > 0) &&
1598 (MANDLOCK(vp, ip->i_char))) {
1599 return (EAGAIN);
1600 }
1601
1602 return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
1603 }
1604
1605 /*ARGSUSED6*/
1606 static int32_t
1607 udf_space(
1608 struct vnode *vp,
1609 int32_t cmd,
1610 struct flock64 *bfp,
1611 int32_t flag,
1612 offset_t offset,
1613 cred_t *cr,
1614 caller_context_t *ct)
1615 {
1616 int32_t error = 0;
1617
1618 ud_printf("udf_space\n");
1619
1620 if (cmd != F_FREESP) {
1621 error = EINVAL;
1622 } else if ((error = convoff(vp, bfp, 0, offset)) == 0) {
1623 error = ud_freesp(vp, bfp, flag, cr);
1624 }
1625
1626 return (error);
1627 }
1628
1629 /* ARGSUSED */
1630 static int32_t
1631 udf_getpage(
1632 struct vnode *vp,
1633 offset_t off,
1634 size_t len,
1635 uint32_t *protp,
1636 struct page **plarr,
1637 size_t plsz,
1638 struct seg *seg,
1639 caddr_t addr,
1640 enum seg_rw rw,
1641 struct cred *cr,
1642 caller_context_t *ct)
1643 {
1644 struct ud_inode *ip = VTOI(vp);
1645 int32_t error, has_holes, beyond_eof, seqmode, dolock;
1646 int32_t pgsize = PAGESIZE;
1647 struct udf_vfs *udf_vfsp = ip->i_udf;
1648 page_t **pl;
1649 u_offset_t pgoff, eoff, uoff;
1650 krw_t rwtype;
1651 caddr_t pgaddr;
1652
1653 ud_printf("udf_getpage\n");
1654
1655 uoff = (u_offset_t)off; /* type conversion */
1656 if (protp) {
1657 *protp = PROT_ALL;
1658 }
1659 if (vp->v_flag & VNOMAP) {
1660 return (ENOSYS);
1661 }
1662 seqmode = ip->i_nextr == uoff && rw != S_CREATE;
1663
1664 rwtype = RW_READER;
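	/*
	 * Only take i_contents if this thread does not already own it;
	 * getpage may be entered with the lock held, e.g. through fbread()
	 * issued from within the file system itself.
	 */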
1665 dolock = (rw_owner(&ip->i_contents) != curthread);
1666 retrylock:
1667 #ifdef __lock_lint
1668 rw_enter(&ip->i_contents, rwtype);
1669 #else
1670 if (dolock) {
1671 rw_enter(&ip->i_contents, rwtype);
1672 }
1673 #endif
1674
1675 /*
1676 * We may be getting called as a side effect of a bmap using
1677 * fbread() when the blocks might be being allocated and the
1678 * size has not yet been up'ed. In this case we want to be
1679 * able to return zero pages if we get back UDF_HOLE from
1680 * calling bmap for a non write case here. We also might have
1681 * to read some frags from the disk into a page if we are
1682 * extending the number of frags for a given lbn in bmap().
1683 */
1684 beyond_eof = uoff + len > ip->i_size + PAGEOFFSET;
1685 if (beyond_eof && seg != segkmap) {
1686 #ifdef __lock_lint
1687 rw_exit(&ip->i_contents);
1688 #else
1689 if (dolock) {
1690 rw_exit(&ip->i_contents);
1691 }
1692 #endif
1693 return (EFAULT);
1694 }
1695
1696 /*
1697 * Must hold i_contents lock throughout the call to pvn_getpages
1698 * since locked pages are returned from each call to ud_getapage.
1699 * Must *not* return locked pages and then try for contents lock
1700 * due to lock ordering requirements (inode > page)
1701 */
1702
1703 has_holes = ud_bmap_has_holes(ip);
1704
1705 if ((rw == S_WRITE || rw == S_CREATE) && (has_holes || beyond_eof)) {
1706 int32_t blk_size, count;
1707 u_offset_t offset;
1708
1709 /*
1710 * We must acquire the RW_WRITER lock in order to
1711 * call bmap_write().
1712 */
1713 if (dolock && rwtype == RW_READER) {
1714 rwtype = RW_WRITER;
1715
1716 if (!rw_tryupgrade(&ip->i_contents)) {
1717
1718 rw_exit(&ip->i_contents);
1719
1720 goto retrylock;
1721 }
1722 }
1723
1724 /*
1725 * May be allocating disk blocks for holes here as
1726 * a result of mmap faults. write(2) does the bmap_write
1727 * in rdip/wrip, not here. We are not dealing with frags
1728 * in this case.
1729 */
1730 offset = uoff;
1731 while ((offset < uoff + len) &&
1732 (offset < ip->i_size)) {
1733 /*
1734 * the variable "bnp" is to simplify the expression for
1735 	 * the compiler; just passing in &bn to bmap_write
1736 * causes a compiler "loop"
1737 */
1738
1739 blk_size = udf_vfsp->udf_lbsize;
1740 if ((offset + blk_size) > ip->i_size) {
1741 count = ip->i_size - offset;
1742 } else {
1743 count = blk_size;
1744 }
1745 error = ud_bmap_write(ip, offset, count, 0, cr);
1746 if (error) {
1747 goto update_inode;
1748 }
1749 offset += count; /* XXX - make this contig */
1750 }
1751 }
1752
1753 /*
1754 * Can be a reader from now on.
1755 */
1756 #ifdef __lock_lint
1757 if (rwtype == RW_WRITER) {
1758 rw_downgrade(&ip->i_contents);
1759 }
1760 #else
1761 if (dolock && rwtype == RW_WRITER) {
1762 rw_downgrade(&ip->i_contents);
1763 }
1764 #endif
1765
1766 /*
1767 * We remove PROT_WRITE in cases when the file has UDF holes
1768 * because we don't want to call bmap_read() to check each
1769 * page if it is backed with a disk block.
1770 */
1771 if (protp && has_holes && rw != S_WRITE && rw != S_CREATE) {
1772 *protp &= ~PROT_WRITE;
1773 }
1774
1775 error = 0;
1776
1777 /*
1778 * The loop looks up pages in the range <off, off + len).
1779 * For each page, we first check if we should initiate an asynchronous
1780 * read ahead before we call page_lookup (we may sleep in page_lookup
1781 * for a previously initiated disk read).
1782 */
1783 eoff = (uoff + len);
1784 for (pgoff = uoff, pgaddr = addr, pl = plarr;
1785 pgoff < eoff; /* empty */) {
1786 page_t *pp;
1787 u_offset_t nextrio;
1788 se_t se;
1789
1790 se = ((rw == S_CREATE) ? SE_EXCL : SE_SHARED);
1791
1792 /*
1793 * Handle async getpage (faultahead)
1794 */
1795 if (plarr == NULL) {
1796 ip->i_nextrio = pgoff;
1797 ud_getpage_ra(vp, pgoff, seg, pgaddr);
1798 pgoff += pgsize;
1799 pgaddr += pgsize;
1800 continue;
1801 }
1802
1803 /*
1804 * Check if we should initiate read ahead of next cluster.
1805 * We call page_exists only when we need to confirm that
1806 * we have the current page before we initiate the read ahead.
1807 */
1808 nextrio = ip->i_nextrio;
1809 if (seqmode &&
1810 pgoff + RD_CLUSTSZ(ip) >= nextrio && pgoff <= nextrio &&
1811 nextrio < ip->i_size && page_exists(vp, pgoff))
1812 ud_getpage_ra(vp, pgoff, seg, pgaddr);
1813
1814 if ((pp = page_lookup(vp, pgoff, se)) != NULL) {
1815
1816 /*
1817 * We found the page in the page cache.
1818 */
1819 *pl++ = pp;
1820 pgoff += pgsize;
1821 pgaddr += pgsize;
1822 len -= pgsize;
1823 plsz -= pgsize;
1824 } else {
1825
1826 /*
1827 * We have to create the page, or read it from disk.
1828 */
1829 if (error = ud_getpage_miss(vp, pgoff, len,
1830 seg, pgaddr, pl, plsz, rw, seqmode)) {
1831 goto error_out;
1832 }
1833
1834 while (*pl != NULL) {
1835 pl++;
1836 pgoff += pgsize;
1837 pgaddr += pgsize;
1838 len -= pgsize;
1839 plsz -= pgsize;
1840 }
1841 }
1842 }
1843
1844 /*
1845 * Return pages up to plsz if they are in the page cache.
1846 * We cannot return pages if there is a chance that they are
1847 * backed with a UDF hole and rw is S_WRITE or S_CREATE.
1848 */
1849 if (plarr && !(has_holes && (rw == S_WRITE || rw == S_CREATE))) {
1850
1851 ASSERT((protp == NULL) ||
1852 !(has_holes && (*protp & PROT_WRITE)));
1853
1854 eoff = pgoff + plsz;
1855 while (pgoff < eoff) {
1856 page_t *pp;
1857
1858 if ((pp = page_lookup_nowait(vp, pgoff,
1859 SE_SHARED)) == NULL)
1860 break;
1861
1862 *pl++ = pp;
1863 pgoff += pgsize;
1864 plsz -= pgsize;
1865 }
1866 }
1867
1868 if (plarr)
1869 *pl = NULL; /* Terminate page list */
1870 ip->i_nextr = pgoff;
1871
1872 error_out:
1873 if (error && plarr) {
1874 /*
1875 * Release any pages we have locked.
1876 */
1877 while (pl > &plarr[0])
1878 page_unlock(*--pl);
1879
1880 plarr[0] = NULL;
1881 }
1882
1883 update_inode:
1884 #ifdef __lock_lint
1885 rw_exit(&ip->i_contents);
1886 #else
1887 if (dolock) {
1888 rw_exit(&ip->i_contents);
1889 }
1890 #endif
1891
1892 /*
1893 * If the inode is not already marked for IACC (in rwip() for read)
1894 * and the inode is not marked for no access time update (in rwip()
1895 * for write) then update the inode access time and mod time now.
1896 */
1897 mutex_enter(&ip->i_tlock);
1898 if ((ip->i_flag & (IACC | INOACC)) == 0) {
1899 if ((rw != S_OTHER) && (ip->i_type != VDIR)) {
1900 ip->i_flag |= IACC;
1901 }
1902 if (rw == S_WRITE) {
1903 ip->i_flag |= IUPD;
1904 }
1905 ITIMES_NOLOCK(ip);
1906 }
1907 mutex_exit(&ip->i_tlock);
1908
1909 return (error);
1910 }
1911
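/*
 * When ud_delay is set, asynchronous putpage requests with no special
 * flags are accumulated into a per-inode cluster (i_delayoff/i_delaylen)
 * in udf_putpage() below and pushed out together; clearing it sends
 * every request straight to ud_putpages().
 */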
1912 int32_t ud_delay = 1;
1913
1914 /* ARGSUSED */
1915 static int32_t
1916 udf_putpage(
1917 struct vnode *vp,
1918 offset_t off,
1919 size_t len,
1920 int32_t flags,
1921 struct cred *cr,
1922 caller_context_t *ct)
1923 {
1924 struct ud_inode *ip;
1925 int32_t error = 0;
1926
1927 ud_printf("udf_putpage\n");
1928
1929 ip = VTOI(vp);
1930 #ifdef __lock_lint
1931 rw_enter(&ip->i_contents, RW_WRITER);
1932 #endif
1933
1934 if (vp->v_count == 0) {
1935 cmn_err(CE_WARN, "ud_putpage : bad v_count");
1936 error = EINVAL;
1937 goto out;
1938 }
1939
1940 if (vp->v_flag & VNOMAP) {
1941 error = ENOSYS;
1942 goto out;
1943 }
1944
1945 if (flags & B_ASYNC) {
1946 if (ud_delay && len &&
1947 (flags & ~(B_ASYNC|B_DONTNEED|B_FREE)) == 0) {
1948 mutex_enter(&ip->i_tlock);
1949
1950 /*
1951 * If nobody stalled, start a new cluster.
1952 */
1953 if (ip->i_delaylen == 0) {
1954 ip->i_delayoff = off;
1955 ip->i_delaylen = len;
1956 mutex_exit(&ip->i_tlock);
1957 goto out;
1958 }
1959
1960 /*
1961 * If we have a full cluster or they are not contig,
1962 * then push last cluster and start over.
1963 */
1964 if (ip->i_delaylen >= WR_CLUSTSZ(ip) ||
1965 ip->i_delayoff + ip->i_delaylen != off) {
1966 u_offset_t doff;
1967 size_t dlen;
1968
1969 doff = ip->i_delayoff;
1970 dlen = ip->i_delaylen;
1971 ip->i_delayoff = off;
1972 ip->i_delaylen = len;
1973 mutex_exit(&ip->i_tlock);
1974 error = ud_putpages(vp, doff, dlen, flags, cr);
1975 /* LMXXX - flags are new val, not old */
1976 goto out;
1977 }
1978
1979 /*
1980 * There is something there, it's not full, and
1981 * it is contig.
1982 */
1983 ip->i_delaylen += len;
1984 mutex_exit(&ip->i_tlock);
1985 goto out;
1986 }
1987
1988 /*
1989 * Must have weird flags or we are not clustering.
1990 */
1991 }
1992
1993 error = ud_putpages(vp, off, len, flags, cr);
1994
1995 out:
1996 #ifdef __lock_lint
1997 rw_exit(&ip->i_contents);
1998 #endif
1999 return (error);
2000 }
2001
2002 /* ARGSUSED */
2003 static int32_t
2004 udf_map(
2005 struct vnode *vp,
2006 offset_t off,
2007 struct as *as,
2008 caddr_t *addrp,
2009 size_t len,
2010 uint8_t prot,
2011 uint8_t maxprot,
2012 uint32_t flags,
2013 struct cred *cr,
2014 caller_context_t *ct)
2015 {
2016 struct segvn_crargs vn_a;
2017 int32_t error = 0;
2018
2019 ud_printf("udf_map\n");
2020
2021 if (vp->v_flag & VNOMAP) {
2022 error = ENOSYS;
2023 goto end;
2024 }
2025
2026 if ((off < (offset_t)0) ||
2027 ((off + len) < (offset_t)0)) {
2028 error = EINVAL;
2029 goto end;
2030 }
2031
2032 if (vp->v_type != VREG) {
2033 error = ENODEV;
2034 goto end;
2035 }
2036
2037 /*
2038 * If file is being locked, disallow mapping.
2039 */
2040 if (vn_has_mandatory_locks(vp, VTOI(vp)->i_char)) {
2041 error = EAGAIN;
2042 goto end;
2043 }
2044
2045 as_rangelock(as);
2046 error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
2047 if (error != 0) {
2048 as_rangeunlock(as);
2049 goto end;
2050 }
2051
2052 vn_a.vp = vp;
2053 vn_a.offset = off;
2054 vn_a.type = flags & MAP_TYPE;
2055 vn_a.prot = prot;
2056 vn_a.maxprot = maxprot;
2057 vn_a.cred = cr;
2058 vn_a.amp = NULL;
2059 vn_a.flags = flags & ~MAP_TYPE;
2060 vn_a.szc = 0;
2061 vn_a.lgrp_mem_policy_flags = 0;
2062
2063 error = as_map(as, *addrp, len, segvn_create, (caddr_t)&vn_a);
2064 as_rangeunlock(as);
2065
2066 end:
2067 return (error);
2068 }
2069
2070 /* ARGSUSED */
2071 static int32_t
2072 udf_addmap(struct vnode *vp,
2073 offset_t off,
2074 struct as *as,
2075 caddr_t addr,
2076 size_t len,
2077 uint8_t prot,
2078 uint8_t maxprot,
2079 uint32_t flags,
2080 struct cred *cr,
2081 caller_context_t *ct)
2082 {
2083 struct ud_inode *ip = VTOI(vp);
2084
2085 ud_printf("udf_addmap\n");
2086
2087 if (vp->v_flag & VNOMAP) {
2088 return (ENOSYS);
2089 }
2090
2091 mutex_enter(&ip->i_tlock);
2092 ip->i_mapcnt += btopr(len);
2093 mutex_exit(&ip->i_tlock);
2094
2095 return (0);
2096 }
2097
2098 /* ARGSUSED */
2099 static int32_t
2100 udf_delmap(
2101 struct vnode *vp, offset_t off,
2102 struct as *as,
2103 caddr_t addr,
2104 size_t len,
2105 uint32_t prot,
2106 uint32_t maxprot,
2107 uint32_t flags,
2108 struct cred *cr,
2109 caller_context_t *ct)
2110 {
2111 struct ud_inode *ip = VTOI(vp);
2112
2113 ud_printf("udf_delmap\n");
2114
2115 if (vp->v_flag & VNOMAP) {
2116 return (ENOSYS);
2117 }
2118
2119 mutex_enter(&ip->i_tlock);
2120 ip->i_mapcnt -= btopr(len); /* Count released mappings */
2121 ASSERT(ip->i_mapcnt >= 0);
2122 mutex_exit(&ip->i_tlock);
2123
2124 return (0);
2125 }
2126
2127 /* ARGSUSED */
2128 static int32_t
2129 udf_l_pathconf(
2130 struct vnode *vp,
2131 int32_t cmd,
2132 ulong_t *valp,
2133 struct cred *cr,
2134 caller_context_t *ct)
2135 {
2136 int32_t error = 0;
2137
2138 ud_printf("udf_l_pathconf\n");
2139
2140 if (cmd == _PC_FILESIZEBITS) {
2141 /*
2142 * udf supports a 64-bit file size, but there are
2143 * several other restrictions: it only supports
2144 * 32-bit block numbers and daddr32_t is only an
2145 * int32_t, so taking these into account we can
2146 * stay just where ufs is.
2147 */
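/*
 * Note: this matches the ((uint64_t)1 << 40) - 1 byte limit that
 * ud_wrip() enforces below; 41 is the number of bits needed to
 * hold that maximum size as a signed value.
 */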
2148 *valp = 41;
2149 } else if (cmd == _PC_TIMESTAMP_RESOLUTION) {
2150 /* nanosecond timestamp resolution */
2151 *valp = 1L;
2152 } else {
2153 error = fs_pathconf(vp, cmd, valp, cr, ct);
2154 }
2155
2156 return (error);
2157 }
2158
2159 uint32_t ud_pageio_reads = 0, ud_pageio_writes = 0;
2160 #ifndef __lint
2161 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", ud_pageio_reads))
2162 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", ud_pageio_writes))
2163 #endif
2164 /*
2165 * The assumption is that there will not be a pageio request
2166 * to an embedded file
2167 */
2168 /* ARGSUSED */
2169 static int32_t
2170 udf_pageio(
2171 struct vnode *vp,
2172 struct page *pp,
2173 u_offset_t io_off,
2174 size_t io_len,
2175 int32_t flags,
2176 struct cred *cr,
2177 caller_context_t *ct)
2178 {
2179 daddr_t bn;
2180 struct buf *bp;
2181 struct ud_inode *ip = VTOI(vp);
2182 int32_t dolock, error = 0, contig, multi_io = 0;
2183 size_t done_len = 0, cur_len = 0;
2184 page_t *npp = NULL, *opp = NULL, *cpp = pp;
2185
2186 if (pp == NULL) {
2187 return (EINVAL);
2188 }
2189
2190 dolock = (rw_owner(&ip->i_contents) != curthread);
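/*
 * Only acquire i_contents below if this thread does not already
 * hold it (presumably some callers enter pageio with it held).
 */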
2191
2192 /*
2193 * We need a better check. Ideally, we would use another
2194 * vnodeops so that hlocked and forcibly unmounted file
2195 * systems would return EIO where appropriate and w/o the
2196 * need for these checks.
2197 */
2198 if (ip->i_udf == NULL) {
2199 return (EIO);
2200 }
2201
2202 #ifdef __lock_lint
2203 rw_enter(&ip->i_contents, RW_READER);
2204 #else
2205 if (dolock) {
2206 rw_enter(&ip->i_contents, RW_READER);
2207 }
2208 #endif
2209
2210 /*
2211 * Break the io request into chunks, one for each contiguous
2212 * stretch of disk blocks in the target file.
2213 */
2214 while (done_len < io_len) {
2215 ASSERT(cpp);
2216 bp = NULL;
2217 contig = 0;
2218 if (error = ud_bmap_read(ip, (u_offset_t)(io_off + done_len),
2219 &bn, &contig)) {
2220 break;
2221 }
2222
2223 if (bn == UDF_HOLE) { /* No holey swapfiles */
2224 cmn_err(CE_WARN, "SWAP file has HOLES");
2225 error = EINVAL;
2226 break;
2227 }
2228
2229 cur_len = MIN(io_len - done_len, contig);
2230
2231 /*
2232 * Check if more than one I/O is
2233 * required to complete the given
2234 * I/O operation
2235 */
2236 if (ip->i_udf->udf_lbsize < PAGESIZE) {
2237 if (cur_len >= PAGESIZE) {
2238 multi_io = 0;
2239 cur_len &= PAGEMASK;
2240 } else {
2241 multi_io = 1;
2242 cur_len = MIN(io_len - done_len, PAGESIZE);
2243 }
2244 }
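/*
 * multi_io means the contiguous extent backing this chunk is
 * smaller than the page, so ud_multi_strat() must split the
 * request into several smaller I/Os.
 */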
2245 page_list_break(&cpp, &npp, btop(cur_len));
2246
2247 bp = pageio_setup(cpp, cur_len, ip->i_devvp, flags);
2248 ASSERT(bp != NULL);
2249
2250 bp->b_edev = ip->i_dev;
2251 bp->b_dev = cmpdev(ip->i_dev);
2252 bp->b_blkno = bn;
2253 bp->b_un.b_addr = (caddr_t)0;
2254 bp->b_file = vp;
2255 bp->b_offset = (offset_t)(io_off + done_len);
2256
2257 /*
2258 * ub.ub_pageios.value.ul++;
2259 */
2260 if (multi_io == 0) {
2261 (void) bdev_strategy(bp);
2262 } else {
2263 error = ud_multi_strat(ip, cpp, bp,
2264 (u_offset_t)(io_off + done_len));
2265 if (error != 0) {
2266 pageio_done(bp);
2267 break;
2268 }
2269 }
2270 if (flags & B_READ) {
2271 ud_pageio_reads++;
2272 } else {
2273 ud_pageio_writes++;
2274 }
2275
2276 /*
2277 * If the request is not B_ASYNC, wait for i/o to complete
2278 * and re-assemble the page list to return to the caller.
2279 * If it is B_ASYNC we leave the page list in pieces and
2280 * cleanup() will dispose of them.
2281 */
2282 if ((flags & B_ASYNC) == 0) {
2283 error = biowait(bp);
2284 pageio_done(bp);
2285 if (error) {
2286 break;
2287 }
2288 page_list_concat(&opp, &cpp);
2289 }
2290 cpp = npp;
2291 npp = NULL;
2292 done_len += cur_len;
2293 }
2294
2295 ASSERT(error || (cpp == NULL && npp == NULL && done_len == io_len));
2296 if (error) {
2297 if (flags & B_ASYNC) {
2298 /* Cleanup unprocessed parts of list */
2299 page_list_concat(&cpp, &npp);
2300 if (flags & B_READ) {
2301 pvn_read_done(cpp, B_ERROR);
2302 } else {
2303 pvn_write_done(cpp, B_ERROR);
2304 }
2305 } else {
2306 /* Re-assemble list and let caller clean up */
2307 page_list_concat(&opp, &cpp);
2308 page_list_concat(&opp, &npp);
2309 }
2310 }
2311
2312 #ifdef __lock_lint
2313 rw_exit(&ip->i_contents);
2314 #else
2315 if (dolock) {
2316 rw_exit(&ip->i_contents);
2317 }
2318 #endif
2319 return (error);
2320 }
2321
2322
2323
2324
2325 /* -------------------- local functions --------------------------- */
2326
2327
2328
2329 int32_t
2330 ud_rdwri(enum uio_rw rw, int32_t ioflag,
2331 struct ud_inode *ip, caddr_t base, int32_t len,
2332 offset_t offset, enum uio_seg seg, int32_t *aresid, struct cred *cr)
2333 {
2334 int32_t error;
2335 struct uio auio;
2336 struct iovec aiov;
2337
2338 ud_printf("ud_rdwri\n");
2339
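/*
 * Build a single-iovec uio describing the caller's buffer and
 * pass it through the common ud_wrip()/ud_rdip() paths.
 */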
2340 bzero((caddr_t)&auio, sizeof (uio_t));
2341 bzero((caddr_t)&aiov, sizeof (iovec_t));
2342
2343 aiov.iov_base = base;
2344 aiov.iov_len = len;
2345 auio.uio_iov = &aiov;
2346 auio.uio_iovcnt = 1;
2347 auio.uio_loffset = offset;
2348 auio.uio_segflg = (int16_t)seg;
2349 auio.uio_resid = len;
2350
2351 if (rw == UIO_WRITE) {
2352 auio.uio_fmode = FWRITE;
2353 auio.uio_extflg = UIO_COPY_DEFAULT;
2354 auio.uio_llimit = curproc->p_fsz_ctl;
2355 error = ud_wrip(ip, &auio, ioflag, cr);
2356 } else {
2357 auio.uio_fmode = FREAD;
2358 auio.uio_extflg = UIO_COPY_CACHED;
2359 auio.uio_llimit = MAXOFFSET_T;
2360 error = ud_rdip(ip, &auio, ioflag, cr);
2361 }
2362
2363 if (aresid) {
2364 *aresid = auio.uio_resid;
2365 } else if (auio.uio_resid) {
2366 error = EIO;
2367 }
2368 return (error);
2369 }
2370
2371 /*
2372 * Free behind hacks. The pager is busted.
2373 * XXX - need to pass the information down to writedone() in a flag like B_SEQ
2374 * or B_FREE_IF_TIGHT_ON_MEMORY.
2375 */
2376 int32_t ud_freebehind = 1;
2377 int32_t ud_smallfile = 32 * 1024;
2378
2379 /* ARGSUSED */
2380 int32_t
2381 ud_getpage_miss(struct vnode *vp, u_offset_t off,
2382 size_t len, struct seg *seg, caddr_t addr, page_t *pl[],
2383 size_t plsz, enum seg_rw rw, int32_t seq)
2384 {
2385 struct ud_inode *ip = VTOI(vp);
2386 int32_t err = 0;
2387 size_t io_len;
2388 u_offset_t io_off;
2389 u_offset_t pgoff;
2390 page_t *pp;
2391
2392 pl[0] = NULL;
2393
2394 /*
2395 * Figure out whether the page can be created, or must be
2396 * read from the disk
2397 */
2398 if (rw == S_CREATE) {
2399 if ((pp = page_create_va(vp, off,
2400 PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
2401 cmn_err(CE_WARN, "ud_getpage_miss: page_create");
2402 return (EINVAL);
2403 }
2404 io_len = PAGESIZE;
2405 } else {
2406 pp = pvn_read_kluster(vp, off, seg, addr, &io_off,
2407 &io_len, off, PAGESIZE, 0);
2408
2409 /*
2410 * Some other thread has entered the page.
2411 * ud_getpage will retry page_lookup.
2412 */
2413 if (pp == NULL) {
2414 return (0);
2415 }
2416
2417 /*
2418 * Fill the page with as much data as we can from the file.
2419 */
2420 err = ud_page_fill(ip, pp, off, B_READ, &pgoff);
2421 if (err) {
2422 pvn_read_done(pp, B_ERROR);
2423 return (err);
2424 }
2425
2426 /*
2427 * XXX ??? ufs has io_len instead of pgoff below
2428 */
2429 ip->i_nextrio = off + ((pgoff + PAGESIZE - 1) & PAGEMASK);
2430
2431 /*
2432 * If the file access is sequential, initiate read ahead
2433 * of the next cluster.
2434 */
2435 if (seq && ip->i_nextrio < ip->i_size) {
2436 ud_getpage_ra(vp, off, seg, addr);
2437 }
2438 }
2439
2440 outmiss:
2441 pvn_plist_init(pp, pl, plsz, (offset_t)off, io_len, rw);
2442 return (err);
2443 }
2444
2445 /* ARGSUSED */
2446 void
2447 ud_getpage_ra(struct vnode *vp,
2448 u_offset_t off, struct seg *seg, caddr_t addr)
2449 {
2450 page_t *pp;
2451 size_t io_len;
2452 struct ud_inode *ip = VTOI(vp);
2453 u_offset_t io_off = ip->i_nextrio, pgoff;
2454 caddr_t addr2 = addr + (io_off - off);
2455 daddr_t bn;
2456 int32_t contig = 0;
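/*
 * i_nextrio was advanced by ud_getpage_miss() to the end of the
 * cluster it just read in; the asynchronous read ahead starts
 * there (io_off above).
 */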
2457
2458 /*
2459 * Is this test needed?
2460 */
2461
2462 if (addr2 >= seg->s_base + seg->s_size) {
2463 return;
2464 }
2465
2466 contig = 0;
2467 if (ud_bmap_read(ip, io_off, &bn, &contig) != 0 || bn == UDF_HOLE) {
2468 return;
2469 }
2470
2471 pp = pvn_read_kluster(vp, io_off, seg, addr2,
2472 &io_off, &io_len, io_off, PAGESIZE, 1);
2473
2474 /*
2475 * Some other thread has entered the page.
2476 * So no read ahead is done here (i.e. we will have to wait
2477 * for the read when it is needed).
2478 */
2479
2480 if (pp == NULL) {
2481 return;
2482 }
2483
2484 (void) ud_page_fill(ip, pp, io_off, (B_READ|B_ASYNC), &pgoff);
2485 ip->i_nextrio = io_off + ((pgoff + PAGESIZE - 1) & PAGEMASK);
2486 }
2487
2488 int
2489 ud_page_fill(struct ud_inode *ip, page_t *pp, u_offset_t off,
2490 uint32_t bflgs, u_offset_t *pg_off)
2491 {
2492 daddr_t bn;
2493 struct buf *bp;
2494 caddr_t kaddr, caddr;
2495 int32_t error = 0, contig = 0, multi_io = 0;
2496 int32_t lbsize = ip->i_udf->udf_lbsize;
2497 int32_t lbmask = ip->i_udf->udf_lbmask;
2498 uint64_t isize;
2499
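/* Round the file size up to a logical-block boundary. */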
2500 isize = (ip->i_size + lbmask) & (~lbmask);
2501 if (ip->i_desc_type == ICB_FLAG_ONE_AD) {
2502
2503 /*
2504 * Embedded file: read the file_entry
2505 * from the buffer cache and copy the
2506 * required portion
2507 */
2508 bp = ud_bread(ip->i_dev,
2509 ip->i_icb_lbano << ip->i_udf->udf_l2d_shift, lbsize);
2510 if ((bp->b_error == 0) &&
2511 (bp->b_resid == 0)) {
2512
2513 caddr = bp->b_un.b_addr + ip->i_data_off;
2514
2515 /*
2516 * mapin to kvm
2517 */
2518 kaddr = (caddr_t)ppmapin(pp,
2519 PROT_READ | PROT_WRITE, (caddr_t)-1);
2520 (void) kcopy(caddr, kaddr, ip->i_size);
2521
2522 /*
2523 * mapout of kvm
2524 */
2525 ppmapout(kaddr);
2526 }
2527 brelse(bp);
2528 contig = ip->i_size;
2529 } else {
2530
2531 /*
2532 * Get the contiguous size and block number
2533 * at offset "off"
2534 */
2535 if (error = ud_bmap_read(ip, off, &bn, &contig))
2536 goto out;
2537 contig = MIN(contig, PAGESIZE);
2538 contig = (contig + lbmask) & (~lbmask);
2539
2540 /*
2541 * Zero part of the page which we are not
2542 * going to read from the disk.
2543 */
2544
2545 if (bn == UDF_HOLE) {
2546
2547 /*
2548 * This is a HOLE. Just zero out
2549 * the page
2550 */
2551 if (((off + contig) == isize) ||
2552 (contig == PAGESIZE)) {
2553 pagezero(pp->p_prev, 0, PAGESIZE);
2554 goto out;
2555 }
2556 }
2557
2558 if (contig < PAGESIZE) {
2559 uint64_t count;
2560
2561 count = isize - off;
2562 if (contig != count) {
2563 multi_io = 1;
2564 contig = (int32_t)(MIN(count, PAGESIZE));
2565 } else {
2566 pagezero(pp->p_prev, contig, PAGESIZE - contig);
2567 }
2568 }
2569
2570 /*
2571 * Get a bp and initialize it
2572 */
2573 bp = pageio_setup(pp, contig, ip->i_devvp, bflgs);
2574 ASSERT(bp != NULL);
2575
2576 bp->b_edev = ip->i_dev;
2577 bp->b_dev = cmpdev(ip->i_dev);
2578 bp->b_blkno = bn;
2579 bp->b_un.b_addr = 0;
2580 bp->b_file = ip->i_vnode;
2581
2582 /*
2583 * Start I/O
2584 */
2585 if (multi_io == 0) {
2586
2587 /*
2588 * Single I/O is sufficient for this page
2589 */
2590 (void) bdev_strategy(bp);
2591 } else {
2592
2593 /*
2594 * We need to do the I/O in
2595 * pieces
2596 */
2597 error = ud_multi_strat(ip, pp, bp, off);
2598 if (error != 0) {
2599 goto out;
2600 }
2601 }
2602 if ((bflgs & B_ASYNC) == 0) {
2603
2604 /*
2605 * Wait for i/o to complete.
2606 */
2607
2608 error = biowait(bp);
2609 pageio_done(bp);
2610 if (error) {
2611 goto out;
2612 }
2613 }
2614 }
2615 if ((off + contig) >= ip->i_size) {
2616 contig = ip->i_size - off;
2617 }
2618
2619 out:
2620 *pg_off = contig;
2621 return (error);
2622 }
2623
2624 int32_t
2625 ud_putpages(struct vnode *vp, offset_t off,
2626 size_t len, int32_t flags, struct cred *cr)
2627 {
2628 struct ud_inode *ip;
2629 page_t *pp;
2630 u_offset_t io_off;
2631 size_t io_len;
2632 u_offset_t eoff;
2633 int32_t err = 0;
2634 int32_t dolock;
2635
2636 ud_printf("ud_putpages\n");
2637
2638 if (vp->v_count == 0) {
2639 cmn_err(CE_WARN, "ud_putpages: bad v_count");
2640 return (EINVAL);
2641 }
2642
2643 ip = VTOI(vp);
2644
2645 /*
2646 * Acquire the readers/writer inode lock before locking
2647 * any pages in this inode.
2648 * The inode lock is held during i/o.
2649 */
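/*
 * A length of 0 means flush the whole file, so also discard the
 * delayed-write cluster bookkeeping (i_delayoff/i_delaylen)
 * maintained by the clustering code above.
 */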
2650 if (len == 0) {
2651 mutex_enter(&ip->i_tlock);
2652 ip->i_delayoff = ip->i_delaylen = 0;
2653 mutex_exit(&ip->i_tlock);
2654 }
2655 #ifdef __lock_lint
2656 rw_enter(&ip->i_contents, RW_READER);
2657 #else
2658 dolock = (rw_owner(&ip->i_contents) != curthread);
2659 if (dolock) {
2660 rw_enter(&ip->i_contents, RW_READER);
2661 }
2662 #endif
2663
2664 if (!vn_has_cached_data(vp)) {
2665 #ifdef __lock_lint
2666 rw_exit(&ip->i_contents);
2667 #else
2668 if (dolock) {
2669 rw_exit(&ip->i_contents);
2670 }
2671 #endif
2672 return (0);
2673 }
2674
2675 if (len == 0) {
2676 /*
2677 * Search the entire vp list for pages >= off.
2678 */
2679 err = pvn_vplist_dirty(vp, (u_offset_t)off, ud_putapage,
2680 flags, cr);
2681 } else {
2682 /*
2683 * Loop over all offsets in the range looking for
2684 * pages to deal with.
2685 */
2686 if ((eoff = blkroundup(ip->i_udf, ip->i_size)) != 0) {
2687 eoff = MIN(off + len, eoff);
2688 } else {
2689 eoff = off + len;
2690 }
2691
2692 for (io_off = off; io_off < eoff; io_off += io_len) {
2693 /*
2694 * If we are not invalidating, synchronously
2695 * freeing or writing pages, use the routine
2696 * page_lookup_nowait() to prevent reclaiming
2697 * them from the free list.
2698 */
2699 if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
2700 pp = page_lookup(vp, io_off,
2701 (flags & (B_INVAL | B_FREE)) ?
2702 SE_EXCL : SE_SHARED);
2703 } else {
2704 pp = page_lookup_nowait(vp, io_off,
2705 (flags & B_FREE) ? SE_EXCL : SE_SHARED);
2706 }
2707
2708 if (pp == NULL || pvn_getdirty(pp, flags) == 0) {
2709 io_len = PAGESIZE;
2710 } else {
2711
2712 err = ud_putapage(vp, pp,
2713 &io_off, &io_len, flags, cr);
2714 if (err != 0) {
2715 break;
2716 }
2717 /*
2718 * "io_off" and "io_len" are returned as
2719 * the range of pages we actually wrote.
2720 * This allows us to skip ahead more quickly
2721 * since several pages may have been dealt
2722 * with by this iteration of the loop.
2723 */
2724 }
2725 }
2726 }
2727 if (err == 0 && off == 0 && (len == 0 || len >= ip->i_size)) {
2728 /*
2729 * We have just sync'ed back all the pages on
2730 * the inode, turn off the IMODTIME flag.
2731 */
2732 mutex_enter(&ip->i_tlock);
2733 ip->i_flag &= ~IMODTIME;
2734 mutex_exit(&ip->i_tlock);
2735 }
2736 #ifdef __lock_lint
2737 rw_exit(&ip->i_contents);
2738 #else
2739 if (dolock) {
2740 rw_exit(&ip->i_contents);
2741 }
2742 #endif
2743 return (err);
2744 }
2745
2746 /* ARGSUSED */
2747 int32_t
2748 ud_putapage(struct vnode *vp,
2749 page_t *pp, u_offset_t *offp,
2750 size_t *lenp, int32_t flags, struct cred *cr)
2751 {
2752 daddr_t bn;
2753 size_t io_len;
2754 struct ud_inode *ip;
2755 int32_t error = 0, contig, multi_io = 0;
2756 struct udf_vfs *udf_vfsp;
2757 u_offset_t off, io_off;
2758 caddr_t kaddr, caddr;
2759 struct buf *bp = NULL;
2760 int32_t lbmask;
2761 uint64_t isize;
2762 int32_t crc_len;
2763 struct file_entry *fe;
2764
2765 ud_printf("ud_putapage\n");
2766
2767 ip = VTOI(vp);
2768 ASSERT(ip);
2769 ASSERT(RW_LOCK_HELD(&ip->i_contents));
2770 lbmask = ip->i_udf->udf_lbmask;
2771 isize = (ip->i_size + lbmask) & (~lbmask);
2772
2773 udf_vfsp = ip->i_udf;
2774 ASSERT(udf_vfsp->udf_flags & UDF_FL_RW);
2775
2776 /*
2777 * If the modified time on the inode has not already been
2778 * set elsewhere (e.g. for write/setattr) we set the time now.
2779 * This gives us approximate modified times for mmap'ed files
2780 * which are modified via stores in the user address space.
2781 */
2782 if (((ip->i_flag & IMODTIME) == 0) || (flags & B_FORCE)) {
2783 mutex_enter(&ip->i_tlock);
2784 ip->i_flag |= IUPD;
2785 ITIMES_NOLOCK(ip);
2786 mutex_exit(&ip->i_tlock);
2787 }
2788
2789
2790 /*
2791 * Align the request to a block boundary (for old file systems),
2792 * and go ask bmap() how contiguous things are for this file.
2793 */
2794 off = pp->p_offset & ~(offset_t)lbmask;
2795 /* block align it */
2796
2797
2798 if (ip->i_desc_type == ICB_FLAG_ONE_AD) {
2799 ASSERT(ip->i_size <= ip->i_max_emb);
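/*
 * Embedded (ICB_FLAG_ONE_AD) files keep their data inside the
 * file_entry itself, so the page is written back by copying it
 * into the cached file_entry and rewriting that block rather
 * than by regular block I/O.
 */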
2800
2801 pp = pvn_write_kluster(vp, pp, &io_off,
2802 &io_len, off, PAGESIZE, flags);
2803 if (io_len == 0) {
2804 io_len = PAGESIZE;
2805 }
2806
2807 bp = ud_bread(ip->i_dev,
2808 ip->i_icb_lbano << udf_vfsp->udf_l2d_shift,
2809 udf_vfsp->udf_lbsize);
2810 fe = (struct file_entry *)bp->b_un.b_addr;
2811 if ((bp->b_flags & B_ERROR) ||
2812 (ud_verify_tag_and_desc(&fe->fe_tag, UD_FILE_ENTRY,
2813 ip->i_icb_block,
2814 1, udf_vfsp->udf_lbsize) != 0)) {
2815 if (pp != NULL)
2816 pvn_write_done(pp, B_ERROR | B_WRITE | flags);
2817 if (bp->b_flags & B_ERROR) {
2818 error = EIO;
2819 } else {
2820 error = EINVAL;
2821 }
2822 brelse(bp);
2823 return (error);
2824 }
2825 if ((bp->b_error == 0) &&
2826 (bp->b_resid == 0)) {
2827
2828 caddr = bp->b_un.b_addr + ip->i_data_off;
2829 kaddr = (caddr_t)ppmapin(pp,
2830 PROT_READ | PROT_WRITE, (caddr_t)-1);
2831 (void) kcopy(kaddr, caddr, ip->i_size);
2832 ppmapout(kaddr);
2833 }
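/*
 * Recompute the descriptor CRC length: the fixed part of the
 * file_entry up to fe_spec, plus the extended attributes, plus
 * the embedded data just copied in.
 */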
2834 crc_len = ((uint32_t)&((struct file_entry *)0)->fe_spec) +
2835 SWAP_32(fe->fe_len_ear);
2836 crc_len += ip->i_size;
2837 ud_make_tag(ip->i_udf, &fe->fe_tag,
2838 UD_FILE_ENTRY, ip->i_icb_block, crc_len);
2839
2840 bwrite(bp);
2841
2842 if (flags & B_ASYNC) {
2843 pvn_write_done(pp, flags);
2844 }
2845 contig = ip->i_size;
2846 } else {
2847
2848 if (error = ud_bmap_read(ip, off, &bn, &contig)) {
2849 goto out;
2850 }
2851 contig = MIN(contig, PAGESIZE);
2852 contig = (contig + lbmask) & (~lbmask);
2853
2854 if (contig < PAGESIZE) {
2855 uint64_t count;
2856
2857 count = isize - off;
2858 if (contig != count) {
2859 multi_io = 1;
2860 contig = (int32_t)(MIN(count, PAGESIZE));
2861 }
2862 }
2863
2864 if ((off + contig) > isize) {
2865 contig = isize - off;
2866 }
2867
2868 if (contig > PAGESIZE) {
2869 if (contig & PAGEOFFSET) {
2870 contig &= PAGEMASK;
2871 }
2872 }
2873
2874 pp = pvn_write_kluster(vp, pp, &io_off,
2875 &io_len, off, contig, flags);
2876 if (io_len == 0) {
2877 io_len = PAGESIZE;
2878 }
2879
2880 bp = pageio_setup(pp, contig, ip->i_devvp, B_WRITE | flags);
2881 ASSERT(bp != NULL);
2882
2883 bp->b_edev = ip->i_dev;
2884 bp->b_dev = cmpdev(ip->i_dev);
2885 bp->b_blkno = bn;
2886 bp->b_un.b_addr = 0;
2887 bp->b_file = vp;
2888 bp->b_offset = (offset_t)off;
2889
2890
2891 /*
2892 * write throttle
2893 */
2894 ASSERT(bp->b_iodone == NULL);
2895 bp->b_iodone = ud_iodone;
2896 mutex_enter(&ip->i_tlock);
2897 ip->i_writes += bp->b_bcount;
2898 mutex_exit(&ip->i_tlock);
2899
2900 if (multi_io == 0) {
2901
2902 (void) bdev_strategy(bp);
2903 } else {
2904 error = ud_multi_strat(ip, pp, bp, off);
2905 if (error != 0) {
2906 goto out;
2907 }
2908 }
2909
2910 if ((flags & B_ASYNC) == 0) {
2911 /*
2912 * Wait for i/o to complete.
2913 */
2914 error = biowait(bp);
2915 pageio_done(bp);
2916 }
2917 }
2918
2919 if ((flags & B_ASYNC) == 0) {
2920 pvn_write_done(pp, ((error) ? B_ERROR : 0) | B_WRITE | flags);
2921 }
2922
2923 pp = NULL;
2924
2925 out:
2926 if (error != 0 && pp != NULL) {
2927 pvn_write_done(pp, B_ERROR | B_WRITE | flags);
2928 }
2929
2930 if (offp) {
2931 *offp = io_off;
2932 }
2933 if (lenp) {
2934 *lenp = io_len;
2935 }
2936
2937 return (error);
2938 }
2939
2940
2941 int32_t
2942 ud_iodone(struct buf *bp)
2943 {
2944 struct ud_inode *ip;
2945
2946 ASSERT((bp->b_pages->p_vnode != NULL) && !(bp->b_flags & B_READ));
2947
2948 bp->b_iodone = NULL;
2949
2950 ip = VTOI(bp->b_pages->p_vnode);
2951
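/*
 * Undo the i_writes accounting done by ud_putapage() and, once
 * the count drops back to the ud_LW water mark, wake any
 * throttled writers (ud_WRITES presumably gates this throttle).
 */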
2952 mutex_enter(&ip->i_tlock);
2953 if (ip->i_writes >= ud_LW) {
2954 if ((ip->i_writes -= bp->b_bcount) <= ud_LW) {
2955 if (ud_WRITES) {
2956 cv_broadcast(&ip->i_wrcv); /* wake all up */
2957 }
2958 }
2959 } else {
2960 ip->i_writes -= bp->b_bcount;
2961 }
2962 mutex_exit(&ip->i_tlock);
2963 iodone(bp);
2964 return (0);
2965 }
2966
2967 /* ARGSUSED3 */
2968 int32_t
2969 ud_rdip(struct ud_inode *ip, struct uio *uio, int32_t ioflag, cred_t *cr)
2970 {
2971 struct vnode *vp;
2972 struct udf_vfs *udf_vfsp;
2973 krw_t rwtype;
2974 caddr_t base;
2975 uint32_t flags;
2976 int32_t error, n, on, mapon, dofree;
2977 u_offset_t off;
2978 long oresid = uio->uio_resid;
2979
2980 ASSERT(RW_LOCK_HELD(&ip->i_contents));
2981 if ((ip->i_type != VREG) &&
2982 (ip->i_type != VDIR) &&
2983 (ip->i_type != VLNK)) {
2984 return (EIO);
2985 }
2986
2987 if (uio->uio_loffset > MAXOFFSET_T) {
2988 return (0);
2989 }
2990
2991 if ((uio->uio_loffset < (offset_t)0) ||
2992 ((uio->uio_loffset + uio->uio_resid) < 0)) {
2993 return (EINVAL);
2994 }
2995 if (uio->uio_resid == 0) {
2996 return (0);
2997 }
2998
2999 vp = ITOV(ip);
3000 udf_vfsp = ip->i_udf;
3001 mutex_enter(&ip->i_tlock);
3002 ip->i_flag |= IACC;
3003 mutex_exit(&ip->i_tlock);
3004
3005 rwtype = (rw_write_held(&ip->i_contents)?RW_WRITER:RW_READER);
3006
3007 do {
3008 offset_t diff;
3009 u_offset_t uoff = uio->uio_loffset;
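/*
 * off/mapon locate uoff within a MAXBSIZE segmap window, while
 * on/n bound this iteration to the remainder of the current
 * logical block.
 */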
3010 off = uoff & (offset_t)MAXBMASK;
3011 mapon = (int)(uoff & (offset_t)MAXBOFFSET);
3012 on = (int)blkoff(udf_vfsp, uoff);
3013 n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid);
3014
3015 diff = ip->i_size - uoff;
3016
3017 if (diff <= (offset_t)0) {
3018 error = 0;
3019 goto out;
3020 }
3021 if (diff < (offset_t)n) {
3022 n = (int)diff;
3023 }
3024 dofree = ud_freebehind &&
3025 ip->i_nextr == (off & PAGEMASK) &&
3026 off > ud_smallfile;
3027
3028 #ifndef __lock_lint
3029 if (rwtype == RW_READER) {
3030 rw_exit(&ip->i_contents);
3031 }
3032 #endif
3033
3034 base = segmap_getmapflt(segkmap, vp, (off + mapon),
3035 (uint32_t)n, 1, S_READ);
3036 error = uiomove(base + mapon, (long)n, UIO_READ, uio);
3037
3038 flags = 0;
3039 if (!error) {
3040 /*
3041 * If we read a whole block, or read to eof,
3042 * we won't need this buffer again soon.
3043 */
3044 if (n + on == MAXBSIZE && ud_freebehind && dofree &&
3045 freemem < lotsfree + pages_before_pager) {
3046 flags = SM_FREE | SM_DONTNEED |SM_ASYNC;
3047 }
3048 /*
3049 * In POSIX SYNC (FSYNC and FDSYNC) read mode,
3050 * we want to make sure that the page which has
3051 * been read, is written on disk if it is dirty.
3052 * And corresponding indirect blocks should also
3053 * be flushed out.
3054 */
3055 if ((ioflag & FRSYNC) && (ioflag & (FSYNC|FDSYNC))) {
3056 flags &= ~SM_ASYNC;
3057 flags |= SM_WRITE;
3058 }
3059 error = segmap_release(segkmap, base, flags);
3060 } else {
3061 (void) segmap_release(segkmap, base, flags);
3062 }
3063
3064 #ifndef __lock_lint
3065 if (rwtype == RW_READER) {
3066 rw_enter(&ip->i_contents, rwtype);
3067 }
3068 #endif
3069 } while (error == 0 && uio->uio_resid > 0 && n != 0);
3070 out:
3071 /*
3072 * Inode is updated according to this table if FRSYNC is set.
3073 *
3074 * FSYNC FDSYNC(posix.4)
3075 * --------------------------
3076 * always IATTCHG|IBDWRITE
3077 */
3078 if (ioflag & FRSYNC) {
3079 if ((ioflag & FSYNC) ||
3080 ((ioflag & FDSYNC) &&
3081 (ip->i_flag & (IATTCHG|IBDWRITE)))) {
3082 rw_exit(&ip->i_contents);
3083 rw_enter(&ip->i_contents, RW_WRITER);
3084 ud_iupdat(ip, 1);
3085 }
3086 }
3087 /*
3088 * If we've already done a partial read, terminate
3089 * the read but return no error.
3090 */
3091 if (oresid != uio->uio_resid) {
3092 error = 0;
3093 }
3094 ITIMES(ip);
3095
3096 return (error);
3097 }
3098
3099 int32_t
3100 ud_wrip(struct ud_inode *ip, struct uio *uio, int ioflag, struct cred *cr)
3101 {
3102 caddr_t base;
3103 struct vnode *vp;
3104 struct udf_vfs *udf_vfsp;
3105 uint32_t flags;
3106 int32_t error = 0, iupdat_flag, n, on, mapon, i_size_changed = 0;
3107 int32_t pagecreate, newpage;
3108 uint64_t old_i_size;
3109 u_offset_t off;
3110 long start_resid = uio->uio_resid, premove_resid;
3111 rlim64_t limit = uio->uio_limit;
3112
3113
3114 ASSERT(RW_WRITE_HELD(&ip->i_contents));
3115 if ((ip->i_type != VREG) &&
3116 (ip->i_type != VDIR) &&
3117 (ip->i_type != VLNK)) {
3118 return (EIO);
3119 }
3120
3121 if (uio->uio_loffset >= MAXOFFSET_T) {
3122 return (EFBIG);
3123 }
3124 /*
3125 * see udf_l_pathconf
3126 */
3127 if (limit > (((uint64_t)1 << 40) - 1)) {
3128 limit = ((uint64_t)1 << 40) - 1;
3129 }
3130 if (uio->uio_loffset >= limit) {
3131 proc_t *p = ttoproc(curthread);
3132
3133 mutex_enter(&p->p_lock);
3134 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
3135 p, RCA_UNSAFE_SIGINFO);
3136 mutex_exit(&p->p_lock);
3137 return (EFBIG);
3138 }
3139 if ((uio->uio_loffset < (offset_t)0) ||
3140 ((uio->uio_loffset + uio->uio_resid) < 0)) {
3141 return (EINVAL);
3142 }
3143 if (uio->uio_resid == 0) {
3144 return (0);
3145 }
3146
3147 mutex_enter(&ip->i_tlock);
3148 ip->i_flag |= INOACC;
3149
3150 if (ioflag & (FSYNC | FDSYNC)) {
3151 ip->i_flag |= ISYNC;
3152 iupdat_flag = 1;
3153 }
3154 mutex_exit(&ip->i_tlock);
3155
3156 udf_vfsp = ip->i_udf;
3157 vp = ITOV(ip);
3158
3159 do {
3160 u_offset_t uoff = uio->uio_loffset;
3161 off = uoff & (offset_t)MAXBMASK;
3162 mapon = (int)(uoff & (offset_t)MAXBOFFSET);
3163 on = (int)blkoff(udf_vfsp, uoff);
3164 n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid);
3165
3166 if (ip->i_type == VREG && uoff + n >= limit) {
3167 if (uoff >= limit) {
3168 error = EFBIG;
3169 goto out;
3170 }
3171 n = (int)(limit - (rlim64_t)uoff);
3172 }
3173 if (uoff + n > ip->i_size) {
3174 /*
3175 * We are extending the length of the file.
3176 * bmap is used so that we are sure that
3177 * if we need to allocate new blocks, that it
3178 * is done here before we up the file size.
3179 */
3180 error = ud_bmap_write(ip, uoff,
3181 (int)(on + n), mapon == 0, cr);
3182 if (error) {
3183 break;
3184 }
3185 i_size_changed = 1;
3186 old_i_size = ip->i_size;
3187 ip->i_size = uoff + n;
3188 /*
3189 * If we are writing from the beginning of
3190 * the mapping, we can just create the
3191 * pages without having to read them.
3192 */
3193 pagecreate = (mapon == 0);
3194 } else if (n == MAXBSIZE) {
3195 /*
3196 * Going to do a whole mapping's worth,
3197 * so we can just create the pages w/o
3198 * having to read them in. But before
3199 * we do that, we need to make sure any
3200 * needed blocks are allocated first.
3201 */
3202 error = ud_bmap_write(ip, uoff,
3203 (int)(on + n), 1, cr);
3204 if (error) {
3205 break;
3206 }
3207 pagecreate = 1;
3208 } else {
3209 pagecreate = 0;
3210 }
3211
3212 rw_exit(&ip->i_contents);
3213
3214 /*
3215 * Touch the page and fault it in if it is not in
3216 * core before segmap_getmapflt can lock it. This
3217 * is to avoid the deadlock if the buffer is mapped
3218 * to the same file through mmap which we want to
3219 * write to.
3220 */
3221 uio_prefaultpages((long)n, uio);
3222
3223 base = segmap_getmapflt(segkmap, vp, (off + mapon),
3224 (uint32_t)n, !pagecreate, S_WRITE);
3225
3226 /*
3227 * segmap_pagecreate() returns 1 if it calls
3228 * page_create_va() to allocate any pages.
3229 */
3230 newpage = 0;
3231 if (pagecreate) {
3232 newpage = segmap_pagecreate(segkmap, base,
3233 (size_t)n, 0);
3234 }
3235
3236 premove_resid = uio->uio_resid;
3237 error = uiomove(base + mapon, (long)n, UIO_WRITE, uio);
3238
3239 if (pagecreate &&
3240 uio->uio_loffset < roundup(off + mapon + n, PAGESIZE)) {
3241 /*
3242 * We created pages w/o initializing them completely,
3243 * thus we need to zero the part that wasn't set up.
3244 * This happens on most EOF write cases and if
3245 * we had some sort of error during the uiomove.
3246 */
3247 int nzero, nmoved;
3248
3249 nmoved = (int)(uio->uio_loffset - (off + mapon));
3250 ASSERT(nmoved >= 0 && nmoved <= n);
3251 nzero = roundup(on + n, PAGESIZE) - nmoved;
3252 ASSERT(nzero > 0 && mapon + nmoved + nzero <= MAXBSIZE);
3253 (void) kzero(base + mapon + nmoved, (uint32_t)nzero);
3254 }
3255
3256 /*
3257 * Unlock the pages allocated by page_create_va()
3258 * in segmap_pagecreate()
3259 */
3260 if (newpage) {
3261 segmap_pageunlock(segkmap, base, (size_t)n, S_WRITE);
3262 }
3263
3264 if (error) {
3265 /*
3266 * If we failed on a write, we may have already
3267 * allocated file blocks as well as pages. It's
3268 * hard to undo the block allocation, but we must
3269 * be sure to invalidate any pages that may have
3270 * been allocated.
3271 */
3272 (void) segmap_release(segkmap, base, SM_INVAL);
3273 } else {
3274 flags = 0;
3275 /*
3276 * Force write back for synchronous write cases.
3277 */
3278 if ((ioflag & (FSYNC|FDSYNC)) || ip->i_type == VDIR) {
3279 /*
3280 * If the sticky bit is set but the
3281 * execute bit is not set, we do a
3282 * synchronous write back and free
3283 * the page when done. We set up swap
3284 * files to be handled this way to
3285 * prevent servers from keeping around
3286 * the client's swap pages too long.
3287 * XXX - there ought to be a better way.
3288 */
3289 if (IS_SWAPVP(vp)) {
3290 flags = SM_WRITE | SM_FREE |
3291 SM_DONTNEED;
3292 iupdat_flag = 0;
3293 } else {
3294 flags = SM_WRITE;
3295 }
3296 } else if (((mapon + n) == MAXBSIZE) ||
3297 IS_SWAPVP(vp)) {
3298 /*
3299 * Have written a whole block.
3300 * Start an asynchronous write and
3301 * mark the buffer to indicate that
3302 * it won't be needed again soon.
3303 */
3304 flags = SM_WRITE |SM_ASYNC | SM_DONTNEED;
3305 }
3306 error = segmap_release(segkmap, base, flags);
3307
3308 /*
3309 * If the operation failed and is synchronous,
3310 * then we need to unwind what uiomove() last
3311 * did so we can potentially return an error to
3312 * the caller. If this write operation was
3313 * done in two pieces and the first succeeded,
3314 * then we won't return an error for the second
3315 * piece that failed. However, we only want to
3316 * return a resid value that reflects what was
3317 * really done.
3318 *
3319 * Failures for non-synchronous operations can
3320 * be ignored since the page subsystem will
3321 * retry the operation until it succeeds or the
3322 * file system is unmounted.
3323 */
3324 if (error) {
3325 if ((ioflag & (FSYNC | FDSYNC)) ||
3326 ip->i_type == VDIR) {
3327 uio->uio_resid = premove_resid;
3328 } else {
3329 error = 0;
3330 }
3331 }
3332 }
3333
3334 /*
3335 * Re-acquire contents lock.
3336 */
3337 rw_enter(&ip->i_contents, RW_WRITER);
3338 /*
3339 * If the uiomove() failed or if a synchronous
3340 * page push failed, fix up i_size.
3341 */
3342 if (error) {
3343 if (i_size_changed) {
3344 /*
3345 * The uiomove failed, and we
3346 * allocated blocks, so get rid
3347 * of them.
3348 */
3349 (void) ud_itrunc(ip, old_i_size, 0, cr);
3350 }
3351 } else {
3352 /*
3353 * XXX - Can this be out of the loop?
3354 */
3355 ip->i_flag |= IUPD | ICHG;
3356 if (i_size_changed) {
3357 ip->i_flag |= IATTCHG;
3358 }
3359 if ((ip->i_perm & (IEXEC | (IEXEC >> 5) |
3360 (IEXEC >> 10))) != 0 &&
3361 (ip->i_char & (ISUID | ISGID)) != 0 &&
3362 secpolicy_vnode_setid_retain(cr,
3363 (ip->i_char & ISUID) != 0 && ip->i_uid == 0) != 0) {
3364 /*
3365 * Clear Set-UID & Set-GID bits on
3366 * successful write if not privileged
3367 * and at least one of the execute bits
3368 * is set. If we always clear Set-GID,
3369 * mandatory file and record locking is
3370 * unusable.
3371 */
3372 ip->i_char &= ~(ISUID | ISGID);
3373 }
3374 }
3375 } while (error == 0 && uio->uio_resid > 0 && n != 0);
3376
3377 out:
3378 /*
3379 * Inode is updated according to this table -
3380 *
3381 * FSYNC FDSYNC(posix.4)
3382 * --------------------------
3383 * always@ IATTCHG|IBDWRITE
3384 *
3385 * @ - If we are doing synchronous write the only time we should
3386 * not be sync'ing the ip here is if we have the stickyhack
3387 * activated, the file is marked with the sticky bit and
3388 * no exec bit, the file length has not been changed and
3389 * no new blocks have been allocated during this write.
3390 */
3391 if ((ip->i_flag & ISYNC) != 0) {
3392 /*
3393 * we have eliminated nosync
3394 */
3395 if ((ip->i_flag & (IATTCHG|IBDWRITE)) ||
3396 ((ioflag & FSYNC) && iupdat_flag)) {
3397 ud_iupdat(ip, 1);
3398 }
3399 }
3400
3401 /*
3402 * If we've already done a partial-write, terminate
3403 * the write but return no error.
3404 */
3405 if (start_resid != uio->uio_resid) {
3406 error = 0;
3407 }
3408 ip->i_flag &= ~(INOACC | ISYNC);
3409 ITIMES_NOLOCK(ip);
3410
3411 return (error);
3412 }
3413
3414 int32_t
3415 ud_multi_strat(struct ud_inode *ip,
3416 page_t *pp, struct buf *bp, u_offset_t start)
3417 {
3418 daddr_t bn;
3419 int32_t error = 0, io_count, contig, alloc_sz, i;
3420 uint32_t io_off;
3421 mio_master_t *mm = NULL;
3422 mio_slave_t *ms = NULL;
3423 struct buf *rbp;
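/*
 * Two passes over the request: the first counts the non-hole
 * extents (holes are zero-filled on reads), the second clones
 * the original buffer into one slave buffer per extent with
 * bioclone().  ud_slave_done() folds the slaves back into the
 * master and biodone()s the original buffer when all complete.
 */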
3424
3425 ASSERT(!(start & PAGEOFFSET));
3426
3427 /*
3428 * Figure out how many buffers to allocate
3429 */
3430 io_count = 0;
3431 for (io_off = 0; io_off < bp->b_bcount; io_off += contig) {
3432 contig = 0;
3433 if (error = ud_bmap_read(ip, (u_offset_t)(start + io_off),
3434 &bn, &contig)) {
3435 goto end;
3436 }
3437 if (contig == 0) {
3438 goto end;
3439 }
3440 contig = MIN(contig, PAGESIZE - io_off);
3441 if (bn != UDF_HOLE) {
3442 io_count ++;
3443 } else {
3444 /*
3445 * HOLE
3446 */
3447 if (bp->b_flags & B_READ) {
3448
3449 /*
3450 * This is a hole being read;
3451 * it should be filled with 0's
3452 */
3453 pagezero(pp, io_off, contig);
3454 }
3455 }
3456 }
3457
3458
3459 if (io_count != 0) {
3460
3461 /*
3462 * Allocate memory for the
3463 * required number of buffers
3464 */
3465 alloc_sz = sizeof (mio_master_t) +
3466 (sizeof (mio_slave_t) * io_count);
3467 mm = (mio_master_t *)kmem_zalloc(alloc_sz, KM_SLEEP);
3468 if (mm == NULL) {
3469 error = ENOMEM;
3470 goto end;
3471 }
3472
3473 /*
3474 * initialize master
3475 */
3476 mutex_init(&mm->mm_mutex, NULL, MUTEX_DEFAULT, NULL);
3477 mm->mm_size = alloc_sz;
3478 mm->mm_bp = bp;
3479 mm->mm_resid = 0;
3480 mm->mm_error = 0;
3481 mm->mm_index = master_index++;
3482
3483 ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t));
3484
3485 /*
3486 * Initialize buffers
3487 */
3488 io_count = 0;
3489 for (io_off = 0; io_off < bp->b_bcount; io_off += contig) {
3490 contig = 0;
3491 if (error = ud_bmap_read(ip,
3492 (u_offset_t)(start + io_off),
3493 &bn, &contig)) {
3494 goto end;
3495 }
3496 ASSERT(contig);
3497 if ((io_off + contig) > bp->b_bcount) {
3498 contig = bp->b_bcount - io_off;
3499 }
3500 if (bn != UDF_HOLE) {
3501 /*
3502 * Clone the buffer
3503 * and prepare to start I/O
3504 */
3505 ms->ms_ptr = mm;
3506 bioinit(&ms->ms_buf);
3507 rbp = bioclone(bp, io_off, (size_t)contig,
3508 bp->b_edev, bn, ud_slave_done,
3509 &ms->ms_buf, KM_NOSLEEP);
3510 ASSERT(rbp == &ms->ms_buf);
3511 mm->mm_resid += contig;
3512 io_count++;
3513 ms ++;
3514 }
3515 }
3516
3517 /*
3518 * Start I/O's
3519 */
3520 ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t));
3521 for (i = 0; i < io_count; i++) {
3522 (void) bdev_strategy(&ms->ms_buf);
3523 ms ++;
3524 }
3525 }
3526
3527 end:
3528 if (error != 0) {
3529 bp->b_flags |= B_ERROR;
3530 bp->b_error = error;
3531 if (mm != NULL) {
3532 mutex_destroy(&mm->mm_mutex);
3533 kmem_free(mm, mm->mm_size);
3534 }
3535 }
3536 return (error);
3537 }
3538
3539 int32_t
3540 ud_slave_done(struct buf *bp)
3541 {
3542 mio_master_t *mm;
3543 int32_t resid;
3544
3545 ASSERT(SEMA_HELD(&bp->b_sem));
3546 ASSERT((bp->b_flags & B_DONE) == 0);
3547
3548 mm = ((mio_slave_t *)bp)->ms_ptr;
3549
3550 /*
3551 * Propagate error and byte count info from slave struct to
3552 * the master struct
3553 */
3554 mutex_enter(&mm->mm_mutex);
3555 if (bp->b_flags & B_ERROR) {
3556
3557 /*
3558 * If multiple slave buffers get
3559 * errors we forget the old errors;
3560 * this is ok because we cannot
3561 * return multiple errors anyway
3562 */
3563 mm->mm_error = bp->b_error;
3564 }
3565 mm->mm_resid -= bp->b_bcount;
3566 resid = mm->mm_resid;
3567 mutex_exit(&mm->mm_mutex);
3568
3569 /*
3570 * free up the resources allocated to cloned buffers.
3571 */
3572 bp_mapout(bp);
3573 biofini(bp);
3574
3575 if (resid == 0) {
3576
3577 /*
3578 * This is the last I/O operation
3579 * clean up and return the original buffer
3580 */
3581 if (mm->mm_error) {
3582 mm->mm_bp->b_flags |= B_ERROR;
3583 mm->mm_bp->b_error = mm->mm_error;
3584 }
3585 biodone(mm->mm_bp);
3586 mutex_destroy(&mm->mm_mutex);
3587 kmem_free(mm, mm->mm_size);
3588 }
3589 return (0);
3590 }
3591