xref: /netbsd-src/sys/ufs/ufs/ufs_readwrite.c (revision 27578b9aac214cc7796ead81dcc5427e79d5f2a0)
1 /*	$NetBSD: ufs_readwrite.c,v 1.34 2001/09/16 13:57:56 chs Exp $	*/
2 
3 /*-
4  * Copyright (c) 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed by the University of
18  *	California, Berkeley and its contributors.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  *	@(#)ufs_readwrite.c	8.11 (Berkeley) 5/8/95
36  */
37 
/*
 * NOTE(review): this file appears to be a template compiled twice — once
 * with LFS_READWRITE defined (for LFS) and once without (for FFS).  The
 * macros below rename the READ/WRITE entry points and redirect the
 * filesystem-specific type and field names so the same function bodies
 * below serve both filesystems.
 */
38 #ifdef LFS_READWRITE
39 #define	BLKSIZE(a, b, c)	blksize(a, b, c)
40 #define	FS			struct lfs
41 #define	I_FS			i_lfs
42 #define	READ			lfs_read
43 #define	READ_S			"lfs_read"
44 #define	WRITE			lfs_write
45 #define	WRITE_S			"lfs_write"
/*
 * The shared code refers to fs_bsize/fs_maxfilesize (struct fs names);
 * map them onto the corresponding struct lfs members.
 */
46 #define	fs_bsize		lfs_bsize
47 #define	fs_maxfilesize		lfs_maxfilesize
48 #else
49 #define	BLKSIZE(a, b, c)	blksize(a, b, c)
50 #define	FS			struct fs
51 #define	I_FS			i_fs
52 #define	READ			ffs_read
53 #define	READ_S			"ffs_read"
54 #define	WRITE			ffs_write
55 #define	WRITE_S			"ffs_write"
/* No field remapping here: struct fs uses fs_bsize/fs_maxfilesize natively. */
56 #endif
57 
58 /*
59  * Vnode op for reading.
60  */
61 /* ARGSUSED */
/*
 * READ expands to ffs_read or lfs_read (see the macros above); this is
 * the shared VOP_READ implementation.  Data is copied from the file at
 * uio->uio_offset into the caller's uio: through the page cache (UBC)
 * for FFS regular files, otherwise a block at a time through the buffer
 * cache with one-block read-ahead.  The inode is marked for access-time
 * update on the way out unless the mount is MNT_NOATIME.
 */
62 int
63 READ(void *v)
64 {
65 	struct vop_read_args /* {
66 		struct vnode *a_vp;
67 		struct uio *a_uio;
68 		int a_ioflag;
69 		struct ucred *a_cred;
70 	} */ *ap = v;
71 	struct vnode *vp;
72 	struct inode *ip;
73 	struct uio *uio;
74 	FS *fs;
75 	void *win;
76 	vsize_t bytelen;
77 	struct buf *bp;
78 	ufs_daddr_t lbn, nextlbn;
79 	off_t bytesinfile;
80 	long size, xfersize, blkoffset;
81 	int error;
82 	boolean_t usepc = FALSE;
83 
84 	vp = ap->a_vp;
85 	ip = VTOI(vp);
86 	uio = ap->a_uio;
87 	error = 0;
88 
	/* Sanity checks: must be a read request on a readable vnode type. */
89 #ifdef DIAGNOSTIC
90 	if (uio->uio_rw != UIO_READ)
91 		panic("%s: mode", READ_S);
92 
93 	if (vp->v_type == VLNK) {
94 		if ((int)ip->i_ffs_size < vp->v_mount->mnt_maxsymlinklen ||
95 		    (vp->v_mount->mnt_maxsymlinklen == 0 &&
96 		     ip->i_ffs_blocks == 0))
97 			panic("%s: short symlink", READ_S);
98 	} else if (vp->v_type != VREG && vp->v_type != VDIR)
99 		panic("%s: type %d", READ_S, vp->v_type);
100 #endif
	/*
	 * Range checks: an offset beyond the fs size limit is an error;
	 * a zero-length request or a read starting at/past EOF transfers
	 * nothing (the latter still updates the access time via "out").
	 */
101 	fs = ip->I_FS;
102 	if ((u_int64_t)uio->uio_offset > fs->fs_maxfilesize)
103 		return (EFBIG);
104 	if (uio->uio_resid == 0)
105 		return (0);
106 	if (uio->uio_offset >= ip->i_ffs_size) {
107 		goto out;
108 	}
109 
	/*
	 * FFS regular files go through the page cache (UBC); LFS and
	 * non-regular files fall through to the buffer-cache loop below.
	 */
110 #ifndef LFS_READWRITE
111 	usepc = vp->v_type == VREG;
112 #endif
113 	if (usepc) {
114 		while (uio->uio_resid > 0) {
			/* Clamp each window to EOF; a zero-length window means done. */
115 			bytelen = MIN(ip->i_ffs_size - uio->uio_offset,
116 			    uio->uio_resid);
117 			if (bytelen == 0)
118 				break;
119 
			/* Map a window of the file's pages and copy out of it. */
120 			win = ubc_alloc(&vp->v_uobj, uio->uio_offset,
121 					&bytelen, UBC_READ);
122 			error = uiomove(win, bytelen, uio);
123 			ubc_release(win, 0);
124 			if (error)
125 				break;
126 		}
127 		goto out;
128 	}
129 
	/*
	 * Buffer-cache path: read one block per iteration, issuing a
	 * one-block read-ahead (breadn) when more of the file follows.
	 * bp is reset to NULL each iteration so the post-loop brelse()
	 * only fires when we broke out holding a buffer.
	 */
130 	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
131 		bytesinfile = ip->i_ffs_size - uio->uio_offset;
132 		if (bytesinfile <= 0)
133 			break;
134 		lbn = lblkno(fs, uio->uio_offset);
135 		nextlbn = lbn + 1;
136 		size = BLKSIZE(fs, ip, lbn);
137 		blkoffset = blkoff(fs, uio->uio_offset);
		/* Transfer at most: rest of this block, caller's request, bytes to EOF. */
138 		xfersize = MIN(MIN(fs->fs_bsize - blkoffset, uio->uio_resid),
139 		    bytesinfile);
140 
141 		if (lblktosize(fs, nextlbn) >= ip->i_ffs_size)
142 			error = bread(vp, lbn, size, NOCRED, &bp);
143 		else {
144 			int nextsize = BLKSIZE(fs, ip, nextlbn);
145 			error = breadn(vp, lbn,
146 			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
147 		}
148 		if (error)
149 			break;
150 
151 		/*
152 		 * We should only get non-zero b_resid when an I/O error
153 		 * has occurred, which should cause us to break above.
154 		 * However, if the short read did not cause an error,
155 		 * then we want to ensure that we do not uiomove bad
156 		 * or uninitialized data.
157 		 */
158 		size -= bp->b_resid;
159 		if (size < xfersize) {
160 			if (size == 0)
161 				break;
162 			xfersize = size;
163 		}
164 		error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio);
165 		if (error)
166 			break;
167 		brelse(bp);
168 	}
	/* Release the buffer still held if we broke out of the loop early. */
169 	if (bp != NULL)
170 		brelse(bp);
171 
172  out:
	/*
	 * Mark the inode accessed (unless mounted noatime); for IO_SYNC
	 * reads, push the timestamp update to disk immediately.
	 */
173 	if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) {
174 		ip->i_flag |= IN_ACCESS;
175 		if ((ap->a_ioflag & IO_SYNC) == IO_SYNC)
176 			error = VOP_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
177 	}
178 	return (error);
179 }
180 
181 /*
182  * Vnode op for writing.
183  */
/*
 * WRITE expands to ffs_write or lfs_write (see the macros above); this
 * is the shared VOP_WRITE implementation.  FFS regular files are written
 * through the page cache (UBC, with block allocation via
 * ufs_balloc_range/GOP_ALLOC); everything else goes through the buffer
 * cache at the "bcache" label.  On any error the file is truncated back
 * to its original size and the uio is rewound so the caller sees no
 * partial transfer.
 */
184 int
185 WRITE(void *v)
186 {
187 	struct vop_write_args /* {
188 		struct vnode *a_vp;
189 		struct uio *a_uio;
190 		int a_ioflag;
191 		struct ucred *a_cred;
192 	} */ *ap = v;
193 	struct vnode *vp;
194 	struct uio *uio;
195 	struct inode *ip;
196 	struct genfs_node *gp;
197 	FS *fs;
198 	struct buf *bp;
199 	struct proc *p;
200 	struct ucred *cred;
201 	ufs_daddr_t lbn;
202 	off_t osize, origoff, oldoff, preallocoff, endallocoff, nsize;
203 	int blkoffset, error, flags, ioflag, resid, size, xfersize;
204 	int bsize, aflag;
205 	int ubc_alloc_flags;
206 	void *win;
207 	vsize_t bytelen;
208 	boolean_t alloced;
209 	boolean_t usepc = FALSE;
210 
211 	cred = ap->a_cred;
212 	ioflag = ap->a_ioflag;
213 	uio = ap->a_uio;
214 	vp = ap->a_vp;
215 	ip = VTOI(vp);
216 	gp = VTOG(vp);
217 
218 	KASSERT(vp->v_size == ip->i_ffs_size);
219 #ifdef DIAGNOSTIC
220 	if (uio->uio_rw != UIO_WRITE)
221 		panic("%s: mode", WRITE_S);
222 #endif
223 
	/*
	 * Per-type policy: regular files honour O_APPEND and the
	 * append-only inode flag; directory writes must be synchronous;
	 * symlinks are plain writes; anything else is a bug.
	 */
224 	switch (vp->v_type) {
225 	case VREG:
226 		if (ioflag & IO_APPEND)
227 			uio->uio_offset = ip->i_ffs_size;
228 		if ((ip->i_ffs_flags & APPEND) && uio->uio_offset != ip->i_ffs_size)
229 			return (EPERM);
230 		/* FALLTHROUGH */
231 	case VLNK:
232 		break;
233 	case VDIR:
234 		if ((ioflag & IO_SYNC) == 0)
235 			panic("%s: nonsync dir write", WRITE_S);
236 		break;
237 	default:
238 		panic("%s: type", WRITE_S);
239 	}
240 
241 	fs = ip->I_FS;
242 	if (uio->uio_offset < 0 ||
243 	    (u_int64_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
244 		return (EFBIG);
245 #ifdef LFS_READWRITE
246 	/* Disallow writes to the Ifile, even if noschg flag is removed */
247 	/* XXX can this go away when the Ifile is no longer in the namespace? */
248 	if (vp == fs->lfs_ivnode)
249 		return (EPERM);
250 #endif
251 
252 	/*
253 	 * Maybe this should be above the vnode op call, but so long as
254 	 * file servers have no limits, I don't think it matters.
255 	 */
256 	p = uio->uio_procp;
257 	if (vp->v_type == VREG && p &&
258 	    uio->uio_offset + uio->uio_resid >
259 	    p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
260 		psignal(p, SIGXFSZ);
261 		return (EFBIG);
262 	}
263 
	/* Remember the starting residual and size for the error/undo path. */
264 	resid = uio->uio_resid;
265 	osize = ip->i_ffs_size;
266 	bsize = fs->fs_bsize;
267 	error = 0;
268 
	/* FFS regular files use the page-cache path; everything else the buffer cache. */
269 #ifndef LFS_READWRITE
270 	usepc = vp->v_type == VREG;
271 #endif
272 	if (!usepc) {
273 		goto bcache;
274 	}
275 
	/*
	 * preallocoff: first page-rounded block boundary at or beyond both
	 * the old EOF and the write start — writes below it can overlap
	 * data that may already exist on disk.  endallocoff: start of the
	 * final (possibly partial) block of the new file size.  Between
	 * the two, whole new blocks can be allocated in one shot without
	 * pre-initializing their pages (see the comment in the loop).
	 */
276 	preallocoff = round_page(blkroundup(fs, MAX(osize, uio->uio_offset)));
277 	aflag = ioflag & IO_SYNC ? B_SYNC : 0;
278 	nsize = MAX(osize, uio->uio_offset + uio->uio_resid);
279 	endallocoff = nsize - blkoff(fs, nsize);
280 
281 	/*
282 	 * if we're increasing the file size, deal with expanding
283 	 * the fragment if there is one.
284 	 */
285 
286 	if (nsize > osize && lblkno(fs, osize) < NDADDR &&
287 	    lblkno(fs, osize) != lblkno(fs, nsize) &&
288 	    blkroundup(fs, osize) != osize) {
289 		error = ufs_balloc_range(vp, osize, blkroundup(fs, osize) -
290 		    osize, cred, aflag);
291 		if (error) {
292 			goto out;
293 		}
294 	}
295 
296 	alloced = FALSE;
297 	ubc_alloc_flags = UBC_WRITE;
298 	origoff = uio->uio_offset;
299 	while (uio->uio_resid > 0) {
300 		oldoff = uio->uio_offset;
301 		blkoffset = blkoff(fs, uio->uio_offset);
302 		bytelen = MIN(fs->fs_bsize - blkoffset, uio->uio_resid);
303 
304 		/*
305 		 * if we're filling in a hole, allocate the blocks now and
306 		 * initialize the pages first.  if we're extending the file,
307 		 * we can safely allocate blocks without initializing pages
308 		 * since the new blocks will be inaccessible until the write
309 		 * is complete.
310 		 */
311 
312 		if (uio->uio_offset < preallocoff ||
313 		    uio->uio_offset >= endallocoff) {
314 			error = ufs_balloc_range(vp, uio->uio_offset, bytelen,
315 			    cred, aflag);
316 			if (error) {
317 				break;
318 			}
319 			ubc_alloc_flags &= ~UBC_FAULTBUSY;
320 		} else if (!alloced) {
			/* One-shot allocation of the whole middle range, under the genfs glock. */
321 			lockmgr(&gp->g_glock, LK_EXCLUSIVE, NULL);
322 			error = GOP_ALLOC(vp, uio->uio_offset, uio->uio_resid,
323 			    aflag, cred);
324 			lockmgr(&gp->g_glock, LK_RELEASE, NULL);
325 			if (error) {
				/* Undo the one-shot allocation back to the pre-extended region. */
326 				(void) VOP_TRUNCATE(vp, preallocoff,
327 				    ioflag & IO_SYNC, ap->a_cred,
328 				    uio->uio_procp);
329 				break;
330 			}
331 			alloced = TRUE;
332 			ubc_alloc_flags |= UBC_FAULTBUSY;
333 		}
334 
335 		/*
336 		 * copy the data.
337 		 */
338 
339 		win = ubc_alloc(&vp->v_uobj, uio->uio_offset, &bytelen,
340 		    ubc_alloc_flags);
341 		error = uiomove(win, bytelen, uio);
342 		ubc_release(win, 0);
343 		if (error) {
344 			break;
345 		}
346 
347 		/*
348 		 * update UVM's notion of the size now that we've
349 		 * copied the data into the vnode's pages.
350 		 */
351 
352 		if (vp->v_size < uio->uio_offset) {
353 			uvm_vnp_setsize(vp, uio->uio_offset);
354 		}
355 
356 		/*
357 		 * flush what we just wrote if necessary.
358 		 * XXXUBC simplistic async flushing.
359 		 */
360 
		/* Each time the write crosses a 64KB (1<<16) boundary, clean that window. */
361 		if (oldoff >> 16 != uio->uio_offset >> 16) {
362 			simple_lock(&vp->v_uobj.vmobjlock);
363 			error = (vp->v_uobj.pgops->pgo_put)(&vp->v_uobj,
364 			    (oldoff >> 16) << 16, (uio->uio_offset >> 16) << 16,
365 			    PGO_CLEANIT);
366 			if (error) {
367 				break;
368 			}
369 		}
370 	}
	/* For IO_SYNC writes, synchronously flush the full written range. */
371 	if (error == 0 && ioflag & IO_SYNC) {
372 		simple_lock(&vp->v_uobj.vmobjlock);
373 		error = (vp->v_uobj.pgops->pgo_put)(&vp->v_uobj,
374 		    origoff & ~(bsize - 1), blkroundup(fs, uio->uio_offset),
375 		    PGO_CLEANIT|PGO_SYNCIO);
376 	}
377 	goto out;
378 
	/*
	 * Buffer-cache path: allocate and fill one block per iteration.
	 * B_CLRBUF asks VOP_BALLOC to zero the buffer when we are not
	 * going to overwrite all of it.
	 */
379  bcache:
380 	flags = ioflag & IO_SYNC ? B_SYNC : 0;
381 	while (uio->uio_resid > 0) {
382 		lbn = lblkno(fs, uio->uio_offset);
383 		blkoffset = blkoff(fs, uio->uio_offset);
384 		xfersize = MIN(fs->fs_bsize - blkoffset, uio->uio_resid);
385 		if (fs->fs_bsize > xfersize)
386 			flags |= B_CLRBUF;
387 		else
388 			flags &= ~B_CLRBUF;
389 
390 		error = VOP_BALLOC(vp, uio->uio_offset, xfersize,
391 		    ap->a_cred, flags, &bp);
392 
393 		if (error)
394 			break;
		/* Grow the on-disk and UVM notions of the file size as we extend it. */
395 		if (uio->uio_offset + xfersize > ip->i_ffs_size) {
396 			ip->i_ffs_size = uio->uio_offset + xfersize;
397 			uvm_vnp_setsize(vp, ip->i_ffs_size);
398 		}
399 		size = BLKSIZE(fs, ip, lbn) - bp->b_resid;
400 		if (xfersize > size)
401 			xfersize = size;
402 
403 		error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio);
404 
405 		/*
406 		 * if we didn't clear the block and the uiomove failed,
407 		 * the buf will now contain part of some other file,
408 		 * so we need to invalidate it.
409 		 */
410 		if (error && (flags & B_CLRBUF) == 0) {
411 			bp->b_flags |= B_INVAL;
412 			brelse(bp);
413 			break;
414 		}
415 #ifdef LFS_READWRITE
		/*
		 * NOTE(review): lfs_reserve presumably reserves (and the
		 * negative call releases) segment space for the block plus
		 * its potential indirect blocks — confirm against lfs code.
		 */
416 		if (!error)
417 			error = lfs_reserve(fs, vp, btofsb(fs, (NIADDR + 1) << fs->lfs_bshift));
418 		(void)VOP_BWRITE(bp);
419 		if (!error)
420 			lfs_reserve(fs, vp, -btofsb(fs, (NIADDR + 1) << fs->lfs_bshift));
421 #else
		/* Sync write, async write of a now-full block, or delayed write. */
422 		if (ioflag & IO_SYNC)
423 			(void)bwrite(bp);
424 		else if (xfersize + blkoffset == fs->fs_bsize)
425 			bawrite(bp);
426 		else
427 			bdwrite(bp);
428 #endif
429 		if (error || xfersize == 0)
430 			break;
431 	}
432 	/*
433 	 * If we successfully wrote any data, and we are not the superuser
434 	 * we clear the setuid and setgid bits as a precaution against
435 	 * tampering.
436 	 */
437 out:
438 	ip->i_flag |= IN_CHANGE | IN_UPDATE;
439 	if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
440 		ip->i_ffs_mode &= ~(ISUID | ISGID);
	/*
	 * On error, undo: truncate back to the original size and rewind
	 * the uio so the caller sees no partial transfer.  Otherwise, for
	 * IO_SYNC writes that moved data, push the inode update to disk.
	 */
441 	if (error) {
442 		(void) VOP_TRUNCATE(vp, osize, ioflag & IO_SYNC, ap->a_cred,
443 		    uio->uio_procp);
444 		uio->uio_offset -= resid - uio->uio_resid;
445 		uio->uio_resid = resid;
446 	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC) == IO_SYNC)
447 		error = VOP_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
448 	KASSERT(vp->v_size == ip->i_ffs_size);
449 	return (error);
450 }
451