1 /*	$NetBSD: ffs_alloc.c,v 1.145 2013/11/12 03:29:22 dholland Exp $	*/
2 
3 /*-
4  * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Wasabi Systems, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (c) 2002 Networks Associates Technology, Inc.
34  * All rights reserved.
35  *
36  * This software was developed for the FreeBSD Project by Marshall
37  * Kirk McKusick and Network Associates Laboratories, the Security
38  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
39  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
40  * research program
41  *
42  * Copyright (c) 1982, 1986, 1989, 1993
43  *	The Regents of the University of California.  All rights reserved.
44  *
45  * Redistribution and use in source and binary forms, with or without
46  * modification, are permitted provided that the following conditions
47  * are met:
48  * 1. Redistributions of source code must retain the above copyright
49  *    notice, this list of conditions and the following disclaimer.
50  * 2. Redistributions in binary form must reproduce the above copyright
51  *    notice, this list of conditions and the following disclaimer in the
52  *    documentation and/or other materials provided with the distribution.
53  * 3. Neither the name of the University nor the names of its contributors
54  *    may be used to endorse or promote products derived from this software
55  *    without specific prior written permission.
56  *
57  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
58  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
59  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
60  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
61  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
62  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
63  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
64  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
65  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
66  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
67  * SUCH DAMAGE.
68  *
69  *	@(#)ffs_alloc.c	8.19 (Berkeley) 7/13/95
70  */
71 
72 #include <sys/cdefs.h>
73 __KERNEL_RCSID(0, "$NetBSD: ffs_alloc.c,v 1.145 2013/11/12 03:29:22 dholland Exp $");
74 
75 #if defined(_KERNEL_OPT)
76 #include "opt_ffs.h"
77 #include "opt_quota.h"
78 #include "opt_uvm_page_trkown.h"
79 #endif
80 
81 #include <sys/param.h>
82 #include <sys/systm.h>
83 #include <sys/buf.h>
84 #include <sys/cprng.h>
85 #include <sys/fstrans.h>
86 #include <sys/kauth.h>
87 #include <sys/kernel.h>
88 #include <sys/mount.h>
89 #include <sys/proc.h>
90 #include <sys/syslog.h>
91 #include <sys/vnode.h>
92 #include <sys/wapbl.h>
93 
94 #include <miscfs/specfs/specdev.h>
95 #include <ufs/ufs/quota.h>
96 #include <ufs/ufs/ufsmount.h>
97 #include <ufs/ufs/inode.h>
98 #include <ufs/ufs/ufs_extern.h>
99 #include <ufs/ufs/ufs_bswap.h>
100 #include <ufs/ufs/ufs_wapbl.h>
101 
102 #include <ufs/ffs/fs.h>
103 #include <ufs/ffs/ffs_extern.h>
104 
105 #ifdef UVM_PAGE_TRKOWN
106 #include <uvm/uvm.h>
107 #endif
108 
109 static daddr_t ffs_alloccg(struct inode *, int, daddr_t, int, int);
110 static daddr_t ffs_alloccgblk(struct inode *, struct buf *, daddr_t, int);
111 static ino_t ffs_dirpref(struct inode *);
112 static daddr_t ffs_fragextend(struct inode *, int, daddr_t, int, int);
113 static void ffs_fserr(struct fs *, u_int, const char *);
114 static daddr_t ffs_hashalloc(struct inode *, int, daddr_t, int, int,
115     daddr_t (*)(struct inode *, int, daddr_t, int, int));
116 static daddr_t ffs_nodealloccg(struct inode *, int, daddr_t, int, int);
117 static int32_t ffs_mapsearch(struct fs *, struct cg *,
118 				      daddr_t, int);
119 static void ffs_blkfree_common(struct ufsmount *, struct fs *, dev_t, struct buf *,
120     daddr_t, long, bool);
121 static void ffs_freefile_common(struct ufsmount *, struct fs *, dev_t, struct buf *, ino_t,
122     int, bool);
123 
124 /* if 1, changes in optimization strategy are logged */
125 int ffs_log_changeopt = 0;
126 
127 /* in ffs_tables.c */
128 extern const int inside[], around[];
129 extern const u_char * const fragtbl[];
130 
131 /* Basic consistency check for block allocations */
132 static int
133 ffs_check_bad_allocation(const char *func, struct fs *fs, daddr_t bno,
134     long size, dev_t dev, ino_t inum)
135 {
136 	if ((u_int)size > fs->fs_bsize || ffs_fragoff(fs, size) != 0 ||
137 	    ffs_fragnum(fs, bno) + ffs_numfrags(fs, size) > fs->fs_frag) {
138 		printf("dev = 0x%llx, bno = %" PRId64 " bsize = %d, "
139 		    "size = %ld, fs = %s\n",
140 		    (long long)dev, bno, fs->fs_bsize, size, fs->fs_fsmnt);
141 		panic("%s: bad size", func);
142 	}
143 
144 	if (bno >= fs->fs_size) {
145 		printf("bad block %" PRId64 ", ino %llu\n", bno,
146 		    (unsigned long long)inum);
147 		ffs_fserr(fs, inum, "bad block");
148 		return EINVAL;
149 	}
150 	return 0;
151 }
152 
153 /*
154  * Allocate a block in the file system.
155  *
156  * The size of the requested block is given, which must be some
157  * multiple of fs_fsize and <= fs_bsize.
158  * A preference may be optionally specified. If a preference is given
159  * the following hierarchy is used to allocate a block:
160  *   1) allocate the requested block.
161  *   2) allocate a rotationally optimal block in the same cylinder.
162  *   3) allocate a block in the same cylinder group.
163  *   4) quadratically rehash into other cylinder groups, until an
164  *      available block is located.
165  * If no block preference is given the following hierarchy is used
166  * to allocate a block:
167  *   1) allocate a block in the cylinder group that contains the
168  *      inode for the file.
169  *   2) quadratically rehash into other cylinder groups, until an
170  *      available block is located.
171  *
172  * => called with um_lock held
173  * => releases um_lock before returning
174  */
175 int
176 ffs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref, int size, int flags,
177     kauth_cred_t cred, daddr_t *bnp)
178 {
179 	struct ufsmount *ump;
180 	struct fs *fs;
181 	daddr_t bno;
182 	int cg;
183 #if defined(QUOTA) || defined(QUOTA2)
184 	int error;
185 #endif
186 
187 	fs = ip->i_fs;
188 	ump = ip->i_ump;
189 
190 	KASSERT(mutex_owned(&ump->um_lock));
191 
192 #ifdef UVM_PAGE_TRKOWN
193 
194 	/*
195 	 * Sanity-check that allocations within the file size
196 	 * do not allow other threads to read the stale contents
197 	 * of newly allocated blocks.
198 	 * Usually pages will exist to cover the new allocation.
199 	 * There is an optimization in ffs_write() where we skip
200 	 * creating pages if several conditions are met:
201 	 *  - the file must not be mapped (in any user address space).
202 	 *  - the write must cover whole pages and whole blocks.
203 	 * If those conditions are not met then pages must exist and
204 	 * be locked by the current thread.
205 	 */
206 
207 	if (ITOV(ip)->v_type == VREG &&
208 	    ffs_lblktosize(fs, (voff_t)lbn) < round_page(ITOV(ip)->v_size)) {
209 		struct vm_page *pg;
210 		struct vnode *vp = ITOV(ip);
211 		struct uvm_object *uobj = &vp->v_uobj;
212 		voff_t off = trunc_page(ffs_lblktosize(fs, lbn));
213 		voff_t endoff = round_page(ffs_lblktosize(fs, lbn) + size);
214 
215 		mutex_enter(uobj->vmobjlock);
216 		while (off < endoff) {
217 			pg = uvm_pagelookup(uobj, off);
218 			KASSERT((pg == NULL && (vp->v_vflag & VV_MAPPED) == 0 &&
219 				 (size & PAGE_MASK) == 0 &&
220 				 ffs_blkoff(fs, size) == 0) ||
221 				(pg != NULL && pg->owner == curproc->p_pid &&
222 				 pg->lowner == curlwp->l_lid));
223 			off += PAGE_SIZE;
224 		}
225 		mutex_exit(uobj->vmobjlock);
226 	}
227 #endif
228 
229 	*bnp = 0;
230 #ifdef DIAGNOSTIC
231 	if ((u_int)size > fs->fs_bsize || ffs_fragoff(fs, size) != 0) {
232 		printf("dev = 0x%llx, bsize = %d, size = %d, fs = %s\n",
233 		    (unsigned long long)ip->i_dev, fs->fs_bsize, size,
234 		    fs->fs_fsmnt);
235 		panic("ffs_alloc: bad size");
236 	}
237 	if (cred == NOCRED)
238 		panic("ffs_alloc: missing credential");
239 #endif /* DIAGNOSTIC */
240 	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
241 		goto nospace;
242 	if (freespace(fs, fs->fs_minfree) <= 0 &&
243 	    kauth_authorize_system(cred, KAUTH_SYSTEM_FS_RESERVEDSPACE, 0, NULL,
244 	    NULL, NULL) != 0)
245 		goto nospace;
246 #if defined(QUOTA) || defined(QUOTA2)
247 	mutex_exit(&ump->um_lock);
248 	if ((error = chkdq(ip, btodb(size), cred, 0)) != 0)
249 		return (error);
250 	mutex_enter(&ump->um_lock);
251 #endif
252 
253 	if (bpref >= fs->fs_size)
254 		bpref = 0;
255 	if (bpref == 0)
256 		cg = ino_to_cg(fs, ip->i_number);
257 	else
258 		cg = dtog(fs, bpref);
259 	bno = ffs_hashalloc(ip, cg, bpref, size, flags, ffs_alloccg);
260 	if (bno > 0) {
261 		DIP_ADD(ip, blocks, btodb(size));
262 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
263 		*bnp = bno;
264 		return (0);
265 	}
266 #if defined(QUOTA) || defined(QUOTA2)
267 	/*
268 	 * Restore user's disk quota because allocation failed.
269 	 */
270 	(void) chkdq(ip, -btodb(size), cred, FORCE);
271 #endif
272 	if (flags & B_CONTIG) {
273 		/*
274 		 * XXX ump->um_lock handling is "suspect" at best.
275 		 * For the case where ffs_hashalloc() fails early
276 		 * in the B_CONTIG case we reach here with um_lock
277 		 * already unlocked, so we can't release it again
278 		 * like in the normal error path.  See kern/39206.
279 		 *
281 		 * Fail silently - it's up to our caller to report
282 		 * errors.
283 		 */
284 		return (ENOSPC);
285 	}
286 nospace:
287 	mutex_exit(&ump->um_lock);
288 	ffs_fserr(fs, kauth_cred_geteuid(cred), "file system full");
289 	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
290 	return (ENOSPC);
291 }
292 
293 /*
294  * Reallocate a fragment to a bigger size
295  *
296  * The number and size of the old block is given, and a preference
297  * and new size is also specified. The allocator attempts to extend
298  * the original block. Failing that, the regular block allocator is
299  * invoked to get an appropriate block.
300  *
301  * => called with um_lock held
302  * => return with um_lock released
303  */
304 int
305 ffs_realloccg(struct inode *ip, daddr_t lbprev, daddr_t bpref, int osize,
306     int nsize, kauth_cred_t cred, struct buf **bpp, daddr_t *blknop)
307 {
308 	struct ufsmount *ump;
309 	struct fs *fs;
310 	struct buf *bp;
311 	int cg, request, error;
312 	daddr_t bprev, bno;
313 
314 	fs = ip->i_fs;
315 	ump = ip->i_ump;
316 
317 	KASSERT(mutex_owned(&ump->um_lock));
318 
319 #ifdef UVM_PAGE_TRKOWN
320 
321 	/*
322 	 * Sanity-check that allocations within the file size
323 	 * do not allow other threads to read the stale contents
324 	 * of newly allocated blocks.
325 	 * Unlike in ffs_alloc(), here pages must always exist
326 	 * for such allocations, because only the last block of a file
327 	 * can be a fragment and ffs_write() will reallocate the
328 	 * fragment to the new size using ufs_balloc_range(),
329 	 * which always creates pages to cover blocks it allocates.
330 	 */
331 
332 	if (ITOV(ip)->v_type == VREG) {
333 		struct vm_page *pg;
334 		struct uvm_object *uobj = &ITOV(ip)->v_uobj;
335 		voff_t off = trunc_page(ffs_lblktosize(fs, lbprev));
336 		voff_t endoff = round_page(ffs_lblktosize(fs, lbprev) + osize);
337 
338 		mutex_enter(uobj->vmobjlock);
339 		while (off < endoff) {
340 			pg = uvm_pagelookup(uobj, off);
341 			KASSERT(pg->owner == curproc->p_pid &&
342 				pg->lowner == curlwp->l_lid);
343 			off += PAGE_SIZE;
344 		}
345 		mutex_exit(uobj->vmobjlock);
346 	}
347 #endif
348 
349 #ifdef DIAGNOSTIC
350 	if ((u_int)osize > fs->fs_bsize || ffs_fragoff(fs, osize) != 0 ||
351 	    (u_int)nsize > fs->fs_bsize || ffs_fragoff(fs, nsize) != 0) {
352 		printf(
353 		    "dev = 0x%llx, bsize = %d, osize = %d, nsize = %d, fs = %s\n",
354 		    (unsigned long long)ip->i_dev, fs->fs_bsize, osize, nsize,
355 		    fs->fs_fsmnt);
356 		panic("ffs_realloccg: bad size");
357 	}
358 	if (cred == NOCRED)
359 		panic("ffs_realloccg: missing credential");
360 #endif /* DIAGNOSTIC */
361 	if (freespace(fs, fs->fs_minfree) <= 0 &&
362 	    kauth_authorize_system(cred, KAUTH_SYSTEM_FS_RESERVEDSPACE, 0, NULL,
363 	    NULL, NULL) != 0) {
364 		mutex_exit(&ump->um_lock);
365 		goto nospace;
366 	}
367 	if (fs->fs_magic == FS_UFS2_MAGIC)
368 		bprev = ufs_rw64(ip->i_ffs2_db[lbprev], UFS_FSNEEDSWAP(fs));
369 	else
370 		bprev = ufs_rw32(ip->i_ffs1_db[lbprev], UFS_FSNEEDSWAP(fs));
371 
372 	if (bprev == 0) {
373 		printf("dev = 0x%llx, bsize = %d, bprev = %" PRId64 ", fs = %s\n",
374 		    (unsigned long long)ip->i_dev, fs->fs_bsize, bprev,
375 		    fs->fs_fsmnt);
376 		panic("ffs_realloccg: bad bprev");
377 	}
378 	mutex_exit(&ump->um_lock);
379 
380 	/*
381 	 * Allocate the extra space in the buffer.
382 	 */
383 	if (bpp != NULL &&
384 	    (error = bread(ITOV(ip), lbprev, osize, NOCRED, 0, &bp)) != 0) {
385 		return (error);
386 	}
387 #if defined(QUOTA) || defined(QUOTA2)
388 	if ((error = chkdq(ip, btodb(nsize - osize), cred, 0)) != 0) {
389 		if (bpp != NULL) {
390 			brelse(bp, 0);
391 		}
392 		return (error);
393 	}
394 #endif
395 	/*
396 	 * Check for extension in the existing location.
397 	 */
398 	cg = dtog(fs, bprev);
399 	mutex_enter(&ump->um_lock);
400 	if ((bno = ffs_fragextend(ip, cg, bprev, osize, nsize)) != 0) {
401 		DIP_ADD(ip, blocks, btodb(nsize - osize));
402 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
403 
404 		if (bpp != NULL) {
405 			if (bp->b_blkno != FFS_FSBTODB(fs, bno))
406 				panic("bad blockno");
407 			allocbuf(bp, nsize, 1);
408 			memset((char *)bp->b_data + osize, 0, nsize - osize);
409 			mutex_enter(bp->b_objlock);
410 			KASSERT(!cv_has_waiters(&bp->b_done));
411 			bp->b_oflags |= BO_DONE;
412 			mutex_exit(bp->b_objlock);
413 			*bpp = bp;
414 		}
415 		if (blknop != NULL) {
416 			*blknop = bno;
417 		}
418 		return (0);
419 	}
420 	/*
421 	 * Allocate a new disk location.
422 	 */
423 	if (bpref >= fs->fs_size)
424 		bpref = 0;
425 	switch ((int)fs->fs_optim) {
426 	case FS_OPTSPACE:
427 		/*
428 		 * Allocate an exact sized fragment. Although this makes
429 		 * best use of space, we will waste time relocating it if
430 		 * the file continues to grow. If the fragmentation is
431 		 * less than half of the minimum free reserve, we choose
432 		 * to begin optimizing for time.
433 		 */
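		/*
		 * Illustration (derived from the test below): with
		 * fs_minfree = 5, the switch to FS_OPTTIME happens once
		 * cs_nffree has dropped to at most fs_dsize * 5 / 200,
		 * i.e. fragmented free space of no more than 2.5% of
		 * the data area.
		 */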
434 		request = nsize;
435 		if (fs->fs_minfree < 5 ||
436 		    fs->fs_cstotal.cs_nffree >
437 		    fs->fs_dsize * fs->fs_minfree / (2 * 100))
438 			break;
439 
440 		if (ffs_log_changeopt) {
441 			log(LOG_NOTICE,
442 				"%s: optimization changed from SPACE to TIME\n",
443 				fs->fs_fsmnt);
444 		}
445 
446 		fs->fs_optim = FS_OPTTIME;
447 		break;
448 	case FS_OPTTIME:
449 		/*
450 		 * At this point we have discovered a file that is trying to
451 		 * grow a small fragment to a larger fragment. To save time,
452 		 * we allocate a full sized block, then free the unused portion.
453 		 * If the file continues to grow, the `ffs_fragextend' call
454 		 * above will be able to grow it in place without further
455 		 * copying. If aberrant programs cause disk fragmentation to
456 		 * grow within 2% of the free reserve, we choose to begin
457 		 * optimizing for space.
458 		 */
459 		request = fs->fs_bsize;
460 		if (fs->fs_cstotal.cs_nffree <
461 		    fs->fs_dsize * (fs->fs_minfree - 2) / 100)
462 			break;
463 
464 		if (ffs_log_changeopt) {
465 			log(LOG_NOTICE,
466 				"%s: optimization changed from TIME to SPACE\n",
467 				fs->fs_fsmnt);
468 		}
469 
470 		fs->fs_optim = FS_OPTSPACE;
471 		break;
472 	default:
473 		printf("dev = 0x%llx, optim = %d, fs = %s\n",
474 		    (unsigned long long)ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
475 		panic("ffs_realloccg: bad optim");
476 		/* NOTREACHED */
477 	}
478 	bno = ffs_hashalloc(ip, cg, bpref, request, 0, ffs_alloccg);
479 	if (bno > 0) {
480 		if ((ip->i_ump->um_mountp->mnt_wapbl) &&
481 		    (ITOV(ip)->v_type != VREG)) {
482 			UFS_WAPBL_REGISTER_DEALLOCATION(
483 			    ip->i_ump->um_mountp, FFS_FSBTODB(fs, bprev),
484 			    osize);
485 		} else {
486 			ffs_blkfree(fs, ip->i_devvp, bprev, (long)osize,
487 			    ip->i_number);
488 		}
489 		if (nsize < request) {
490 			if ((ip->i_ump->um_mountp->mnt_wapbl) &&
491 			    (ITOV(ip)->v_type != VREG)) {
492 				UFS_WAPBL_REGISTER_DEALLOCATION(
493 				    ip->i_ump->um_mountp,
494 				    FFS_FSBTODB(fs, (bno + ffs_numfrags(fs, nsize))),
495 				    request - nsize);
496 			} else
497 				ffs_blkfree(fs, ip->i_devvp,
498 				    bno + ffs_numfrags(fs, nsize),
499 				    (long)(request - nsize), ip->i_number);
500 		}
501 		DIP_ADD(ip, blocks, btodb(nsize - osize));
502 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
503 		if (bpp != NULL) {
504 			bp->b_blkno = FFS_FSBTODB(fs, bno);
505 			allocbuf(bp, nsize, 1);
506 			memset((char *)bp->b_data + osize, 0, (u_int)nsize - osize);
507 			mutex_enter(bp->b_objlock);
508 			KASSERT(!cv_has_waiters(&bp->b_done));
509 			bp->b_oflags |= BO_DONE;
510 			mutex_exit(bp->b_objlock);
511 			*bpp = bp;
512 		}
513 		if (blknop != NULL) {
514 			*blknop = bno;
515 		}
516 		return (0);
517 	}
518 	mutex_exit(&ump->um_lock);
519 
520 #if defined(QUOTA) || defined(QUOTA2)
521 	/*
522 	 * Restore user's disk quota because allocation failed.
523 	 */
524 	(void) chkdq(ip, -btodb(nsize - osize), cred, FORCE);
525 #endif
526 	if (bpp != NULL) {
527 		brelse(bp, 0);
528 	}
529 
530 nospace:
531 	/*
532 	 * no space available
533 	 */
534 	ffs_fserr(fs, kauth_cred_geteuid(cred), "file system full");
535 	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
536 	return (ENOSPC);
537 }
538 
539 /*
540  * Allocate an inode in the file system.
541  *
542  * If allocating a directory, use ffs_dirpref to select the inode.
543  * If allocating in a directory, the following hierarchy is followed:
544  *   1) allocate the preferred inode.
545  *   2) allocate an inode in the same cylinder group.
546  *   3) quadratically rehash into other cylinder groups, until an
547  *      available inode is located.
548  * If no inode preference is given the following hierarchy is used
549  * to allocate an inode:
550  *   1) allocate an inode in cylinder group 0.
551  *   2) quadratically rehash into other cylinder groups, until an
552  *      available inode is located.
553  *
554  * => um_lock not held upon entry or return
555  */
556 int
557 ffs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred,
558     struct vnode **vpp)
559 {
560 	struct ufsmount *ump;
561 	struct inode *pip;
562 	struct fs *fs;
563 	struct inode *ip;
564 	struct timespec ts;
565 	ino_t ino, ipref;
566 	int cg, error;
567 
568 	UFS_WAPBL_JUNLOCK_ASSERT(pvp->v_mount);
569 
570 	*vpp = NULL;
571 	pip = VTOI(pvp);
572 	fs = pip->i_fs;
573 	ump = pip->i_ump;
574 
575 	error = UFS_WAPBL_BEGIN(pvp->v_mount);
576 	if (error) {
577 		return error;
578 	}
579 	mutex_enter(&ump->um_lock);
580 	if (fs->fs_cstotal.cs_nifree == 0)
581 		goto noinodes;
582 
583 	if ((mode & IFMT) == IFDIR)
584 		ipref = ffs_dirpref(pip);
585 	else
586 		ipref = pip->i_number;
587 	if (ipref >= fs->fs_ncg * fs->fs_ipg)
588 		ipref = 0;
589 	cg = ino_to_cg(fs, ipref);
590 	/*
591 	 * Track the number of dirs created one after another
592 	 * in the same cg without intervening file creations.
593 	 */
594 	if ((mode & IFMT) == IFDIR) {
595 		if (fs->fs_contigdirs[cg] < 255)
596 			fs->fs_contigdirs[cg]++;
597 	} else {
598 		if (fs->fs_contigdirs[cg] > 0)
599 			fs->fs_contigdirs[cg]--;
600 	}
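	/*
	 * fs_contigdirs[cg] feeds back into ffs_dirpref(): a cylinder
	 * group is only preferred for a new directory while its count
	 * stays below the maxcontigdirs limit computed there.
	 */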
601 	ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode, 0, ffs_nodealloccg);
602 	if (ino == 0)
603 		goto noinodes;
604 	UFS_WAPBL_END(pvp->v_mount);
605 	error = VFS_VGET(pvp->v_mount, ino, vpp);
606 	if (error) {
607 		int err;
608 		err = UFS_WAPBL_BEGIN(pvp->v_mount);
609 		if (err == 0)
610 			ffs_vfree(pvp, ino, mode);
611 		if (err == 0)
612 			UFS_WAPBL_END(pvp->v_mount);
613 		return (error);
614 	}
615 	KASSERT((*vpp)->v_type == VNON);
616 	ip = VTOI(*vpp);
617 	if (ip->i_mode) {
618 #if 0
619 		printf("mode = 0%o, inum = %d, fs = %s\n",
620 		    ip->i_mode, ip->i_number, fs->fs_fsmnt);
621 #else
622 		printf("dmode %x mode %x dgen %x gen %x\n",
623 		    DIP(ip, mode), ip->i_mode,
624 		    DIP(ip, gen), ip->i_gen);
625 		printf("size %llx blocks %llx\n",
626 		    (long long)DIP(ip, size), (long long)DIP(ip, blocks));
627 		printf("ino %llu ipref %llu\n", (unsigned long long)ino,
628 		    (unsigned long long)ipref);
629 #if 0
630 		error = bread(ump->um_devvp, FFS_FSBTODB(fs, ino_to_fsba(fs, ino)),
631 		    (int)fs->fs_bsize, NOCRED, 0, &bp);
632 #endif
633 
634 #endif
635 		panic("ffs_valloc: dup alloc");
636 	}
637 	if (DIP(ip, blocks)) {				/* XXX */
638 		printf("free inode %llu on %s had %" PRId64 " blocks\n",
639 		    (unsigned long long)ino, fs->fs_fsmnt, DIP(ip, blocks));
640 		DIP_ASSIGN(ip, blocks, 0);
641 	}
642 	ip->i_flag &= ~IN_SPACECOUNTED;
643 	ip->i_flags = 0;
644 	DIP_ASSIGN(ip, flags, 0);
645 	/*
646 	 * Set up a new generation number for this inode.
647 	 */
648 	ip->i_gen++;
649 	DIP_ASSIGN(ip, gen, ip->i_gen);
650 	if (fs->fs_magic == FS_UFS2_MAGIC) {
651 		vfs_timestamp(&ts);
652 		ip->i_ffs2_birthtime = ts.tv_sec;
653 		ip->i_ffs2_birthnsec = ts.tv_nsec;
654 	}
655 	return (0);
656 noinodes:
657 	mutex_exit(&ump->um_lock);
658 	UFS_WAPBL_END(pvp->v_mount);
659 	ffs_fserr(fs, kauth_cred_geteuid(cred), "out of inodes");
660 	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
661 	return (ENOSPC);
662 }
663 
664 /*
665  * Find a cylinder group in which to place a directory.
666  *
667  * The policy implemented by this algorithm is to allocate a
668  * directory inode in the same cylinder group as its parent
669  * directory, but also to reserve space for its files' inodes
670  * and data. Restrict the number of directories which may be
671  * allocated one after another in the same cylinder group
672  * without intervening allocation of files.
673  *
674  * If we allocate a first level directory then force allocation
675  * in another cylinder group.
676  */
677 static ino_t
678 ffs_dirpref(struct inode *pip)
679 {
680 	register struct fs *fs;
681 	int cg, prefcg;
682 	int64_t dirsize, cgsize, curdsz;
683 	int avgifree, avgbfree, avgndir;
684 	int minifree, minbfree, maxndir;
685 	int mincg, minndir;
686 	int maxcontigdirs;
687 
688 	KASSERT(mutex_owned(&pip->i_ump->um_lock));
689 
690 	fs = pip->i_fs;
691 
692 	avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
693 	avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
694 	avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg;
695 
696 	/*
697 	 * Force allocation in another cg if creating a first level dir.
698 	 */
699 	if (ITOV(pip)->v_vflag & VV_ROOT) {
700 		prefcg = random() % fs->fs_ncg;
701 		mincg = prefcg;
702 		minndir = fs->fs_ipg;
703 		for (cg = prefcg; cg < fs->fs_ncg; cg++)
704 			if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
705 			    fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
706 			    fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
707 				mincg = cg;
708 				minndir = fs->fs_cs(fs, cg).cs_ndir;
709 			}
710 		for (cg = 0; cg < prefcg; cg++)
711 			if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
712 			    fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
713 			    fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
714 				mincg = cg;
715 				minndir = fs->fs_cs(fs, cg).cs_ndir;
716 			}
717 		return ((ino_t)(fs->fs_ipg * mincg));
718 	}
719 
720 	/*
721  * Compute the various limits used for
722 	 * optimal allocation of a directory inode.
723 	 * Try cylinder groups with >75% avgifree and avgbfree.
724 	 * Avoid cylinder groups with no free blocks or inodes as that
725 	 * triggers an I/O-expensive cylinder group scan.
726 	 */
727 	maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg);
728 	minifree = avgifree - avgifree / 4;
729 	if (minifree < 1)
730 		minifree = 1;
731 	minbfree = avgbfree - avgbfree / 4;
732 	if (minbfree < 1)
733 		minbfree = 1;
734 	cgsize = (int64_t)fs->fs_fsize * fs->fs_fpg;
735 	dirsize = (int64_t)fs->fs_avgfilesize * fs->fs_avgfpdir;
736 	if (avgndir != 0) {
737 		curdsz = (cgsize - (int64_t)avgbfree * fs->fs_bsize) / avgndir;
738 		if (dirsize < curdsz)
739 			dirsize = curdsz;
740 	}
741 	if (cgsize < dirsize * 255)
742 		maxcontigdirs = (avgbfree * fs->fs_bsize) / dirsize;
743 	else
744 		maxcontigdirs = 255;
745 	if (fs->fs_avgfpdir > 0)
746 		maxcontigdirs = min(maxcontigdirs,
747 				    fs->fs_ipg / fs->fs_avgfpdir);
748 	if (maxcontigdirs == 0)
749 		maxcontigdirs = 1;
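	/*
	 * Summary of the limits computed above: dirsize estimates the
	 * bytes a new directory will eventually consume (the expected
	 * fs_avgfilesize * fs_avgfpdir, or the measured per-directory
	 * usage if that is larger), and maxcontigdirs is roughly how
	 * many such directories fit in an average cylinder group's
	 * free blocks, capped at 255 and at fs_ipg / fs_avgfpdir.
	 */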
750 
751 	/*
752 	 * Limit number of dirs in one cg and reserve space for
753 	 * regular files, but only if we have no deficit in
754 	 * inodes or space.
755 	 */
756 	prefcg = ino_to_cg(fs, pip->i_number);
757 	for (cg = prefcg; cg < fs->fs_ncg; cg++)
758 		if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
759 		    fs->fs_cs(fs, cg).cs_nifree >= minifree &&
760 	    	    fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
761 			if (fs->fs_contigdirs[cg] < maxcontigdirs)
762 				return ((ino_t)(fs->fs_ipg * cg));
763 		}
764 	for (cg = 0; cg < prefcg; cg++)
765 		if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
766 		    fs->fs_cs(fs, cg).cs_nifree >= minifree &&
767 	    	    fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
768 			if (fs->fs_contigdirs[cg] < maxcontigdirs)
769 				return ((ino_t)(fs->fs_ipg * cg));
770 		}
771 	/*
772 	 * This is a backstop when we are deficient in space.
773 	 */
774 	for (cg = prefcg; cg < fs->fs_ncg; cg++)
775 		if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
776 			return ((ino_t)(fs->fs_ipg * cg));
777 	for (cg = 0; cg < prefcg; cg++)
778 		if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
779 			break;
780 	return ((ino_t)(fs->fs_ipg * cg));
781 }
782 
783 /*
784  * Select the desired position for the next block in a file.  The file is
785  * logically divided into sections. The first section is composed of the
786  * direct blocks. Each additional section contains fs_maxbpg blocks.
787  *
788  * If no blocks have been allocated in the first section, the policy is to
789  * request a block in the same cylinder group as the inode that describes
790  * the file. If no blocks have been allocated in any other section, the
791  * policy is to place the section in a cylinder group with a greater than
792  * average number of free blocks.  An appropriate cylinder group is found
793  * by using a rotor that sweeps the cylinder groups. When a new group of
794  * blocks is needed, the sweep begins in the cylinder group following the
795  * cylinder group from which the previous allocation was made. The sweep
796  * continues until a cylinder group with greater than the average number
797  * of free blocks is found. If the allocation is for the first block in an
798  * indirect block, the information on the previous allocation is unavailable;
799  * here a best guess is made based upon the logical block number being
800  * allocated.
801  *
802  * If a section is already partially allocated, the policy is to
803  * contiguously allocate fs_maxcontig blocks.  The end of one of these
804  * contiguous blocks and the beginning of the next are laid out
805  * contiguously if possible.
806  *
807  * => um_lock held on entry and exit
808  */
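/*
 * Illustration: if fs_maxbpg were 2048, then every 2048th entry of a
 * block map (and any entry whose predecessor is unallocated) starts a
 * new section and triggers the cylinder group search described above;
 * all other entries simply prefer the fragment just past the previously
 * allocated block, i.e. bap[indx - 1] + fs_frag.
 */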
809 daddr_t
810 ffs_blkpref_ufs1(struct inode *ip, daddr_t lbn, int indx, int flags,
811     int32_t *bap /* XXX ondisk32 */)
812 {
813 	struct fs *fs;
814 	int cg;
815 	int avgbfree, startcg;
816 
817 	KASSERT(mutex_owned(&ip->i_ump->um_lock));
818 
819 	fs = ip->i_fs;
820 
821 	/*
822 	 * If allocating a contiguous file with B_CONTIG, use the hints
823  * in the inode extensions to return the desired block.
824 	 *
825 	 * For metadata (indirect blocks) return the address of where
826 	 * the first indirect block resides - we'll scan for the next
827 	 * available slot if we need to allocate more than one indirect
828 	 * block.  For data, return the address of the actual block
829 	 * relative to the address of the first data block.
830 	 */
831 	if (flags & B_CONTIG) {
832 		KASSERT(ip->i_ffs_first_data_blk != 0);
833 		KASSERT(ip->i_ffs_first_indir_blk != 0);
834 		if (flags & B_METAONLY)
835 			return ip->i_ffs_first_indir_blk;
836 		else
837 			return ip->i_ffs_first_data_blk + ffs_blkstofrags(fs, lbn);
838 	}
839 
840 	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
841 		if (lbn < UFS_NDADDR + FFS_NINDIR(fs)) {
842 			cg = ino_to_cg(fs, ip->i_number);
843 			return (cgbase(fs, cg) + fs->fs_frag);
844 		}
845 		/*
846 		 * Find a cylinder with greater than average number of
847 		 * unused data blocks.
848 		 */
849 		if (indx == 0 || bap[indx - 1] == 0)
850 			startcg =
851 			    ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
852 		else
853 			startcg = dtog(fs,
854 				ufs_rw32(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + 1);
855 		startcg %= fs->fs_ncg;
856 		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
857 		for (cg = startcg; cg < fs->fs_ncg; cg++)
858 			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
859 				return (cgbase(fs, cg) + fs->fs_frag);
860 			}
861 		for (cg = 0; cg < startcg; cg++)
862 			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
863 				return (cgbase(fs, cg) + fs->fs_frag);
864 			}
865 		return (0);
866 	}
867 	/*
868 	 * We just always try to lay things out contiguously.
869 	 */
870 	return ufs_rw32(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + fs->fs_frag;
871 }
872 
873 daddr_t
874 ffs_blkpref_ufs2(struct inode *ip, daddr_t lbn, int indx, int flags,
875     int64_t *bap)
876 {
877 	struct fs *fs;
878 	int cg;
879 	int avgbfree, startcg;
880 
881 	KASSERT(mutex_owned(&ip->i_ump->um_lock));
882 
883 	fs = ip->i_fs;
884 
885 	/*
886 	 * If allocating a contiguous file with B_CONTIG, use the hints
887  * in the inode extensions to return the desired block.
888 	 *
889 	 * For metadata (indirect blocks) return the address of where
890 	 * the first indirect block resides - we'll scan for the next
891 	 * available slot if we need to allocate more than one indirect
892 	 * block.  For data, return the address of the actual block
893 	 * relative to the address of the first data block.
894 	 */
895 	if (flags & B_CONTIG) {
896 		KASSERT(ip->i_ffs_first_data_blk != 0);
897 		KASSERT(ip->i_ffs_first_indir_blk != 0);
898 		if (flags & B_METAONLY)
899 			return ip->i_ffs_first_indir_blk;
900 		else
901 			return ip->i_ffs_first_data_blk + ffs_blkstofrags(fs, lbn);
902 	}
903 
904 	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
905 		if (lbn < UFS_NDADDR + FFS_NINDIR(fs)) {
906 			cg = ino_to_cg(fs, ip->i_number);
907 			return (cgbase(fs, cg) + fs->fs_frag);
908 		}
909 		/*
910 		 * Find a cylinder with greater than average number of
911 		 * unused data blocks.
912 		 */
913 		if (indx == 0 || bap[indx - 1] == 0)
914 			startcg =
915 			    ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
916 		else
917 			startcg = dtog(fs,
918 				ufs_rw64(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + 1);
919 		startcg %= fs->fs_ncg;
920 		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
921 		for (cg = startcg; cg < fs->fs_ncg; cg++)
922 			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
923 				return (cgbase(fs, cg) + fs->fs_frag);
924 			}
925 		for (cg = 0; cg < startcg; cg++)
926 			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
927 				return (cgbase(fs, cg) + fs->fs_frag);
928 			}
929 		return (0);
930 	}
931 	/*
932 	 * We just always try to lay things out contiguously.
933 	 */
934 	return ufs_rw64(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + fs->fs_frag;
935 }
936 
937 
938 /*
939  * Implement the cylinder overflow algorithm.
940  *
941  * The policy implemented by this algorithm is:
942  *   1) allocate the block in its requested cylinder group.
943  *   2) quadratically rehash on the cylinder group number.
944  *   3) brute force search for a free block.
945  *
946  * => called with um_lock held
947  * => returns with um_lock released on success, held on failure
948  *    (*allocator releases lock on success, retains lock on failure)
949  */
950 /*VARARGS5*/
951 static daddr_t
952 ffs_hashalloc(struct inode *ip, int cg, daddr_t pref,
953     int size /* size for data blocks, mode for inodes */,
954     int flags, daddr_t (*allocator)(struct inode *, int, daddr_t, int, int))
955 {
956 	struct fs *fs;
957 	daddr_t result;
958 	int i, icg = cg;
959 
960 	fs = ip->i_fs;
961 	/*
962 	 * 1: preferred cylinder group
963 	 */
964 	result = (*allocator)(ip, cg, pref, size, flags);
965 	if (result)
966 		return (result);
967 
968 	if (flags & B_CONTIG)
969 		return (result);
970 	/*
971 	 * 2: quadratic rehash
972 	 */
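	/* probes cylinder groups icg+1, icg+3, icg+7, icg+15, ... (mod fs_ncg) */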
973 	for (i = 1; i < fs->fs_ncg; i *= 2) {
974 		cg += i;
975 		if (cg >= fs->fs_ncg)
976 			cg -= fs->fs_ncg;
977 		result = (*allocator)(ip, cg, 0, size, flags);
978 		if (result)
979 			return (result);
980 	}
981 	/*
982 	 * 3: brute force search
983 	 * Note that we start at i == 2, since 0 was checked initially,
984 	 * and 1 is always checked in the quadratic rehash.
985 	 */
986 	cg = (icg + 2) % fs->fs_ncg;
987 	for (i = 2; i < fs->fs_ncg; i++) {
988 		result = (*allocator)(ip, cg, 0, size, flags);
989 		if (result)
990 			return (result);
991 		cg++;
992 		if (cg == fs->fs_ncg)
993 			cg = 0;
994 	}
995 	return (0);
996 }
997 
998 /*
999  * Determine whether a fragment can be extended.
1000  *
1001  * Check to see if the necessary fragments are available, and
1002  * if they are, allocate them.
1003  *
1004  * => called with um_lock held
1005  * => returns with um_lock released on success, held on failure
1006  */
1007 static daddr_t
1008 ffs_fragextend(struct inode *ip, int cg, daddr_t bprev, int osize, int nsize)
1009 {
1010 	struct ufsmount *ump;
1011 	struct fs *fs;
1012 	struct cg *cgp;
1013 	struct buf *bp;
1014 	daddr_t bno;
1015 	int frags, bbase;
1016 	int i, error;
1017 	u_int8_t *blksfree;
1018 
1019 	fs = ip->i_fs;
1020 	ump = ip->i_ump;
1021 
1022 	KASSERT(mutex_owned(&ump->um_lock));
1023 
1024 	if (fs->fs_cs(fs, cg).cs_nffree < ffs_numfrags(fs, nsize - osize))
1025 		return (0);
1026 	frags = ffs_numfrags(fs, nsize);
1027 	bbase = ffs_fragnum(fs, bprev);
1028 	if (bbase > ffs_fragnum(fs, (bprev + frags - 1))) {
1029 		/* cannot extend across a block boundary */
1030 		return (0);
1031 	}
1032 	mutex_exit(&ump->um_lock);
1033 	error = bread(ip->i_devvp, FFS_FSBTODB(fs, cgtod(fs, cg)),
1034 		(int)fs->fs_cgsize, NOCRED, B_MODIFY, &bp);
1035 	if (error)
1036 		goto fail;
1037 	cgp = (struct cg *)bp->b_data;
1038 	if (!cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs)))
1039 		goto fail;
1040 	cgp->cg_old_time = ufs_rw32(time_second, UFS_FSNEEDSWAP(fs));
1041 	if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1042 	    (fs->fs_old_flags & FS_FLAGS_UPDATED))
1043 		cgp->cg_time = ufs_rw64(time_second, UFS_FSNEEDSWAP(fs));
1044 	bno = dtogd(fs, bprev);
1045 	blksfree = cg_blksfree(cgp, UFS_FSNEEDSWAP(fs));
1046 	for (i = ffs_numfrags(fs, osize); i < frags; i++)
1047 		if (isclr(blksfree, bno + i))
1048 			goto fail;
1049 	/*
1050 	 * The current fragment can be extended:
1051 	 * deduct the count on the fragment being extended into,
1052 	 * increase the count on the remaining fragment (if any),
1053 	 * and allocate the extended piece.
1054 	 */
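	/*
	 * Worked example: extending a 2-fragment piece to 5 fragments
	 * when a free run of 4 fragments follows it: cg_frsum[4] is
	 * decremented, 3 of those fragments are consumed, and
	 * cg_frsum[1] is incremented for the leftover free fragment.
	 */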
1055 	for (i = frags; i < fs->fs_frag - bbase; i++)
1056 		if (isclr(blksfree, bno + i))
1057 			break;
1058 	ufs_add32(cgp->cg_frsum[i - ffs_numfrags(fs, osize)], -1, UFS_FSNEEDSWAP(fs));
1059 	if (i != frags)
1060 		ufs_add32(cgp->cg_frsum[i - frags], 1, UFS_FSNEEDSWAP(fs));
1061 	mutex_enter(&ump->um_lock);
1062 	for (i = ffs_numfrags(fs, osize); i < frags; i++) {
1063 		clrbit(blksfree, bno + i);
1064 		ufs_add32(cgp->cg_cs.cs_nffree, -1, UFS_FSNEEDSWAP(fs));
1065 		fs->fs_cstotal.cs_nffree--;
1066 		fs->fs_cs(fs, cg).cs_nffree--;
1067 	}
1068 	fs->fs_fmod = 1;
1069 	ACTIVECG_CLR(fs, cg);
1070 	mutex_exit(&ump->um_lock);
1071 	bdwrite(bp);
1072 	return (bprev);
1073 
1074  fail:
1075  	if (bp != NULL)
1076 		brelse(bp, 0);
1077  	mutex_enter(&ump->um_lock);
1078  	return (0);
1079 }
1080 
1081 /*
1082  * Determine whether a block can be allocated.
1083  *
1084  * Check to see if a block of the appropriate size is available,
1085  * and if it is, allocate it.
1086  */
1087 static daddr_t
1088 ffs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size, int flags)
1089 {
1090 	struct ufsmount *ump;
1091 	struct fs *fs = ip->i_fs;
1092 	struct cg *cgp;
1093 	struct buf *bp;
1094 	int32_t bno;
1095 	daddr_t blkno;
1096 	int error, frags, allocsiz, i;
1097 	u_int8_t *blksfree;
1098 	const int needswap = UFS_FSNEEDSWAP(fs);
1099 
1100 	ump = ip->i_ump;
1101 
1102 	KASSERT(mutex_owned(&ump->um_lock));
1103 
1104 	if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
1105 		return (0);
1106 	mutex_exit(&ump->um_lock);
1107 	error = bread(ip->i_devvp, FFS_FSBTODB(fs, cgtod(fs, cg)),
1108 		(int)fs->fs_cgsize, NOCRED, B_MODIFY, &bp);
1109 	if (error)
1110 		goto fail;
1111 	cgp = (struct cg *)bp->b_data;
1112 	if (!cg_chkmagic(cgp, needswap) ||
1113 	    (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize))
1114 		goto fail;
1115 	cgp->cg_old_time = ufs_rw32(time_second, needswap);
1116 	if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1117 	    (fs->fs_old_flags & FS_FLAGS_UPDATED))
1118 		cgp->cg_time = ufs_rw64(time_second, needswap);
1119 	if (size == fs->fs_bsize) {
1120 		mutex_enter(&ump->um_lock);
1121 		blkno = ffs_alloccgblk(ip, bp, bpref, flags);
1122 		ACTIVECG_CLR(fs, cg);
1123 		mutex_exit(&ump->um_lock);
1124 		bdwrite(bp);
1125 		return (blkno);
1126 	}
1127 	/*
1128 	 * check to see if any fragments are already available
1129 	 * allocsiz is the size which will be allocated, hacking
1130 	 * it down to a smaller size if necessary
1131 	 */
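	/*
	 * For example, a request for 3 fragments first checks
	 * cg_frsum[3], then cg_frsum[4], ... up to fs_frag - 1; only
	 * when every bucket is empty is a whole block carved up below.
	 */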
1132 	blksfree = cg_blksfree(cgp, needswap);
1133 	frags = ffs_numfrags(fs, size);
1134 	for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
1135 		if (cgp->cg_frsum[allocsiz] != 0)
1136 			break;
1137 	if (allocsiz == fs->fs_frag) {
1138 		/*
1139 		 * no fragments were available, so a block will be
1140 		 * allocated, and hacked up
1141 		 */
1142 		if (cgp->cg_cs.cs_nbfree == 0)
1143 			goto fail;
1144 		mutex_enter(&ump->um_lock);
1145 		blkno = ffs_alloccgblk(ip, bp, bpref, flags);
1146 		bno = dtogd(fs, blkno);
1147 		for (i = frags; i < fs->fs_frag; i++)
1148 			setbit(blksfree, bno + i);
1149 		i = fs->fs_frag - frags;
1150 		ufs_add32(cgp->cg_cs.cs_nffree, i, needswap);
1151 		fs->fs_cstotal.cs_nffree += i;
1152 		fs->fs_cs(fs, cg).cs_nffree += i;
1153 		fs->fs_fmod = 1;
1154 		ufs_add32(cgp->cg_frsum[i], 1, needswap);
1155 		ACTIVECG_CLR(fs, cg);
1156 		mutex_exit(&ump->um_lock);
1157 		bdwrite(bp);
1158 		return (blkno);
1159 	}
1160 	bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
1161 #if 0
1162 	/*
1163 	 * XXX fvdl mapsearch will panic, and never return -1
1164 	 *          also: returning NULL as daddr_t ?
1165 	 */
1166 	if (bno < 0)
1167 		goto fail;
1168 #endif
1169 	for (i = 0; i < frags; i++)
1170 		clrbit(blksfree, bno + i);
1171 	mutex_enter(&ump->um_lock);
1172 	ufs_add32(cgp->cg_cs.cs_nffree, -frags, needswap);
1173 	fs->fs_cstotal.cs_nffree -= frags;
1174 	fs->fs_cs(fs, cg).cs_nffree -= frags;
1175 	fs->fs_fmod = 1;
1176 	ufs_add32(cgp->cg_frsum[allocsiz], -1, needswap);
1177 	if (frags != allocsiz)
1178 		ufs_add32(cgp->cg_frsum[allocsiz - frags], 1, needswap);
1179 	blkno = cgbase(fs, cg) + bno;
1180 	ACTIVECG_CLR(fs, cg);
1181 	mutex_exit(&ump->um_lock);
1182 	bdwrite(bp);
1183 	return blkno;
1184 
1185  fail:
1186  	if (bp != NULL)
1187 		brelse(bp, 0);
1188  	mutex_enter(&ump->um_lock);
1189  	return (0);
1190 }
1191 
1192 /*
1193  * Allocate a block in a cylinder group.
1194  *
1195  * This algorithm implements the following policy:
1196  *   1) allocate the requested block.
1197  *   2) allocate a rotationally optimal block in the same cylinder.
1198  *   3) allocate the next available block on the block rotor for the
1199  *      specified cylinder group.
1200  * Note that this routine only allocates fs_bsize blocks; these
1201  * blocks may be fragmented by the routine that allocates them.
1202  */
1203 static daddr_t
1204 ffs_alloccgblk(struct inode *ip, struct buf *bp, daddr_t bpref, int flags)
1205 {
1206 	struct fs *fs = ip->i_fs;
1207 	struct cg *cgp;
1208 	int cg;
1209 	daddr_t blkno;
1210 	int32_t bno;
1211 	u_int8_t *blksfree;
1212 	const int needswap = UFS_FSNEEDSWAP(fs);
1213 
1214 	KASSERT(mutex_owned(&ip->i_ump->um_lock));
1215 
1216 	cgp = (struct cg *)bp->b_data;
1217 	blksfree = cg_blksfree(cgp, needswap);
1218 	if (bpref == 0 || dtog(fs, bpref) != ufs_rw32(cgp->cg_cgx, needswap)) {
1219 		bpref = ufs_rw32(cgp->cg_rotor, needswap);
1220 	} else {
1221 		bpref = ffs_blknum(fs, bpref);
1222 		bno = dtogd(fs, bpref);
1223 		/*
1224 		 * if the requested block is available, use it
1225 		 */
1226 		if (ffs_isblock(fs, blksfree, ffs_fragstoblks(fs, bno)))
1227 			goto gotit;
1228 		/*
1229 		 * if the requested data block isn't available and we are
1230 		 * trying to allocate a contiguous file, return an error.
1231 		 */
1232 		if ((flags & (B_CONTIG | B_METAONLY)) == B_CONTIG)
1233 			return (0);
1234 	}
1235 
1236 	/*
1237 	 * Take the next available block in this cylinder group.
1238 	 */
1239 	bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
1240 	if (bno < 0)
1241 		return (0);
1242 	cgp->cg_rotor = ufs_rw32(bno, needswap);
1243 gotit:
1244 	blkno = ffs_fragstoblks(fs, bno);
1245 	ffs_clrblock(fs, blksfree, blkno);
1246 	ffs_clusteracct(fs, cgp, blkno, -1);
1247 	ufs_add32(cgp->cg_cs.cs_nbfree, -1, needswap);
1248 	fs->fs_cstotal.cs_nbfree--;
1249 	fs->fs_cs(fs, ufs_rw32(cgp->cg_cgx, needswap)).cs_nbfree--;
1250 	if ((fs->fs_magic == FS_UFS1_MAGIC) &&
1251 	    ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0)) {
1252 		int cylno;
1253 		cylno = old_cbtocylno(fs, bno);
1254 		KASSERT(cylno >= 0);
1255 		KASSERT(cylno < fs->fs_old_ncyl);
1256 		KASSERT(old_cbtorpos(fs, bno) >= 0);
1257 		KASSERT(fs->fs_old_nrpos == 0 || old_cbtorpos(fs, bno) < fs->fs_old_nrpos);
1258 		ufs_add16(old_cg_blks(fs, cgp, cylno, needswap)[old_cbtorpos(fs, bno)], -1,
1259 		    needswap);
1260 		ufs_add32(old_cg_blktot(cgp, needswap)[cylno], -1, needswap);
1261 	}
1262 	fs->fs_fmod = 1;
1263 	cg = ufs_rw32(cgp->cg_cgx, needswap);
1264 	blkno = cgbase(fs, cg) + bno;
1265 	return (blkno);
1266 }
1267 
1268 /*
1269  * Determine whether an inode can be allocated.
1270  *
1271  * Check to see if an inode is available, and if it is,
1272  * allocate it using the following policy:
1273  *   1) allocate the requested inode.
1274  *   2) allocate the next available inode after the requested
1275  *      inode in the specified cylinder group.
1276  */
1277 static daddr_t
1278 ffs_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode, int flags)
1279 {
1280 	struct ufsmount *ump = ip->i_ump;
1281 	struct fs *fs = ip->i_fs;
1282 	struct cg *cgp;
1283 	struct buf *bp, *ibp;
1284 	u_int8_t *inosused;
1285 	int error, start, len, loc, map, i;
1286 	int32_t initediblk;
1287 	daddr_t nalloc;
1288 	struct ufs2_dinode *dp2;
1289 	const int needswap = UFS_FSNEEDSWAP(fs);
1290 
1291 	KASSERT(mutex_owned(&ump->um_lock));
1292 	UFS_WAPBL_JLOCK_ASSERT(ip->i_ump->um_mountp);
1293 
1294 	if (fs->fs_cs(fs, cg).cs_nifree == 0)
1295 		return (0);
1296 	mutex_exit(&ump->um_lock);
1297 	ibp = NULL;
1298 	initediblk = -1;
1299 retry:
1300 	error = bread(ip->i_devvp, FFS_FSBTODB(fs, cgtod(fs, cg)),
1301 		(int)fs->fs_cgsize, NOCRED, B_MODIFY, &bp);
1302 	if (error)
1303 		goto fail;
1304 	cgp = (struct cg *)bp->b_data;
1305 	if (!cg_chkmagic(cgp, needswap) || cgp->cg_cs.cs_nifree == 0)
1306 		goto fail;
1307 
1308 	if (ibp != NULL &&
1309 	    initediblk != ufs_rw32(cgp->cg_initediblk, needswap)) {
1310 		/* Another thread allocated more inodes so we retry the test. */
1311 		brelse(ibp, 0);
1312 		ibp = NULL;
1313 	}
1314 	/*
1315 	 * Check to see if we need to initialize more inodes.
1316 	 */
1317 	if (fs->fs_magic == FS_UFS2_MAGIC && ibp == NULL) {
1318 		initediblk = ufs_rw32(cgp->cg_initediblk, needswap);
1319 		nalloc = fs->fs_ipg - ufs_rw32(cgp->cg_cs.cs_nifree, needswap);
1320 		if (nalloc + FFS_INOPB(fs) > initediblk &&
1321 		    initediblk < ufs_rw32(cgp->cg_niblk, needswap)) {
1322 			/*
1323 			 * We have to release the cg buffer here to prevent
1324 			 * a deadlock if reading the inode block runs a
1325 			 * copy-on-write that might use this cg.
1326 			 */
1327 			brelse(bp, 0);
1328 			bp = NULL;
1329 			error = ffs_getblk(ip->i_devvp, FFS_FSBTODB(fs,
1330 			    ino_to_fsba(fs, cg * fs->fs_ipg + initediblk)),
1331 			    FFS_NOBLK, fs->fs_bsize, false, &ibp);
1332 			if (error)
1333 				goto fail;
1334 			goto retry;
1335 		}
1336 	}
1337 
1338 	cgp->cg_old_time = ufs_rw32(time_second, needswap);
1339 	if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1340 	    (fs->fs_old_flags & FS_FLAGS_UPDATED))
1341 		cgp->cg_time = ufs_rw64(time_second, needswap);
1342 	inosused = cg_inosused(cgp, needswap);
1343 	if (ipref) {
1344 		ipref %= fs->fs_ipg;
1345 		if (isclr(inosused, ipref))
1346 			goto gotit;
1347 	}
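	/*
	 * No usable preference: scan the in-use map one byte at a time,
	 * starting at the byte holding the inode rotor and wrapping to
	 * the beginning, for the first byte that is not 0xff; ffs() on
	 * the inverted byte then yields the free inode within it.
	 */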
1348 	start = ufs_rw32(cgp->cg_irotor, needswap) / NBBY;
1349 	len = howmany(fs->fs_ipg - ufs_rw32(cgp->cg_irotor, needswap),
1350 		NBBY);
1351 	loc = skpc(0xff, len, &inosused[start]);
1352 	if (loc == 0) {
1353 		len = start + 1;
1354 		start = 0;
1355 		loc = skpc(0xff, len, &inosused[0]);
1356 		if (loc == 0) {
1357 			printf("cg = %d, irotor = %d, fs = %s\n",
1358 			    cg, ufs_rw32(cgp->cg_irotor, needswap),
1359 				fs->fs_fsmnt);
1360 			panic("ffs_nodealloccg: map corrupted");
1361 			/* NOTREACHED */
1362 		}
1363 	}
1364 	i = start + len - loc;
1365 	map = inosused[i] ^ 0xff;
1366 	if (map == 0) {
1367 		printf("fs = %s\n", fs->fs_fsmnt);
1368 		panic("ffs_nodealloccg: block not in map");
1369 	}
1370 	ipref = i * NBBY + ffs(map) - 1;
1371 	cgp->cg_irotor = ufs_rw32(ipref, needswap);
1372 gotit:
1373 	UFS_WAPBL_REGISTER_INODE(ip->i_ump->um_mountp, cg * fs->fs_ipg + ipref,
1374 	    mode);
1375 	/*
1376 	 * Check to see if we need to initialize more inodes.
1377 	 */
1378 	if (ibp != NULL) {
1379 		KASSERT(initediblk == ufs_rw32(cgp->cg_initediblk, needswap));
1380 		memset(ibp->b_data, 0, fs->fs_bsize);
1381 		dp2 = (struct ufs2_dinode *)(ibp->b_data);
1382 		for (i = 0; i < FFS_INOPB(fs); i++) {
1383 			/*
1384 			 * Don't bother to swap, it's supposed to be
1385 			 * random, after all.
1386 			 */
1387 			dp2->di_gen = (cprng_fast32() & INT32_MAX) / 2 + 1;
1388 			dp2++;
1389 		}
1390 		initediblk += FFS_INOPB(fs);
1391 		cgp->cg_initediblk = ufs_rw32(initediblk, needswap);
1392 	}
1393 
1394 	mutex_enter(&ump->um_lock);
1395 	ACTIVECG_CLR(fs, cg);
1396 	setbit(inosused, ipref);
1397 	ufs_add32(cgp->cg_cs.cs_nifree, -1, needswap);
1398 	fs->fs_cstotal.cs_nifree--;
1399 	fs->fs_cs(fs, cg).cs_nifree--;
1400 	fs->fs_fmod = 1;
1401 	if ((mode & IFMT) == IFDIR) {
1402 		ufs_add32(cgp->cg_cs.cs_ndir, 1, needswap);
1403 		fs->fs_cstotal.cs_ndir++;
1404 		fs->fs_cs(fs, cg).cs_ndir++;
1405 	}
1406 	mutex_exit(&ump->um_lock);
1407 	if (ibp != NULL) {
1408 		bwrite(bp);
1409 		bawrite(ibp);
1410 	} else
1411 		bdwrite(bp);
1412 	return (cg * fs->fs_ipg + ipref);
1413  fail:
1414 	if (bp != NULL)
1415 		brelse(bp, 0);
1416 	if (ibp != NULL)
1417 		brelse(ibp, 0);
1418 	mutex_enter(&ump->um_lock);
1419 	return (0);
1420 }
1421 
1422 /*
1423  * Allocate a block or fragment.
1424  *
1425  * The specified block or fragment is removed from the
1426  * free map, possibly fragmenting a block in the process.
1427  *
1428  * This implementation should mirror ffs_blkfree
1429  *
1430  * => um_lock not held on entry or exit
1431  */
1432 int
1433 ffs_blkalloc(struct inode *ip, daddr_t bno, long size)
1434 {
1435 	int error;
1436 
1437 	error = ffs_check_bad_allocation(__func__, ip->i_fs, bno, size,
1438 	    ip->i_dev, ip->i_uid);
1439 	if (error)
1440 		return error;
1441 
1442 	return ffs_blkalloc_ump(ip->i_ump, bno, size);
1443 }
1444 
1445 int
1446 ffs_blkalloc_ump(struct ufsmount *ump, daddr_t bno, long size)
1447 {
1448 	struct fs *fs = ump->um_fs;
1449 	struct cg *cgp;
1450 	struct buf *bp;
1451 	int32_t fragno, cgbno;
1452 	int i, error, cg, blk, frags, bbase;
1453 	u_int8_t *blksfree;
1454 	const int needswap = UFS_FSNEEDSWAP(fs);
1455 
1456 	KASSERT((u_int)size <= fs->fs_bsize && ffs_fragoff(fs, size) == 0 &&
1457 	    ffs_fragnum(fs, bno) + ffs_numfrags(fs, size) <= fs->fs_frag);
1458 	KASSERT(bno < fs->fs_size);
1459 
1460 	cg = dtog(fs, bno);
1461 	error = bread(ump->um_devvp, FFS_FSBTODB(fs, cgtod(fs, cg)),
1462 		(int)fs->fs_cgsize, NOCRED, B_MODIFY, &bp);
1463 	if (error) {
1464 		return error;
1465 	}
1466 	cgp = (struct cg *)bp->b_data;
1467 	if (!cg_chkmagic(cgp, needswap)) {
1468 		brelse(bp, 0);
1469 		return EIO;
1470 	}
1471 	cgp->cg_old_time = ufs_rw32(time_second, needswap);
1472 	cgp->cg_time = ufs_rw64(time_second, needswap);
1473 	cgbno = dtogd(fs, bno);
1474 	blksfree = cg_blksfree(cgp, needswap);
1475 
1476 	mutex_enter(&ump->um_lock);
1477 	if (size == fs->fs_bsize) {
1478 		fragno = ffs_fragstoblks(fs, cgbno);
1479 		if (!ffs_isblock(fs, blksfree, fragno)) {
1480 			mutex_exit(&ump->um_lock);
1481 			brelse(bp, 0);
1482 			return EBUSY;
1483 		}
1484 		ffs_clrblock(fs, blksfree, fragno);
1485 		ffs_clusteracct(fs, cgp, fragno, -1);
1486 		ufs_add32(cgp->cg_cs.cs_nbfree, -1, needswap);
1487 		fs->fs_cstotal.cs_nbfree--;
1488 		fs->fs_cs(fs, cg).cs_nbfree--;
1489 	} else {
1490 		bbase = cgbno - ffs_fragnum(fs, cgbno);
1491 
1492 		frags = ffs_numfrags(fs, size);
1493 		for (i = 0; i < frags; i++) {
1494 			if (isclr(blksfree, cgbno + i)) {
1495 				mutex_exit(&ump->um_lock);
1496 				brelse(bp, 0);
1497 				return EBUSY;
1498 			}
1499 		}
1500 		/*
1501 		 * if a complete block is being split, account for it
1502 		 */
1503 		fragno = ffs_fragstoblks(fs, bbase);
1504 		if (ffs_isblock(fs, blksfree, fragno)) {
1505 			ufs_add32(cgp->cg_cs.cs_nffree, fs->fs_frag, needswap);
1506 			fs->fs_cstotal.cs_nffree += fs->fs_frag;
1507 			fs->fs_cs(fs, cg).cs_nffree += fs->fs_frag;
1508 			ffs_clusteracct(fs, cgp, fragno, -1);
1509 			ufs_add32(cgp->cg_cs.cs_nbfree, -1, needswap);
1510 			fs->fs_cstotal.cs_nbfree--;
1511 			fs->fs_cs(fs, cg).cs_nbfree--;
1512 		}
1513 		/*
1514 		 * decrement the counts associated with the old frags
1515 		 */
1516 		blk = blkmap(fs, blksfree, bbase);
1517 		ffs_fragacct(fs, blk, cgp->cg_frsum, -1, needswap);
1518 		/*
1519 		 * allocate the fragment
1520 		 */
1521 		for (i = 0; i < frags; i++) {
1522 			clrbit(blksfree, cgbno + i);
1523 		}
1524 		ufs_add32(cgp->cg_cs.cs_nffree, -i, needswap);
1525 		fs->fs_cstotal.cs_nffree -= i;
1526 		fs->fs_cs(fs, cg).cs_nffree -= i;
1527 		/*
1528 		 * add back in counts associated with the new frags
1529 		 */
1530 		blk = blkmap(fs, blksfree, bbase);
1531 		ffs_fragacct(fs, blk, cgp->cg_frsum, 1, needswap);
1532 	}
1533 	fs->fs_fmod = 1;
1534 	ACTIVECG_CLR(fs, cg);
1535 	mutex_exit(&ump->um_lock);
1536 	bdwrite(bp);
1537 	return 0;
1538 }
1539 
1540 /*
1541  * Free a block or fragment.
1542  *
1543  * The specified block or fragment is placed back in the
1544  * free map. If a fragment is deallocated, a possible
1545  * block reassembly is checked.
1546  *
1547  * => um_lock not held on entry or exit
1548  */
1549 static void
1550 ffs_blkfree_cg(struct fs *fs, struct vnode *devvp, daddr_t bno, long size)
1551 {
1552 	struct cg *cgp;
1553 	struct buf *bp;
1554 	struct ufsmount *ump;
1555 	daddr_t cgblkno;
1556 	int error, cg;
1557 	dev_t dev;
1558 	const bool devvp_is_snapshot = (devvp->v_type != VBLK);
1559 	const int needswap = UFS_FSNEEDSWAP(fs);
1560 
1561 	KASSERT(!devvp_is_snapshot);
1562 
1563 	cg = dtog(fs, bno);
1564 	dev = devvp->v_rdev;
1565 	ump = VFSTOUFS(spec_node_getmountedfs(devvp));
1566 	KASSERT(fs == ump->um_fs);
1567 	cgblkno = FFS_FSBTODB(fs, cgtod(fs, cg));
1568 
1569 	error = bread(devvp, cgblkno, (int)fs->fs_cgsize,
1570 	    NOCRED, B_MODIFY, &bp);
1571 	if (error) {
1572 		return;
1573 	}
1574 	cgp = (struct cg *)bp->b_data;
1575 	if (!cg_chkmagic(cgp, needswap)) {
1576 		brelse(bp, 0);
1577 		return;
1578 	}
1579 
1580 	ffs_blkfree_common(ump, fs, dev, bp, bno, size, devvp_is_snapshot);
1581 
1582 	bdwrite(bp);
1583 }
1584 
1585 struct discardopdata {
1586 	struct work wk; /* must be first */
1587 	struct vnode *devvp;
1588 	daddr_t bno;
1589 	long size;
1590 };
1591 
1592 struct discarddata {
1593 	struct fs *fs;
1594 	struct discardopdata *entry;
1595 	long maxsize;
1596 	kmutex_t entrylk;
1597 	struct workqueue *wq;
1598 	int wqcnt, wqdraining;
1599 	kmutex_t wqlk;
1600 	kcondvar_t wqcv;
1601 	/* timer for flush? */
1602 };
1603 
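/*
 * Hand a deferred discard extent back to the cylinder group free maps,
 * splitting it so that no single ffs_blkfree_cg() call crosses a block
 * boundary.
 */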
1604 static void
1605 ffs_blkfree_td(struct fs *fs, struct discardopdata *td)
1606 {
1607 	long todo;
1608 
1609 	while (td->size) {
1610 		todo = min(td->size,
1611 		  ffs_lfragtosize(fs, (fs->fs_frag - ffs_fragnum(fs, td->bno))));
1612 		ffs_blkfree_cg(fs, td->devvp, td->bno, todo);
1613 		td->bno += ffs_numfrags(fs, todo);
1614 		td->size -= todo;
1615 	}
1616 }
1617 
1618 static void
1619 ffs_discardcb(struct work *wk, void *arg)
1620 {
1621 	struct discardopdata *td = (void *)wk;
1622 	struct discarddata *ts = arg;
1623 	struct fs *fs = ts->fs;
1624 	struct disk_discard_range ta;
1625 #ifdef TRIMDEBUG
1626 	int error;
1627 #endif
1628 
1629 	ta.bno = FFS_FSBTODB(fs, td->bno);
1630 	ta.size = td->size >> DEV_BSHIFT;
1631 #ifdef TRIMDEBUG
1632 	error =
1633 #endif
1634 		VOP_IOCTL(td->devvp, DIOCDISCARD, &ta, FWRITE, FSCRED);
1635 #ifdef TRIMDEBUG
1636 	printf("trim(%" PRId64 ",%ld):%d\n", td->bno, td->size, error);
1637 #endif
1638 
1639 	ffs_blkfree_td(fs, td);
1640 	kmem_free(td, sizeof(*td));
1641 	mutex_enter(&ts->wqlk);
1642 	ts->wqcnt--;
1643 	if (ts->wqdraining && !ts->wqcnt)
1644 		cv_signal(&ts->wqcv);
1645 	mutex_exit(&ts->wqlk);
1646 }
1647 
1648 void *
1649 ffs_discard_init(struct vnode *devvp, struct fs *fs)
1650 {
1651 	struct disk_discard_params tp;
1652 	struct discarddata *ts;
1653 	int error;
1654 
1655 	error = VOP_IOCTL(devvp, DIOCGDISCARDPARAMS, &tp, FREAD, FSCRED);
1656 	if (error) {
1657 		printf("DIOCGDISCARDPARAMS: %d\n", error);
1658 		return NULL;
1659 	}
1660 	if (tp.maxsize * DEV_BSIZE < fs->fs_bsize) {
1661 		printf("tp.maxsize=%ld, fs_bsize=%d\n", tp.maxsize, fs->fs_bsize);
1662 		return NULL;
1663 	}
1664 
1665 	ts = kmem_zalloc(sizeof (*ts), KM_SLEEP);
1666 	error = workqueue_create(&ts->wq, "trimwq", ffs_discardcb, ts,
1667 				 0, 0, 0);
1668 	if (error) {
1669 		kmem_free(ts, sizeof (*ts));
1670 		return NULL;
1671 	}
1672 	mutex_init(&ts->entrylk, MUTEX_DEFAULT, IPL_NONE);
1673 	mutex_init(&ts->wqlk, MUTEX_DEFAULT, IPL_NONE);
1674 	cv_init(&ts->wqcv, "trimwqcv");
1675 	ts->maxsize = max(tp.maxsize * DEV_BSIZE, 100*1024); /* XXX */
1676 	ts->fs = fs;
1677 	return ts;
1678 }
1679 
1680 void
1681 ffs_discard_finish(void *vts, int flags)
1682 {
1683 	struct discarddata *ts = vts;
1684 	struct discardopdata *td = NULL;
1685 	int res = 0;
1686 
1687 	/* wait for workqueue to drain */
1688 	mutex_enter(&ts->wqlk);
1689 	if (ts->wqcnt) {
1690 		ts->wqdraining = 1;
1691 		res = cv_timedwait(&ts->wqcv, &ts->wqlk, mstohz(5000));
1692 	}
1693 	mutex_exit(&ts->wqlk);
1694 	if (res)
1695 		printf("ffs_discarddata drain timeout\n");
1696 
1697 	mutex_enter(&ts->entrylk);
1698 	if (ts->entry) {
1699 		td = ts->entry;
1700 		ts->entry = NULL;
1701 	}
1702 	mutex_exit(&ts->entrylk);
1703 	if (td) {
1704 		/* XXX don't tell the disk, it's optional */
1705 		ffs_blkfree_td(ts->fs, td);
1706 #ifdef TRIMDEBUG
1707 		printf("finish(%" PRId64 ",%ld)\n", td->bno, td->size);
1708 #endif
1709 		kmem_free(td, sizeof(*td));
1710 	}
1711 
1712 	cv_destroy(&ts->wqcv);
1713 	mutex_destroy(&ts->entrylk);
1714 	mutex_destroy(&ts->wqlk);
1715 	workqueue_destroy(ts->wq);
1716 	kmem_free(ts, sizeof(*ts));
1717 }
1718 
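/*
 * Free a block or fragment.
 *
 * The range is first offered to any active snapshot; if no snapshot claims
 * it, it is either returned to the free map immediately or, when discard
 * (TRIM) is enabled, coalesced into the pending discard entry and freed
 * once that request has been handed to the workqueue.
 *
 * => um_lock not held on entry or exit
 */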
1719 void
1720 ffs_blkfree(struct fs *fs, struct vnode *devvp, daddr_t bno, long size,
1721     ino_t inum)
1722 {
1723 	struct ufsmount *ump;
1724 	int error;
1725 	dev_t dev;
1726 	struct discarddata *ts;
1727 	struct discardopdata *td;
1728 
1729 	dev = devvp->v_rdev;
1730 	ump = VFSTOUFS(spec_node_getmountedfs(devvp));
1731 	if (ffs_snapblkfree(fs, devvp, bno, size, inum))
1732 		return;
1733 
1734 	error = ffs_check_bad_allocation(__func__, fs, bno, size, dev, inum);
1735 	if (error)
1736 		return;
1737 
1738 	if (!ump->um_discarddata) {
1739 		ffs_blkfree_cg(fs, devvp, bno, size);
1740 		return;
1741 	}
1742 
1743 #ifdef TRIMDEBUG
1744 	printf("blkfree(%" PRId64 ",%ld)\n", bno, size);
1745 #endif
1746 	ts = ump->um_discarddata;
1747 	td = NULL;
1748 
1749 	mutex_enter(&ts->entrylk);
1750 	if (ts->entry) {
1751 		td = ts->entry;
1752 		/* ffs deallocs backwards, check for prepend only */
1753 		/* ffs deallocates backwards, so check only for prepending */
1754 		    && td->size + size <= ts->maxsize) {
1755 			td->bno = bno;
1756 			td->size += size;
1757 			if (td->size < ts->maxsize) {
1758 #ifdef TRIMDEBUG
1759 				printf("defer(%" PRId64 ",%ld)\n", td->bno, td->size);
1760 #endif
1761 				mutex_exit(&ts->entrylk);
1762 				return;
1763 			}
1764 			size = 0; /* mark done */
1765 		}
1766 		ts->entry = NULL;
1767 	}
1768 	mutex_exit(&ts->entrylk);
1769 
1770 	if (td) {
1771 #ifdef TRIMDEBUG
1772 		printf("enq old(%" PRId64 ",%ld)\n", td->bno, td->size);
1773 #endif
1774 		mutex_enter(&ts->wqlk);
1775 		ts->wqcnt++;
1776 		mutex_exit(&ts->wqlk);
1777 		workqueue_enqueue(ts->wq, &td->wk, NULL);
1778 	}
1779 	if (!size)
1780 		return;
1781 
1782 	td = kmem_alloc(sizeof(*td), KM_SLEEP);
1783 	td->devvp = devvp;
1784 	td->bno = bno;
1785 	td->size = size;
1786 
1787 	if (td->size < ts->maxsize) { /* XXX always the case */
1788 		mutex_enter(&ts->entrylk);
1789 		if (!ts->entry) { /* possible race? */
1790 #ifdef TRIMDEBUG
1791 			printf("defer(%" PRId64 ",%ld)\n", td->bno, td->size);
1792 #endif
1793 			ts->entry = td;
1794 			td = NULL;
1795 		}
1796 		mutex_exit(&ts->entrylk);
1797 	}
1798 	if (td) {
1799 #ifdef TRIMDEBUG
1800 		printf("enq new(%" PRId64 ",%ld)\n", td->bno, td->size);
1801 #endif
1802 		mutex_enter(&ts->wqlk);
1803 		ts->wqcnt++;
1804 		mutex_exit(&ts->wqlk);
1805 		workqueue_enqueue(ts->wq, &td->wk, NULL);
1806 	}
1807 }
1808 
1809 /*
1810  * Free a block or fragment from a snapshot cg copy.
1811  *
1812  * The specified block or fragment is placed back in the
1813  * free map. If a fragment is deallocated, a possible
1814  * block reassembly is checked.
1815  *
1816  * => um_lock not held on entry or exit
1817  */
1818 void
1819 ffs_blkfree_snap(struct fs *fs, struct vnode *devvp, daddr_t bno, long size,
1820     ino_t inum)
1821 {
1822 	struct cg *cgp;
1823 	struct buf *bp;
1824 	struct ufsmount *ump;
1825 	daddr_t cgblkno;
1826 	int error, cg;
1827 	dev_t dev;
1828 	const bool devvp_is_snapshot = (devvp->v_type != VBLK);
1829 	const int needswap = UFS_FSNEEDSWAP(fs);
1830 
1831 	KASSERT(devvp_is_snapshot);
1832 
1833 	cg = dtog(fs, bno);
1834 	dev = VTOI(devvp)->i_devvp->v_rdev;
1835 	ump = VFSTOUFS(devvp->v_mount);
1836 	cgblkno = ffs_fragstoblks(fs, cgtod(fs, cg));
1837 
1838 	error = ffs_check_bad_allocation(__func__, fs, bno, size, dev, inum);
1839 	if (error)
1840 		return;
1841 
1842 	error = bread(devvp, cgblkno, (int)fs->fs_cgsize,
1843 	    NOCRED, B_MODIFY, &bp);
1844 	if (error) {
1845 		return;
1846 	}
1847 	cgp = (struct cg *)bp->b_data;
1848 	if (!cg_chkmagic(cgp, needswap)) {
1849 		brelse(bp, 0);
1850 		return;
1851 	}
1852 
1853 	ffs_blkfree_common(ump, fs, dev, bp, bno, size, devvp_is_snapshot);
1854 
1855 	bdwrite(bp);
1856 }
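/*
 * Update the cylinder group to reflect a freed block or fragment: mark the
 * fragments free in the blksfree map and maintain the cluster, block and
 * fragment summaries, reassembling a full block when all of its fragments
 * become free.  Freeing an already-free whole block in a snapshot cg copy
 * is silently ignored; otherwise freeing an already-free block or fragment
 * causes a panic.
 */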
1857 
1858 static void
1859 ffs_blkfree_common(struct ufsmount *ump, struct fs *fs, dev_t dev,
1860     struct buf *bp, daddr_t bno, long size, bool devvp_is_snapshot)
1861 {
1862 	struct cg *cgp;
1863 	int32_t fragno, cgbno;
1864 	int i, cg, blk, frags, bbase;
1865 	u_int8_t *blksfree;
1866 	const int needswap = UFS_FSNEEDSWAP(fs);
1867 
1868 	cg = dtog(fs, bno);
1869 	cgp = (struct cg *)bp->b_data;
1870 	cgp->cg_old_time = ufs_rw32(time_second, needswap);
1871 	if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1872 	    (fs->fs_old_flags & FS_FLAGS_UPDATED))
1873 		cgp->cg_time = ufs_rw64(time_second, needswap);
1874 	cgbno = dtogd(fs, bno);
1875 	blksfree = cg_blksfree(cgp, needswap);
1876 	mutex_enter(&ump->um_lock);
1877 	if (size == fs->fs_bsize) {
1878 		fragno = ffs_fragstoblks(fs, cgbno);
1879 		if (!ffs_isfreeblock(fs, blksfree, fragno)) {
1880 			if (devvp_is_snapshot) {
1881 				mutex_exit(&ump->um_lock);
1882 				return;
1883 			}
1884 			printf("dev = 0x%llx, block = %" PRId64 ", fs = %s\n",
1885 			    (unsigned long long)dev, bno, fs->fs_fsmnt);
1886 			panic("blkfree: freeing free block");
1887 		}
1888 		ffs_setblock(fs, blksfree, fragno);
1889 		ffs_clusteracct(fs, cgp, fragno, 1);
1890 		ufs_add32(cgp->cg_cs.cs_nbfree, 1, needswap);
1891 		fs->fs_cstotal.cs_nbfree++;
1892 		fs->fs_cs(fs, cg).cs_nbfree++;
1893 		if ((fs->fs_magic == FS_UFS1_MAGIC) &&
1894 		    ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0)) {
1895 			i = old_cbtocylno(fs, cgbno);
1896 			KASSERT(i >= 0);
1897 			KASSERT(i < fs->fs_old_ncyl);
1898 			KASSERT(old_cbtorpos(fs, cgbno) >= 0);
1899 			KASSERT(fs->fs_old_nrpos == 0 || old_cbtorpos(fs, cgbno) < fs->fs_old_nrpos);
1900 			ufs_add16(old_cg_blks(fs, cgp, i, needswap)[old_cbtorpos(fs, cgbno)], 1,
1901 			    needswap);
1902 			ufs_add32(old_cg_blktot(cgp, needswap)[i], 1, needswap);
1903 		}
1904 	} else {
1905 		bbase = cgbno - ffs_fragnum(fs, cgbno);
1906 		/*
1907 		 * decrement the counts associated with the old frags
1908 		 */
1909 		blk = blkmap(fs, blksfree, bbase);
1910 		ffs_fragacct(fs, blk, cgp->cg_frsum, -1, needswap);
1911 		/*
1912 		 * deallocate the fragment
1913 		 */
1914 		frags = ffs_numfrags(fs, size);
1915 		for (i = 0; i < frags; i++) {
1916 			if (isset(blksfree, cgbno + i)) {
1917 				printf("dev = 0x%llx, block = %" PRId64
1918 				       ", fs = %s\n",
1919 				    (unsigned long long)dev, bno + i,
1920 				    fs->fs_fsmnt);
1921 				panic("blkfree: freeing free frag");
1922 			}
1923 			setbit(blksfree, cgbno + i);
1924 		}
1925 		ufs_add32(cgp->cg_cs.cs_nffree, i, needswap);
1926 		fs->fs_cstotal.cs_nffree += i;
1927 		fs->fs_cs(fs, cg).cs_nffree += i;
1928 		/*
1929 		 * add back in counts associated with the new frags
1930 		 */
1931 		blk = blkmap(fs, blksfree, bbase);
1932 		ffs_fragacct(fs, blk, cgp->cg_frsum, 1, needswap);
1933 		/*
1934 		 * if a complete block has been reassembled, account for it
1935 		 */
1936 		fragno = ffs_fragstoblks(fs, bbase);
1937 		if (ffs_isblock(fs, blksfree, fragno)) {
1938 			ufs_add32(cgp->cg_cs.cs_nffree, -fs->fs_frag, needswap);
1939 			fs->fs_cstotal.cs_nffree -= fs->fs_frag;
1940 			fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
1941 			ffs_clusteracct(fs, cgp, fragno, 1);
1942 			ufs_add32(cgp->cg_cs.cs_nbfree, 1, needswap);
1943 			fs->fs_cstotal.cs_nbfree++;
1944 			fs->fs_cs(fs, cg).cs_nbfree++;
1945 			if ((fs->fs_magic == FS_UFS1_MAGIC) &&
1946 			    ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0)) {
1947 				i = old_cbtocylno(fs, bbase);
1948 				KASSERT(i >= 0);
1949 				KASSERT(i < fs->fs_old_ncyl);
1950 				KASSERT(old_cbtorpos(fs, bbase) >= 0);
1951 				KASSERT(fs->fs_old_nrpos == 0 || old_cbtorpos(fs, bbase) < fs->fs_old_nrpos);
1952 				ufs_add16(old_cg_blks(fs, cgp, i, needswap)[old_cbtorpos(fs,
1953 				    bbase)], 1, needswap);
1954 				ufs_add32(old_cg_blktot(cgp, needswap)[i], 1, needswap);
1955 			}
1956 		}
1957 	}
1958 	fs->fs_fmod = 1;
1959 	ACTIVECG_CLR(fs, cg);
1960 	mutex_exit(&ump->um_lock);
1961 }
1962 
1963 /*
1964  * Free an inode.
1965  */
1966 int
1967 ffs_vfree(struct vnode *vp, ino_t ino, int mode)
1968 {
1969 
1970 	return ffs_freefile(vp->v_mount, ino, mode);
1971 }
1972 
1973 /*
1974  * Do the actual free operation.
1975  * The specified inode is placed back in the free map.
1976  *
1977  * => um_lock not held on entry or exit
1978  */
1979 int
1980 ffs_freefile(struct mount *mp, ino_t ino, int mode)
1981 {
1982 	struct ufsmount *ump = VFSTOUFS(mp);
1983 	struct fs *fs = ump->um_fs;
1984 	struct vnode *devvp;
1985 	struct cg *cgp;
1986 	struct buf *bp;
1987 	int error, cg;
1988 	daddr_t cgbno;
1989 	dev_t dev;
1990 	const int needswap = UFS_FSNEEDSWAP(fs);
1991 
1992 	cg = ino_to_cg(fs, ino);
1993 	devvp = ump->um_devvp;
1994 	dev = devvp->v_rdev;
1995 	cgbno = FFS_FSBTODB(fs, cgtod(fs, cg));
1996 
1997 	if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
1998 		panic("ifree: range: dev = 0x%llx, ino = %llu, fs = %s",
1999 		    (long long)dev, (unsigned long long)ino, fs->fs_fsmnt);
2000 	error = bread(devvp, cgbno, (int)fs->fs_cgsize,
2001 	    NOCRED, B_MODIFY, &bp);
2002 	if (error) {
2003 		return (error);
2004 	}
2005 	cgp = (struct cg *)bp->b_data;
2006 	if (!cg_chkmagic(cgp, needswap)) {
2007 		brelse(bp, 0);
2008 		return (0);
2009 	}
2010 
2011 	ffs_freefile_common(ump, fs, dev, bp, ino, mode, false);
2012 
2013 	bdwrite(bp);
2014 
2015 	return 0;
2016 }
2017 
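/*
 * Free an inode in a persistent snapshot's copy of the cylinder group.
 * The cg block is read through the snapshot vnode using logical block
 * numbers rather than from the device.
 */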
2018 int
2019 ffs_freefile_snap(struct fs *fs, struct vnode *devvp, ino_t ino, int mode)
2020 {
2021 	struct ufsmount *ump;
2022 	struct cg *cgp;
2023 	struct buf *bp;
2024 	int error, cg;
2025 	daddr_t cgbno;
2026 	dev_t dev;
2027 	const int needswap = UFS_FSNEEDSWAP(fs);
2028 
2029 	KASSERT(devvp->v_type != VBLK);
2030 
2031 	cg = ino_to_cg(fs, ino);
2032 	dev = VTOI(devvp)->i_devvp->v_rdev;
2033 	ump = VFSTOUFS(devvp->v_mount);
2034 	cgbno = ffs_fragstoblks(fs, cgtod(fs, cg));
2035 	if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
2036 		panic("ifree: range: dev = 0x%llx, ino = %llu, fs = %s",
2037 		    (unsigned long long)dev, (unsigned long long)ino,
2038 		    fs->fs_fsmnt);
2039 	error = bread(devvp, cgbno, (int)fs->fs_cgsize,
2040 	    NOCRED, B_MODIFY, &bp);
2041 	if (error) {
2042 		return (error);
2043 	}
2044 	cgp = (struct cg *)bp->b_data;
2045 	if (!cg_chkmagic(cgp, needswap)) {
2046 		brelse(bp, 0);
2047 		return (0);
2048 	}
2049 	ffs_freefile_common(ump, fs, dev, bp, ino, mode, true);
2050 
2051 	bdwrite(bp);
2052 
2053 	return 0;
2054 }
2055 
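/*
 * Common inode freeing code: clear the inode's bit in the inosused map,
 * deregister it from the WAPBL journal (unless updating a snapshot copy),
 * pull the inode allocation rotor back if necessary and credit the free
 * inode (and directory) counts.  Freeing an already-free inode panics on
 * a read-write filesystem.
 */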
2056 static void
2057 ffs_freefile_common(struct ufsmount *ump, struct fs *fs, dev_t dev,
2058     struct buf *bp, ino_t ino, int mode, bool devvp_is_snapshot)
2059 {
2060 	int cg;
2061 	struct cg *cgp;
2062 	u_int8_t *inosused;
2063 	const int needswap = UFS_FSNEEDSWAP(fs);
2064 
2065 	cg = ino_to_cg(fs, ino);
2066 	cgp = (struct cg *)bp->b_data;
2067 	cgp->cg_old_time = ufs_rw32(time_second, needswap);
2068 	if ((fs->fs_magic != FS_UFS1_MAGIC) ||
2069 	    (fs->fs_old_flags & FS_FLAGS_UPDATED))
2070 		cgp->cg_time = ufs_rw64(time_second, needswap);
2071 	inosused = cg_inosused(cgp, needswap);
2072 	ino %= fs->fs_ipg;
2073 	if (isclr(inosused, ino)) {
2074 		printf("ifree: dev = 0x%llx, ino = %llu, fs = %s\n",
2075 		    (unsigned long long)dev, (unsigned long long)ino +
2076 		    cg * fs->fs_ipg, fs->fs_fsmnt);
2077 		if (fs->fs_ronly == 0)
2078 			panic("ifree: freeing free inode");
2079 	}
2080 	clrbit(inosused, ino);
2081 	if (!devvp_is_snapshot)
2082 		UFS_WAPBL_UNREGISTER_INODE(ump->um_mountp,
2083 		    ino + cg * fs->fs_ipg, mode);
2084 	if (ino < ufs_rw32(cgp->cg_irotor, needswap))
2085 		cgp->cg_irotor = ufs_rw32(ino, needswap);
2086 	ufs_add32(cgp->cg_cs.cs_nifree, 1, needswap);
2087 	mutex_enter(&ump->um_lock);
2088 	fs->fs_cstotal.cs_nifree++;
2089 	fs->fs_cs(fs, cg).cs_nifree++;
2090 	if ((mode & IFMT) == IFDIR) {
2091 		ufs_add32(cgp->cg_cs.cs_ndir, -1, needswap);
2092 		fs->fs_cstotal.cs_ndir--;
2093 		fs->fs_cs(fs, cg).cs_ndir--;
2094 	}
2095 	fs->fs_fmod = 1;
2096 	ACTIVECG_CLR(fs, cg);
2097 	mutex_exit(&ump->um_lock);
2098 }
2099 
2100 /*
2101  * Check to see if a file is free.
2102  */
2103 int
2104 ffs_checkfreefile(struct fs *fs, struct vnode *devvp, ino_t ino)
2105 {
2106 	struct cg *cgp;
2107 	struct buf *bp;
2108 	daddr_t cgbno;
2109 	int ret, cg;
2110 	u_int8_t *inosused;
2111 	const bool devvp_is_snapshot = (devvp->v_type != VBLK);
2112 
2113 	KASSERT(devvp_is_snapshot);
2114 
2115 	cg = ino_to_cg(fs, ino);
2116 	if (devvp_is_snapshot)
2117 		cgbno = ffs_fragstoblks(fs, cgtod(fs, cg));
2118 	else
2119 		cgbno = FFS_FSBTODB(fs, cgtod(fs, cg));
2120 	if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
2121 		return 1;
2122 	if (bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, 0, &bp)) {
2123 		return 1;
2124 	}
2125 	cgp = (struct cg *)bp->b_data;
2126 	if (!cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs))) {
2127 		brelse(bp, 0);
2128 		return 1;
2129 	}
2130 	inosused = cg_inosused(cgp, UFS_FSNEEDSWAP(fs));
2131 	ino %= fs->fs_ipg;
2132 	ret = isclr(inosused, ino);
2133 	brelse(bp, 0);
2134 	return ret;
2135 }
2136 
2137 /*
2138  * Find a block of the specified size in the specified cylinder group.
2139  *
2140  * It is a panic if a request is made to find a block when none are
2141  * available.
2142  */
2143 static int32_t
2144 ffs_mapsearch(struct fs *fs, struct cg *cgp, daddr_t bpref, int allocsiz)
2145 {
2146 	int32_t bno;
2147 	int start, len, loc, i;
2148 	int blk, field, subfield, pos;
2149 	int ostart, olen;
2150 	u_int8_t *blksfree;
2151 	const int needswap = UFS_FSNEEDSWAP(fs);
2152 
2153 	/* KASSERT(mutex_owned(&ump->um_lock)); */
2154 
2155 	/*
2156 	 * find the fragment by searching through the free block
2157 	 * map for an appropriate bit pattern
2158 	 */
2159 	if (bpref)
2160 		start = dtogd(fs, bpref) / NBBY;
2161 	else
2162 		start = ufs_rw32(cgp->cg_frotor, needswap) / NBBY;
2163 	blksfree = cg_blksfree(cgp, needswap);
2164 	len = howmany(fs->fs_fpg, NBBY) - start;
2165 	ostart = start;
2166 	olen = len;
2167 	loc = scanc((u_int)len,
2168 		(const u_char *)&blksfree[start],
2169 		(const u_char *)fragtbl[fs->fs_frag],
2170 		(1 << (allocsiz - 1 + (fs->fs_frag & (NBBY - 1)))));
2171 	if (loc == 0) {
2172 		len = start + 1;
2173 		start = 0;
2174 		loc = scanc((u_int)len,
2175 			(const u_char *)&blksfree[0],
2176 			(const u_char *)fragtbl[fs->fs_frag],
2177 			(1 << (allocsiz - 1 + (fs->fs_frag & (NBBY - 1)))));
2178 		if (loc == 0) {
2179 			printf("start = %d, len = %d, fs = %s\n",
2180 			    ostart, olen, fs->fs_fsmnt);
2181 			printf("offset=%d %ld\n",
2182 				ufs_rw32(cgp->cg_freeoff, needswap),
2183 				(long)blksfree - (long)cgp);
2184 			printf("cg %d\n", cgp->cg_cgx);
2185 			panic("ffs_alloccg: map corrupted");
2186 			/* NOTREACHED */
2187 		}
2188 	}
2189 	bno = (start + len - loc) * NBBY;
2190 	cgp->cg_frotor = ufs_rw32(bno, needswap);
2191 	/*
2192 	 * found the byte in the map
2193 	 * sift through the bits to find the selected frag
2194 	 */
2195 	for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
2196 		blk = blkmap(fs, blksfree, bno);
2197 		blk <<= 1;
2198 		field = around[allocsiz];
2199 		subfield = inside[allocsiz];
2200 		for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
2201 			if ((blk & field) == subfield)
2202 				return (bno + pos);
2203 			field <<= 1;
2204 			subfield <<= 1;
2205 		}
2206 	}
2207 	printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt);
2208 	panic("ffs_alloccg: block not in map");
2209 	/* return (-1); */
2210 }
2211 
2212 /*
2213  * Fserr prints the name of a file system with an error diagnostic.
2214  *
2215  * The form of the error message is:
2216  *	fs: error message
2217  */
2218 static void
2219 ffs_fserr(struct fs *fs, u_int uid, const char *cp)
2220 {
2221 
2222 	log(LOG_ERR, "uid %d, pid %d, command %s, on %s: %s\n",
2223 	    uid, curproc->p_pid, curproc->p_comm, fs->fs_fsmnt, cp);
2224 }
2225