/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
250Sstevel@tonic-gate
260Sstevel@tonic-gate #include <sys/types.h>
270Sstevel@tonic-gate #include <sys/param.h>
280Sstevel@tonic-gate #include <sys/t_lock.h>
290Sstevel@tonic-gate #include <sys/systm.h>
300Sstevel@tonic-gate #include <sys/time.h>
310Sstevel@tonic-gate #include <sys/sysmacros.h>
320Sstevel@tonic-gate #include <sys/proc.h>
330Sstevel@tonic-gate #include <sys/disp.h>
340Sstevel@tonic-gate #include <sys/user.h>
350Sstevel@tonic-gate #include <sys/time.h>
360Sstevel@tonic-gate #include <sys/vfs.h>
370Sstevel@tonic-gate #include <sys/vnode.h>
380Sstevel@tonic-gate #include <sys/stat.h>
390Sstevel@tonic-gate #include <sys/mode.h>
400Sstevel@tonic-gate #include <sys/errno.h>
410Sstevel@tonic-gate #include <sys/kmem.h>
420Sstevel@tonic-gate #include <vm/seg.h>
430Sstevel@tonic-gate #include <vm/seg_map.h>
440Sstevel@tonic-gate #include <vm/anon.h>
450Sstevel@tonic-gate #include <vm/page.h>
460Sstevel@tonic-gate #include <vm/pvn.h>
470Sstevel@tonic-gate #include <sys/fs/tmp.h>
480Sstevel@tonic-gate #include <sys/fs/tmpnode.h>
490Sstevel@tonic-gate #include <sys/debug.h>
500Sstevel@tonic-gate #include <sys/cmn_err.h>
510Sstevel@tonic-gate #include <sys/swap.h>
520Sstevel@tonic-gate #include <sys/vtrace.h>
530Sstevel@tonic-gate
540Sstevel@tonic-gate /*
550Sstevel@tonic-gate * Reserve swap space for the size of the file.
560Sstevel@tonic-gate * Called before growing a file (i.e. ftruncate, write)
570Sstevel@tonic-gate * Returns 0 on success.
580Sstevel@tonic-gate */
590Sstevel@tonic-gate int
tmp_resv(struct tmount * tm,struct tmpnode * tp,size_t delta,int pagecreate)600Sstevel@tonic-gate tmp_resv(
610Sstevel@tonic-gate struct tmount *tm,
620Sstevel@tonic-gate struct tmpnode *tp,
630Sstevel@tonic-gate size_t delta, /* size needed */
640Sstevel@tonic-gate int pagecreate) /* call anon_resv if set */
650Sstevel@tonic-gate {
660Sstevel@tonic-gate pgcnt_t pages = btopr(delta);
673247Sgjelinek zone_t *zone;
680Sstevel@tonic-gate
690Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));
700Sstevel@tonic-gate ASSERT(tp->tn_type == VREG);
710Sstevel@tonic-gate /*
720Sstevel@tonic-gate * pagecreate is set only if we actually need to call anon_resv
730Sstevel@tonic-gate * to reserve an additional page of anonymous memory.
740Sstevel@tonic-gate * Since anon_resv always reserves a page at a time,
750Sstevel@tonic-gate * it should only get called when we know we're growing the
760Sstevel@tonic-gate * file into a new page or filling a hole.
770Sstevel@tonic-gate *
780Sstevel@tonic-gate * Deny if trying to reserve more than tmpfs can allocate
790Sstevel@tonic-gate */
803247Sgjelinek zone = tm->tm_vfsp->vfs_zone;
810Sstevel@tonic-gate if (pagecreate && ((tm->tm_anonmem + pages > tm->tm_anonmax) ||
823247Sgjelinek (!anon_checkspace(ptob(pages + tmpfs_minfree), zone)) ||
834789Sjj204856 (anon_try_resv_zone(delta, zone) == 0))) {
840Sstevel@tonic-gate return (1);
850Sstevel@tonic-gate }
860Sstevel@tonic-gate
870Sstevel@tonic-gate /*
880Sstevel@tonic-gate * update statistics
890Sstevel@tonic-gate */
900Sstevel@tonic-gate if (pagecreate) {
910Sstevel@tonic-gate mutex_enter(&tm->tm_contents);
920Sstevel@tonic-gate tm->tm_anonmem += pages;
930Sstevel@tonic-gate mutex_exit(&tm->tm_contents);
940Sstevel@tonic-gate
950Sstevel@tonic-gate TRACE_2(TR_FAC_VM, TR_ANON_TMPFS, "anon tmpfs:%p %lu",
960Sstevel@tonic-gate tp, delta);
970Sstevel@tonic-gate }
980Sstevel@tonic-gate
990Sstevel@tonic-gate return (0);
1000Sstevel@tonic-gate }
1010Sstevel@tonic-gate
1020Sstevel@tonic-gate /*
1030Sstevel@tonic-gate * tmp_unresv - called when truncating a file
1040Sstevel@tonic-gate * Only called if we're freeing at least pagesize bytes
1050Sstevel@tonic-gate * because anon_unresv does a btopr(delta)
1060Sstevel@tonic-gate */
1070Sstevel@tonic-gate static void
tmp_unresv(struct tmount * tm,struct tmpnode * tp,size_t delta)1080Sstevel@tonic-gate tmp_unresv(
1090Sstevel@tonic-gate struct tmount *tm,
1100Sstevel@tonic-gate struct tmpnode *tp,
1110Sstevel@tonic-gate size_t delta)
1120Sstevel@tonic-gate {
1130Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));
1140Sstevel@tonic-gate ASSERT(tp->tn_type == VREG);
1150Sstevel@tonic-gate
1163247Sgjelinek anon_unresv_zone(delta, tm->tm_vfsp->vfs_zone);
1170Sstevel@tonic-gate
1180Sstevel@tonic-gate mutex_enter(&tm->tm_contents);
1190Sstevel@tonic-gate tm->tm_anonmem -= btopr(delta);
1200Sstevel@tonic-gate mutex_exit(&tm->tm_contents);
1210Sstevel@tonic-gate
1220Sstevel@tonic-gate TRACE_2(TR_FAC_VM, TR_ANON_TMPFS, "anon tmpfs:%p %lu", tp, delta);
1230Sstevel@tonic-gate }
1240Sstevel@tonic-gate
1250Sstevel@tonic-gate #define TMP_INIT_SZ 128
1260Sstevel@tonic-gate
1270Sstevel@tonic-gate /*
1280Sstevel@tonic-gate * Grow the anon pointer array to cover 'newsize' bytes plus slack.
1290Sstevel@tonic-gate */
1300Sstevel@tonic-gate void
tmpnode_growmap(struct tmpnode * tp,ulong_t newsize)1310Sstevel@tonic-gate tmpnode_growmap(struct tmpnode *tp, ulong_t newsize)
1320Sstevel@tonic-gate {
1330Sstevel@tonic-gate pgcnt_t np = btopr(newsize);
1340Sstevel@tonic-gate
1350Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));
1360Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&tp->tn_contents));
1370Sstevel@tonic-gate ASSERT(tp->tn_type == VREG);
1380Sstevel@tonic-gate
1390Sstevel@tonic-gate if (tp->tn_asize >= np)
1400Sstevel@tonic-gate return;
1410Sstevel@tonic-gate
1420Sstevel@tonic-gate if (newsize > MAXOFF_T)
143*7632SNick.Todd@Sun.COM np = btopr((u_offset_t)MAXOFF_T);
1440Sstevel@tonic-gate
1450Sstevel@tonic-gate if (tp->tn_anon == NULL) {
1460Sstevel@tonic-gate tp->tn_anon = anon_create(MAX(np, TMP_INIT_SZ), ANON_SLEEP);
1470Sstevel@tonic-gate tp->tn_asize = tp->tn_anon->size;
1480Sstevel@tonic-gate return;
1490Sstevel@tonic-gate }
1500Sstevel@tonic-gate
1510Sstevel@tonic-gate tp->tn_asize = anon_grow(tp->tn_anon, NULL, tp->tn_asize,
1520Sstevel@tonic-gate np - tp->tn_asize, ANON_SLEEP);
1530Sstevel@tonic-gate ASSERT(tp->tn_asize >= np);
1540Sstevel@tonic-gate }
1550Sstevel@tonic-gate
1560Sstevel@tonic-gate /*
1570Sstevel@tonic-gate * Initialize a tmpnode and add it to file list under mount point.
1580Sstevel@tonic-gate */
1590Sstevel@tonic-gate void
tmpnode_init(struct tmount * tm,struct tmpnode * t,vattr_t * vap,cred_t * cred)1600Sstevel@tonic-gate tmpnode_init(struct tmount *tm, struct tmpnode *t, vattr_t *vap, cred_t *cred)
1610Sstevel@tonic-gate {
1620Sstevel@tonic-gate struct vnode *vp;
1630Sstevel@tonic-gate timestruc_t now;
1640Sstevel@tonic-gate
1650Sstevel@tonic-gate ASSERT(vap != NULL);
1660Sstevel@tonic-gate
1670Sstevel@tonic-gate rw_init(&t->tn_rwlock, NULL, RW_DEFAULT, NULL);
1680Sstevel@tonic-gate mutex_init(&t->tn_tlock, NULL, MUTEX_DEFAULT, NULL);
1690Sstevel@tonic-gate t->tn_mode = MAKEIMODE(vap->va_type, vap->va_mode);
1700Sstevel@tonic-gate t->tn_mask = 0;
1710Sstevel@tonic-gate t->tn_type = vap->va_type;
1720Sstevel@tonic-gate t->tn_nodeid = (ino64_t)(uint32_t)((uintptr_t)t >> 3);
1730Sstevel@tonic-gate t->tn_nlink = 1;
1740Sstevel@tonic-gate t->tn_size = 0;
1750Sstevel@tonic-gate
1760Sstevel@tonic-gate if (cred == NULL) {
1770Sstevel@tonic-gate t->tn_uid = vap->va_uid;
1780Sstevel@tonic-gate t->tn_gid = vap->va_gid;
1790Sstevel@tonic-gate } else {
1800Sstevel@tonic-gate t->tn_uid = crgetuid(cred);
1810Sstevel@tonic-gate t->tn_gid = crgetgid(cred);
1820Sstevel@tonic-gate }
1830Sstevel@tonic-gate
1840Sstevel@tonic-gate t->tn_fsid = tm->tm_dev;
1850Sstevel@tonic-gate t->tn_rdev = vap->va_rdev;
1860Sstevel@tonic-gate t->tn_blksize = PAGESIZE;
1870Sstevel@tonic-gate t->tn_nblocks = 0;
1880Sstevel@tonic-gate gethrestime(&now);
1890Sstevel@tonic-gate t->tn_atime = now;
1900Sstevel@tonic-gate t->tn_mtime = now;
1910Sstevel@tonic-gate t->tn_ctime = now;
1920Sstevel@tonic-gate t->tn_seq = 0;
1930Sstevel@tonic-gate t->tn_dir = NULL;
1940Sstevel@tonic-gate
1950Sstevel@tonic-gate t->tn_vnode = vn_alloc(KM_SLEEP);
1960Sstevel@tonic-gate vp = TNTOV(t);
1970Sstevel@tonic-gate vn_setops(vp, tmp_vnodeops);
1980Sstevel@tonic-gate vp->v_vfsp = tm->tm_vfsp;
1990Sstevel@tonic-gate vp->v_type = vap->va_type;
2000Sstevel@tonic-gate vp->v_rdev = vap->va_rdev;
2010Sstevel@tonic-gate vp->v_data = (caddr_t)t;
2020Sstevel@tonic-gate mutex_enter(&tm->tm_contents);
2030Sstevel@tonic-gate /*
2040Sstevel@tonic-gate * Increment the pseudo generation number for this tmpnode.
2050Sstevel@tonic-gate * Since tmpnodes are allocated and freed, there really is no
2060Sstevel@tonic-gate * particular generation number for a new tmpnode. Just fake it
2070Sstevel@tonic-gate * by using a counter in each file system.
2080Sstevel@tonic-gate */
2090Sstevel@tonic-gate t->tn_gen = tm->tm_gen++;
2100Sstevel@tonic-gate
2110Sstevel@tonic-gate /*
2120Sstevel@tonic-gate * Add new tmpnode to end of linked list of tmpnodes for this tmpfs
2130Sstevel@tonic-gate * Root directory is handled specially in tmp_mount.
2140Sstevel@tonic-gate */
2150Sstevel@tonic-gate if (tm->tm_rootnode != (struct tmpnode *)NULL) {
2160Sstevel@tonic-gate t->tn_forw = NULL;
2170Sstevel@tonic-gate t->tn_back = tm->tm_rootnode->tn_back;
2180Sstevel@tonic-gate t->tn_back->tn_forw = tm->tm_rootnode->tn_back = t;
2190Sstevel@tonic-gate }
2200Sstevel@tonic-gate mutex_exit(&tm->tm_contents);
2210Sstevel@tonic-gate vn_exists(vp);
2220Sstevel@tonic-gate }
2230Sstevel@tonic-gate
2240Sstevel@tonic-gate /*
2250Sstevel@tonic-gate * tmpnode_trunc - set length of tmpnode and deal with resources
2260Sstevel@tonic-gate */
2270Sstevel@tonic-gate int
tmpnode_trunc(struct tmount * tm,struct tmpnode * tp,ulong_t newsize)2280Sstevel@tonic-gate tmpnode_trunc(
2290Sstevel@tonic-gate struct tmount *tm,
2300Sstevel@tonic-gate struct tmpnode *tp,
2310Sstevel@tonic-gate ulong_t newsize)
2320Sstevel@tonic-gate {
2330Sstevel@tonic-gate size_t oldsize = tp->tn_size;
2340Sstevel@tonic-gate size_t delta;
2350Sstevel@tonic-gate struct vnode *vp = TNTOV(tp);
2360Sstevel@tonic-gate timestruc_t now;
2370Sstevel@tonic-gate int error = 0;
2380Sstevel@tonic-gate
2390Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));
2400Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&tp->tn_contents));
2410Sstevel@tonic-gate
2420Sstevel@tonic-gate if (newsize == oldsize) {
2430Sstevel@tonic-gate /* Required by POSIX */
2440Sstevel@tonic-gate goto stamp_out;
2450Sstevel@tonic-gate }
2460Sstevel@tonic-gate
2470Sstevel@tonic-gate switch (tp->tn_type) {
2480Sstevel@tonic-gate case VREG:
2490Sstevel@tonic-gate /* Growing the file */
2500Sstevel@tonic-gate if (newsize > oldsize) {
2510Sstevel@tonic-gate delta = P2ROUNDUP(newsize, PAGESIZE) -
2520Sstevel@tonic-gate P2ROUNDUP(oldsize, PAGESIZE);
2530Sstevel@tonic-gate /*
2540Sstevel@tonic-gate * Grow the size of the anon array to the new size
2550Sstevel@tonic-gate * Reserve the space for the growth here.
2560Sstevel@tonic-gate * We do it this way for now because this is how
2570Sstevel@tonic-gate * tmpfs used to do it, and this way the reserved
2580Sstevel@tonic-gate * space is alway equal to the file size.
2590Sstevel@tonic-gate * Alternatively, we could wait to reserve space 'til
2600Sstevel@tonic-gate * someone tries to store into one of the newly
2610Sstevel@tonic-gate * trunc'ed up pages. This would give us behavior
2620Sstevel@tonic-gate * identical to ufs; i.e., you could fail a
2630Sstevel@tonic-gate * fault on storing into a holey region of a file
2640Sstevel@tonic-gate * if there is no space in the filesystem to fill
2650Sstevel@tonic-gate * the hole at that time.
2660Sstevel@tonic-gate */
2670Sstevel@tonic-gate /*
2680Sstevel@tonic-gate * tmp_resv calls anon_resv only if we're extending
2690Sstevel@tonic-gate * the file into a new page
2700Sstevel@tonic-gate */
2710Sstevel@tonic-gate if (tmp_resv(tm, tp, delta,
2720Sstevel@tonic-gate (btopr(newsize) != btopr(oldsize)))) {
2730Sstevel@tonic-gate error = ENOSPC;
2740Sstevel@tonic-gate goto out;
2750Sstevel@tonic-gate }
2760Sstevel@tonic-gate tmpnode_growmap(tp, newsize);
2770Sstevel@tonic-gate tp->tn_size = newsize;
2780Sstevel@tonic-gate break;
2790Sstevel@tonic-gate }
2800Sstevel@tonic-gate
2810Sstevel@tonic-gate /* Free anon pages if shrinking file over page boundary. */
2820Sstevel@tonic-gate if (btopr(newsize) != btopr(oldsize)) {
2830Sstevel@tonic-gate pgcnt_t freed;
2840Sstevel@tonic-gate delta = P2ROUNDUP(oldsize, PAGESIZE) -
2850Sstevel@tonic-gate P2ROUNDUP(newsize, PAGESIZE);
2860Sstevel@tonic-gate freed = anon_pages(tp->tn_anon, btopr(newsize),
2870Sstevel@tonic-gate btopr(delta));
2880Sstevel@tonic-gate tp->tn_nblocks -= freed;
2890Sstevel@tonic-gate anon_free(tp->tn_anon, btopr(newsize), delta);
2900Sstevel@tonic-gate tmp_unresv(tm, tp, delta);
2910Sstevel@tonic-gate }
2920Sstevel@tonic-gate
2930Sstevel@tonic-gate /*
2940Sstevel@tonic-gate * Update the file size now to reflect the pages we just
2950Sstevel@tonic-gate * blew away as we're about to drop the
2960Sstevel@tonic-gate * contents lock to zero the partial page (which could
2970Sstevel@tonic-gate * re-enter tmpfs via getpage and try to reacquire the lock)
2980Sstevel@tonic-gate * Once we drop the lock, faulters can fill in holes in
2990Sstevel@tonic-gate * the file and if we haven't updated the size they
3000Sstevel@tonic-gate * may fill in holes that are beyond EOF, which will then
3010Sstevel@tonic-gate * never get cleared.
3020Sstevel@tonic-gate */
3030Sstevel@tonic-gate tp->tn_size = newsize;
3040Sstevel@tonic-gate
3050Sstevel@tonic-gate /* Zero new size of file to page boundary. */
3060Sstevel@tonic-gate if (anon_get_ptr(tp->tn_anon, btop(newsize)) != NULL) {
3070Sstevel@tonic-gate size_t zlen;
3080Sstevel@tonic-gate
3090Sstevel@tonic-gate zlen = PAGESIZE - ((ulong_t)newsize & PAGEOFFSET);
3100Sstevel@tonic-gate rw_exit(&tp->tn_contents);
3110Sstevel@tonic-gate pvn_vpzero(TNTOV(tp), (u_offset_t)newsize, zlen);
3120Sstevel@tonic-gate rw_enter(&tp->tn_contents, RW_WRITER);
3130Sstevel@tonic-gate }
3140Sstevel@tonic-gate
3150Sstevel@tonic-gate if (newsize == 0) {
3160Sstevel@tonic-gate /* Delete anon array for tmpnode */
3170Sstevel@tonic-gate ASSERT(tp->tn_nblocks == 0);
3180Sstevel@tonic-gate ASSERT(anon_get_ptr(tp->tn_anon, 0) == NULL);
3190Sstevel@tonic-gate ASSERT(!vn_has_cached_data(vp));
3200Sstevel@tonic-gate
3210Sstevel@tonic-gate anon_release(tp->tn_anon, tp->tn_asize);
3220Sstevel@tonic-gate tp->tn_anon = NULL;
3230Sstevel@tonic-gate tp->tn_asize = 0;
3240Sstevel@tonic-gate }
3250Sstevel@tonic-gate break;
3260Sstevel@tonic-gate case VLNK:
3270Sstevel@tonic-gate /*
3280Sstevel@tonic-gate * Don't do anything here
3290Sstevel@tonic-gate * tmp_inactive frees the memory
3300Sstevel@tonic-gate */
3310Sstevel@tonic-gate if (newsize != 0)
3320Sstevel@tonic-gate error = EINVAL;
3330Sstevel@tonic-gate goto out;
3340Sstevel@tonic-gate case VDIR:
3350Sstevel@tonic-gate /*
3360Sstevel@tonic-gate * Remove all the directory entries under this directory.
3370Sstevel@tonic-gate */
3380Sstevel@tonic-gate if (newsize != 0) {
3390Sstevel@tonic-gate error = EINVAL;
3400Sstevel@tonic-gate goto out;
3410Sstevel@tonic-gate }
3420Sstevel@tonic-gate tdirtrunc(tp);
3430Sstevel@tonic-gate ASSERT(tp->tn_nlink == 0);
3440Sstevel@tonic-gate break;
3450Sstevel@tonic-gate default:
3460Sstevel@tonic-gate goto out;
3470Sstevel@tonic-gate }
3480Sstevel@tonic-gate
3490Sstevel@tonic-gate stamp_out:
3500Sstevel@tonic-gate gethrestime(&now);
3510Sstevel@tonic-gate tp->tn_mtime = now;
3520Sstevel@tonic-gate tp->tn_ctime = now;
3530Sstevel@tonic-gate out:
3540Sstevel@tonic-gate /*
3550Sstevel@tonic-gate * tmpnode_trunc() cannot fail when newsize == 0.
3560Sstevel@tonic-gate */
3570Sstevel@tonic-gate ASSERT(error == 0 || newsize != 0);
3580Sstevel@tonic-gate return (error);
3590Sstevel@tonic-gate }
360