Home | History | Annotate | Download | only in tmpfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include <sys/types.h>
     27 #include <sys/param.h>
     28 #include <sys/t_lock.h>
     29 #include <sys/systm.h>
     30 #include <sys/time.h>
     31 #include <sys/sysmacros.h>
     32 #include <sys/proc.h>
     33 #include <sys/disp.h>
     34 #include <sys/user.h>
     35 #include <sys/time.h>
     36 #include <sys/vfs.h>
     37 #include <sys/vnode.h>
     38 #include <sys/stat.h>
     39 #include <sys/mode.h>
     40 #include <sys/errno.h>
     41 #include <sys/kmem.h>
     42 #include <vm/seg.h>
     43 #include <vm/seg_map.h>
     44 #include <vm/anon.h>
     45 #include <vm/page.h>
     46 #include <vm/pvn.h>
     47 #include <sys/fs/tmp.h>
     48 #include <sys/fs/tmpnode.h>
     49 #include <sys/debug.h>
     50 #include <sys/cmn_err.h>
     51 #include <sys/swap.h>
     52 #include <sys/vtrace.h>
     53 
     54 /*
     55  * Reserve swap space for the size of the file.
     56  * Called before growing a file (i.e. ftruncate, write)
     57  * Returns 0 on success.
     58  */
     59 int
     60 tmp_resv(
     61 	struct tmount *tm,
     62 	struct tmpnode *tp,
     63 	size_t delta,		/* size needed */
     64 	int pagecreate)		/* call anon_resv if set */
     65 {
     66 	pgcnt_t pages = btopr(delta);
     67 	zone_t *zone;
     68 
     69 	ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));
     70 	ASSERT(tp->tn_type == VREG);
     71 	/*
     72 	 * pagecreate is set only if we actually need to call anon_resv
     73 	 * to reserve an additional page of anonymous memory.
     74 	 * Since anon_resv always reserves a page at a time,
     75 	 * it should only get called when we know we're growing the
     76 	 * file into a new page or filling a hole.
     77 	 *
     78 	 * Deny if trying to reserve more than tmpfs can allocate
     79 	 */
     80 	zone = tm->tm_vfsp->vfs_zone;
     81 	if (pagecreate && ((tm->tm_anonmem + pages > tm->tm_anonmax) ||
     82 	    (!anon_checkspace(ptob(pages + tmpfs_minfree), zone)) ||
     83 	    (anon_try_resv_zone(delta, zone) == 0))) {
     84 		return (1);
     85 	}
     86 
     87 	/*
     88 	 * update statistics
     89 	 */
     90 	if (pagecreate) {
     91 		mutex_enter(&tm->tm_contents);
     92 		tm->tm_anonmem += pages;
     93 		mutex_exit(&tm->tm_contents);
     94 
     95 		TRACE_2(TR_FAC_VM, TR_ANON_TMPFS, "anon tmpfs:%p %lu",
     96 		    tp, delta);
     97 	}
     98 
     99 	return (0);
    100 }
    101 
    102 /*
    103  * tmp_unresv - called when truncating a file
    104  * Only called if we're freeing at least pagesize bytes
    105  * because anon_unresv does a btopr(delta)
    106  */
    107 static void
    108 tmp_unresv(
    109 	struct tmount *tm,
    110 	struct tmpnode *tp,
    111 	size_t delta)
    112 {
    113 	ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));
    114 	ASSERT(tp->tn_type == VREG);
    115 
    116 	anon_unresv_zone(delta, tm->tm_vfsp->vfs_zone);
    117 
    118 	mutex_enter(&tm->tm_contents);
    119 	tm->tm_anonmem -= btopr(delta);
    120 	mutex_exit(&tm->tm_contents);
    121 
    122 	TRACE_2(TR_FAC_VM, TR_ANON_TMPFS, "anon tmpfs:%p %lu", tp, delta);
    123 }
    124 
    125 #define	TMP_INIT_SZ	128
    126 
    127 /*
    128  * Grow the anon pointer array to cover 'newsize' bytes plus slack.
    129  */
    130 void
    131 tmpnode_growmap(struct tmpnode *tp, ulong_t newsize)
    132 {
    133 	pgcnt_t np = btopr(newsize);
    134 
    135 	ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));
    136 	ASSERT(RW_WRITE_HELD(&tp->tn_contents));
    137 	ASSERT(tp->tn_type == VREG);
    138 
    139 	if (tp->tn_asize >= np)
    140 		return;
    141 
    142 	if (newsize > MAXOFF_T)
    143 		np = btopr((u_offset_t)MAXOFF_T);
    144 
    145 	if (tp->tn_anon == NULL) {
    146 		tp->tn_anon = anon_create(MAX(np, TMP_INIT_SZ), ANON_SLEEP);
    147 		tp->tn_asize = tp->tn_anon->size;
    148 		return;
    149 	}
    150 
    151 	tp->tn_asize = anon_grow(tp->tn_anon, NULL, tp->tn_asize,
    152 	    np - tp->tn_asize, ANON_SLEEP);
    153 	ASSERT(tp->tn_asize >= np);
    154 }
    155 
    156 /*
    157  * Initialize a tmpnode and add it to file list under mount point.
    158  */
    159 void
    160 tmpnode_init(struct tmount *tm, struct tmpnode *t, vattr_t *vap, cred_t *cred)
    161 {
    162 	struct vnode *vp;
    163 	timestruc_t now;
    164 
    165 	ASSERT(vap != NULL);
    166 
    167 	rw_init(&t->tn_rwlock, NULL, RW_DEFAULT, NULL);
    168 	mutex_init(&t->tn_tlock, NULL, MUTEX_DEFAULT, NULL);
    169 	t->tn_mode = MAKEIMODE(vap->va_type, vap->va_mode);
    170 	t->tn_mask = 0;
    171 	t->tn_type = vap->va_type;
    172 	t->tn_nodeid = (ino64_t)(uint32_t)((uintptr_t)t >> 3);
    173 	t->tn_nlink = 1;
    174 	t->tn_size = 0;
    175 
    176 	if (cred == NULL) {
    177 		t->tn_uid = vap->va_uid;
    178 		t->tn_gid = vap->va_gid;
    179 	} else {
    180 		t->tn_uid = crgetuid(cred);
    181 		t->tn_gid = crgetgid(cred);
    182 	}
    183 
    184 	t->tn_fsid = tm->tm_dev;
    185 	t->tn_rdev = vap->va_rdev;
    186 	t->tn_blksize = PAGESIZE;
    187 	t->tn_nblocks = 0;
    188 	gethrestime(&now);
    189 	t->tn_atime = now;
    190 	t->tn_mtime = now;
    191 	t->tn_ctime = now;
    192 	t->tn_seq = 0;
    193 	t->tn_dir = NULL;
    194 
    195 	t->tn_vnode = vn_alloc(KM_SLEEP);
    196 	vp = TNTOV(t);
    197 	vn_setops(vp, tmp_vnodeops);
    198 	vp->v_vfsp = tm->tm_vfsp;
    199 	vp->v_type = vap->va_type;
    200 	vp->v_rdev = vap->va_rdev;
    201 	vp->v_data = (caddr_t)t;
    202 	mutex_enter(&tm->tm_contents);
    203 	/*
    204 	 * Increment the pseudo generation number for this tmpnode.
    205 	 * Since tmpnodes are allocated and freed, there really is no
    206 	 * particular generation number for a new tmpnode.  Just fake it
    207 	 * by using a counter in each file system.
    208 	 */
    209 	t->tn_gen = tm->tm_gen++;
    210 
    211 	/*
    212 	 * Add new tmpnode to end of linked list of tmpnodes for this tmpfs
    213 	 * Root directory is handled specially in tmp_mount.
    214 	 */
    215 	if (tm->tm_rootnode != (struct tmpnode *)NULL) {
    216 		t->tn_forw = NULL;
    217 		t->tn_back = tm->tm_rootnode->tn_back;
    218 		t->tn_back->tn_forw = tm->tm_rootnode->tn_back = t;
    219 	}
    220 	mutex_exit(&tm->tm_contents);
    221 	vn_exists(vp);
    222 }
    223 
    224 /*
    225  * tmpnode_trunc - set length of tmpnode and deal with resources
    226  */
    227 int
    228 tmpnode_trunc(
    229 	struct tmount *tm,
    230 	struct tmpnode *tp,
    231 	ulong_t newsize)
    232 {
    233 	size_t oldsize = tp->tn_size;
    234 	size_t delta;
    235 	struct vnode *vp = TNTOV(tp);
    236 	timestruc_t now;
    237 	int error = 0;
    238 
    239 	ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));
    240 	ASSERT(RW_WRITE_HELD(&tp->tn_contents));
    241 
    242 	if (newsize == oldsize) {
    243 		/* Required by POSIX */
    244 		goto stamp_out;
    245 	}
    246 
    247 	switch (tp->tn_type) {
    248 	case VREG:
    249 		/* Growing the file */
    250 		if (newsize > oldsize) {
    251 			delta = P2ROUNDUP(newsize, PAGESIZE) -
    252 			    P2ROUNDUP(oldsize, PAGESIZE);
    253 			/*
    254 			 * Grow the size of the anon array to the new size
    255 			 * Reserve the space for the growth here.
    256 			 * We do it this way for now because this is how
    257 			 * tmpfs used to do it, and this way the reserved
    258 			 * space is alway equal to the file size.
    259 			 * Alternatively, we could wait to reserve space 'til
    260 			 * someone tries to store into one of the newly
    261 			 * trunc'ed up pages. This would give us behavior
    262 			 * identical to ufs; i.e., you could fail a
    263 			 * fault on storing into a holey region of a file
    264 			 * if there is no space in the filesystem to fill
    265 			 * the hole at that time.
    266 			 */
    267 			/*
    268 			 * tmp_resv calls anon_resv only if we're extending
    269 			 * the file into a new page
    270 			 */
    271 			if (tmp_resv(tm, tp, delta,
    272 			    (btopr(newsize) != btopr(oldsize)))) {
    273 				error = ENOSPC;
    274 				goto out;
    275 			}
    276 			tmpnode_growmap(tp, newsize);
    277 			tp->tn_size = newsize;
    278 			break;
    279 		}
    280 
    281 		/* Free anon pages if shrinking file over page boundary. */
    282 		if (btopr(newsize) != btopr(oldsize)) {
    283 			pgcnt_t freed;
    284 			delta = P2ROUNDUP(oldsize, PAGESIZE) -
    285 			    P2ROUNDUP(newsize, PAGESIZE);
    286 			freed = anon_pages(tp->tn_anon, btopr(newsize),
    287 			    btopr(delta));
    288 			tp->tn_nblocks -= freed;
    289 			anon_free(tp->tn_anon, btopr(newsize), delta);
    290 			tmp_unresv(tm, tp, delta);
    291 		}
    292 
    293 		/*
    294 		 * Update the file size now to reflect the pages we just
    295 		 * blew away as we're about to drop the
    296 		 * contents lock to zero the partial page (which could
    297 		 * re-enter tmpfs via getpage and try to reacquire the lock)
    298 		 * Once we drop the lock, faulters can fill in holes in
    299 		 * the file and if we haven't updated the size they
    300 		 * may fill in holes that are beyond EOF, which will then
    301 		 * never get cleared.
    302 		 */
    303 		tp->tn_size = newsize;
    304 
    305 		/* Zero new size of file to page boundary. */
    306 		if (anon_get_ptr(tp->tn_anon, btop(newsize)) != NULL) {
    307 			size_t zlen;
    308 
    309 			zlen = PAGESIZE - ((ulong_t)newsize & PAGEOFFSET);
    310 			rw_exit(&tp->tn_contents);
    311 			pvn_vpzero(TNTOV(tp), (u_offset_t)newsize, zlen);
    312 			rw_enter(&tp->tn_contents, RW_WRITER);
    313 		}
    314 
    315 		if (newsize == 0) {
    316 			/* Delete anon array for tmpnode */
    317 			ASSERT(tp->tn_nblocks == 0);
    318 			ASSERT(anon_get_ptr(tp->tn_anon, 0) == NULL);
    319 			ASSERT(!vn_has_cached_data(vp));
    320 
    321 			anon_release(tp->tn_anon, tp->tn_asize);
    322 			tp->tn_anon = NULL;
    323 			tp->tn_asize = 0;
    324 		}
    325 		break;
    326 	case VLNK:
    327 		/*
    328 		 * Don't do anything here
    329 		 * tmp_inactive frees the memory
    330 		 */
    331 		if (newsize != 0)
    332 			error = EINVAL;
    333 		goto out;
    334 	case VDIR:
    335 		/*
    336 		 * Remove all the directory entries under this directory.
    337 		 */
    338 		if (newsize != 0) {
    339 			error = EINVAL;
    340 			goto out;
    341 		}
    342 		tdirtrunc(tp);
    343 		ASSERT(tp->tn_nlink == 0);
    344 		break;
    345 	default:
    346 		goto out;
    347 	}
    348 
    349 stamp_out:
    350 	gethrestime(&now);
    351 	tp->tn_mtime = now;
    352 	tp->tn_ctime = now;
    353 out:
    354 	/*
    355 	 * tmpnode_trunc() cannot fail when newsize == 0.
    356 	 */
    357 	ASSERT(error == 0 || newsize != 0);
    358 	return (error);
    359 }
    360