Home | History | Annotate | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     27 
     28 #include <sys/types.h>
     29 #include <sys/systm.h>
     30 #include <sys/archsystm.h>
     31 #include <sys/machsystm.h>
     32 #include <sys/t_lock.h>
     33 #include <sys/vmem.h>
     34 #include <sys/mman.h>
     35 #include <sys/vm.h>
     36 #include <sys/cpu.h>
     37 #include <sys/cmn_err.h>
     38 #include <sys/cpuvar.h>
     39 #include <sys/atomic.h>
     40 #include <vm/as.h>
     41 #include <vm/hat.h>
     42 #include <vm/as.h>
     43 #include <vm/page.h>
     44 #include <vm/seg.h>
     45 #include <vm/seg_kmem.h>
     46 #include <vm/seg_kpm.h>
     47 #include <vm/hat_sfmmu.h>
     48 #include <sys/debug.h>
     49 #include <sys/cpu_module.h>
     50 
     51 /*
     52  * A quick way to generate a cache consistent address to map in a page.
     53  * users: ppcopy, pagezero, /proc, dev/mem
     54  *
     55  * The ppmapin/ppmapout routines provide a quick way of generating a cache
     56  * consistent address by reserving a given amount of kernel address space.
     57  * The base is PPMAPBASE and its size is PPMAPSIZE.  This memory is divided
     58  * into x number of sets, where x is the number of colors for the virtual
     59  * cache. The number of colors is how many times a page can be mapped
     60  * simultaneously in the cache.  For direct map caches this translates to
     61  * the number of pages in the cache.
     62  * Each set will be assigned a group of virtual pages from the reserved memory
     63  * depending on its virtual color.
     64  * When trying to assign a virtual address we will find out the color for the
     65  * physical page in question (if applicable).  Then we will try to find an
     66  * available virtual page from the set of the appropriate color.
     67  */
     68 
     69 int pp_slots = 4;		/* small default, tuned by cpu module */
     70 
     71 /* tuned by cpu module, default is "safe" */
     72 int pp_consistent_coloring = PPAGE_STORES_POLLUTE | PPAGE_LOADS_POLLUTE;
     73 
     74 static caddr_t	ppmap_vaddrs[PPMAPSIZE / MMU_PAGESIZE];
     75 static int	nsets;			/* number of sets */
     76 static int	ppmap_shift;		/* set selector */
     77 
     78 #ifdef PPDEBUG
     79 #define		MAXCOLORS	16	/* for debug only */
     80 static int	ppalloc_noslot = 0;	/* # of allocations from kernelmap */
     81 static int	align_hits;
     82 static int	pp_allocs;		/* # of ppmapin requests */
     83 #endif /* PPDEBUG */
     84 
     85 /*
     86  * There are only 64 TLB entries on spitfire, 16 on cheetah
     87  * (fully-associative TLB) so we allow the cpu module to tune the
     88  * number to use here via pp_slots.
     89  */
     90 static struct ppmap_va {
     91 	caddr_t	ppmap_slots[MAXPP_SLOTS];
     92 } ppmap_va[NCPU];
     93 
     94 /* prevent compilation with VAC defined */
     95 #ifdef VAC
     96 #error "sun4v ppmapin and ppmapout do not support VAC"
     97 #endif
     98 
     99 void
    100 ppmapinit(void)
    101 {
    102 	int nset;
    103 	caddr_t va;
    104 
    105 	ASSERT(pp_slots <= MAXPP_SLOTS);
    106 
    107 	va = (caddr_t)PPMAPBASE;
    108 
    109 	/*
    110 	 * sun4v does not have a virtual indexed cache and simply
    111 	 * has only one set containing all pages.
    112 	 */
    113 	nsets = mmu_btop(PPMAPSIZE);
    114 	ppmap_shift = MMU_PAGESHIFT;
    115 
    116 	for (nset = 0; nset < nsets; nset++) {
    117 		ppmap_vaddrs[nset] =
    118 		    (caddr_t)((uintptr_t)va + (nset * MMU_PAGESIZE));
    119 	}
    120 }
    121 
    122 /*
    123  * Allocate a cache consistent virtual address to map a page, pp,
    124  * with protection, vprot; and map it in the MMU, using the most
    125  * efficient means possible.  The argument avoid is a virtual address
    126  * hint which when masked yields an offset into a virtual cache
    127  * that should be avoided when allocating an address to map in a
    128  * page.  An avoid arg of -1 means you don't care, for instance pagezero.
    129  *
    130  * machine dependent, depends on virtual address space layout,
    131  * understands that all kernel addresses have bit 31 set.
    132  *
    133  * NOTE: For sun4 platforms the meaning of the hint argument is opposite from
    134  * that found in other architectures.  In other architectures the hint
    135  * (called avoid) was used to ask ppmapin to NOT use the specified cache color.
    136  * This was used to avoid virtual cache thrashing in the bcopy.  Unfortunately
    137  * in the case of a COW,  this later on caused a cache aliasing conflict.  In
    138  * sun4, the bcopy routine uses the block ld/st instructions so we don't have
    139  * to worry about virtual cache trashing.  Actually, by using the hint to choose
    140  * the right color we can almost guarantee a cache conflict will not occur.
    141  */
    142 
    143 /*ARGSUSED2*/
    144 caddr_t
    145 ppmapin(page_t *pp, uint_t vprot, caddr_t hint)
    146 {
    147 	int nset;
    148 	caddr_t va;
    149 
    150 #ifdef PPDEBUG
    151 	pp_allocs++;
    152 #endif /* PPDEBUG */
    153 
    154 	/*
    155 	 * For sun4v caches are physical caches, we can pick any address
    156 	 * we want.
    157 	 */
    158 	for (nset = 0; nset < nsets; nset++) {
    159 		va = ppmap_vaddrs[nset];
    160 		if (va != NULL) {
    161 #ifdef PPDEBUG
    162 			align_hits++;
    163 #endif /* PPDEBUG */
    164 			if (casptr(&ppmap_vaddrs[nset], va, NULL) == va) {
    165 				hat_memload(kas.a_hat, va, pp,
    166 				    vprot | HAT_NOSYNC,
    167 				    HAT_LOAD_LOCK);
    168 				return (va);
    169 			}
    170 		}
    171 	}
    172 
    173 #ifdef PPDEBUG
    174 	ppalloc_noslot++;
    175 #endif /* PPDEBUG */
    176 
    177 	/*
    178 	 * No free slots; get a random one from the kernel heap area.
    179 	 */
    180 	va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
    181 
    182 	hat_memload(kas.a_hat, va, pp, vprot | HAT_NOSYNC, HAT_LOAD_LOCK);
    183 
    184 	return (va);
    185 
    186 }
    187 
    188 void
    189 ppmapout(caddr_t va)
    190 {
    191 	int nset;
    192 
    193 	if (va >= kernelheap && va < ekernelheap) {
    194 		/*
    195 		 * Space came from kernelmap, flush the page and
    196 		 * return the space.
    197 		 */
    198 		hat_unload(kas.a_hat, va, PAGESIZE,
    199 		    (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));
    200 		vmem_free(heap_arena, va, PAGESIZE);
    201 	} else {
    202 		/*
    203 		 * Space came from ppmap_vaddrs[], give it back.
    204 		 */
    205 		nset = ((uintptr_t)va >> ppmap_shift) & (nsets - 1);
    206 		hat_unload(kas.a_hat, va, PAGESIZE,
    207 		    (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));
    208 
    209 		ASSERT(ppmap_vaddrs[nset] == NULL);
    210 		ppmap_vaddrs[nset] = va;
    211 	}
    212 }
    213 
    214 #ifdef DEBUG
    215 #define	PP_STAT_ADD(stat)	(stat)++
    216 uint_t pload, ploadfail;
    217 uint_t ppzero, ppzero_short;
    218 #else
    219 #define	PP_STAT_ADD(stat)
    220 #endif /* DEBUG */
    221 
    222 static void
    223 pp_unload_tlb(caddr_t *pslot, caddr_t va)
    224 {
    225 	ASSERT(*pslot == va);
    226 
    227 	vtag_flushpage(va, (uint64_t)ksfmmup);
    228 	*pslot = NULL;				/* release the slot */
    229 }
    230 
    231 /*
    232  * Routine to copy kernel pages during relocation.  It will copy one
    233  * PAGESIZE page to another PAGESIZE page.  This function may be called
    234  * above LOCK_LEVEL so it should not grab any locks.
    235  */
    236 void
    237 ppcopy_kernel__relocatable(page_t *fm_pp, page_t *to_pp)
    238 {
    239 	uint64_t fm_pa, to_pa;
    240 	size_t nbytes;
    241 
    242 	fm_pa = (uint64_t)(fm_pp->p_pagenum) << MMU_PAGESHIFT;
    243 	to_pa = (uint64_t)(to_pp->p_pagenum) << MMU_PAGESHIFT;
    244 
    245 	nbytes = MMU_PAGESIZE;
    246 
    247 	for (; nbytes > 0; fm_pa += 32, to_pa += 32, nbytes -= 32)
    248 		hw_pa_bcopy32(fm_pa, to_pa);
    249 }
    250 
    251 /*
    252  * Copy the data from the physical page represented by "frompp" to
    253  * that represented by "topp".
    254  *
    255  * Try to use per cpu mapping first, if that fails then call pp_mapin
    256  * to load it.
    257  * Returns one on success or zero on some sort of fault while doing the copy.
    258  */
    259 int
    260 ppcopy(page_t *fm_pp, page_t *to_pp)
    261 {
    262 	caddr_t fm_va = NULL;
    263 	caddr_t to_va;
    264 	boolean_t fast;
    265 	label_t ljb;
    266 	int ret = 1;
    267 
    268 	ASSERT(PAGE_LOCKED(fm_pp));
    269 	ASSERT(PAGE_LOCKED(to_pp));
    270 
    271 	/*
    272 	 * Try to map using KPM if enabled.  If it fails, fall
    273 	 * back to ppmapin/ppmapout.
    274 	 */
    275 	if ((kpm_enable == 0) ||
    276 	    (fm_va = hat_kpm_mapin(fm_pp, NULL)) == NULL ||
    277 	    (to_va = hat_kpm_mapin(to_pp, NULL)) == NULL) {
    278 		if (fm_va != NULL)
    279 			hat_kpm_mapout(fm_pp, NULL, fm_va);
    280 		fm_va = ppmapin(fm_pp, PROT_READ, (caddr_t)-1);
    281 		to_va = ppmapin(to_pp, PROT_READ | PROT_WRITE, fm_va);
    282 		fast = B_FALSE;
    283 	} else
    284 		fast = B_TRUE;
    285 
    286 	if (on_fault(&ljb)) {
    287 		ret = 0;
    288 		goto faulted;
    289 	}
    290 	bcopy(fm_va, to_va, PAGESIZE);
    291 	no_fault();
    292 faulted:
    293 
    294 	/* Unmap */
    295 	if (fast) {
    296 		hat_kpm_mapout(fm_pp, NULL, fm_va);
    297 		hat_kpm_mapout(to_pp, NULL, to_va);
    298 	} else {
    299 		ppmapout(fm_va);
    300 		ppmapout(to_va);
    301 	}
    302 	return (ret);
    303 }
    304 
    305 /*
    306  * Zero the physical page from off to off + len given by `pp'
    307  * without changing the reference and modified bits of page.
    308  *
    309  * Again, we'll try per cpu mapping first.
    310  */
    311 
    312 void
    313 pagezero(page_t *pp, uint_t off, uint_t len)
    314 {
    315 	caddr_t va;
    316 	extern int hwblkclr(void *, size_t);
    317 	extern int use_hw_bzero;
    318 	boolean_t fast;
    319 
    320 	ASSERT((int)len > 0 && (int)off >= 0 && off + len <= PAGESIZE);
    321 	ASSERT(PAGE_LOCKED(pp));
    322 
    323 	PP_STAT_ADD(ppzero);
    324 
    325 	if (len != MMU_PAGESIZE || !use_hw_bzero) {
    326 		PP_STAT_ADD(ppzero_short);
    327 	}
    328 
    329 	kpreempt_disable();
    330 
    331 	/*
    332 	 * Try to use KPM if enabled.  If that fails, fall back to
    333 	 * ppmapin/ppmapout.
    334 	 */
    335 
    336 	if (kpm_enable != 0) {
    337 		fast = B_TRUE;
    338 		va = hat_kpm_mapin(pp, NULL);
    339 	} else
    340 		va = NULL;
    341 
    342 	if (va == NULL) {
    343 		fast = B_FALSE;
    344 		va = ppmapin(pp, PROT_READ | PROT_WRITE, (caddr_t)-1);
    345 	}
    346 
    347 	if (!use_hw_bzero) {
    348 		bzero(va + off, len);
    349 		sync_icache(va + off, len);
    350 	} else if (hwblkclr(va + off, len)) {
    351 		/*
    352 		 * We may not have used block commit asi.
    353 		 * So flush the I-$ manually
    354 		 */
    355 		sync_icache(va + off, len);
    356 	} else {
    357 		/*
    358 		 * We have used blk commit, and flushed the I-$.
    359 		 * However we still may have an instruction in the
    360 		 * pipeline. Only a flush will invalidate that.
    361 		 */
    362 		doflush(va);
    363 	}
    364 
    365 	if (fast) {
    366 		hat_kpm_mapout(pp, NULL, va);
    367 	} else {
    368 		ppmapout(va);
    369 	}
    370 	kpreempt_enable();
    371 }
    372