Home | History | Annotate | Download | only in os
      1     0    stevel /*
      2     0    stevel  * CDDL HEADER START
      3     0    stevel  *
      4     0    stevel  * The contents of this file are subject to the terms of the
      5  1517  jb145095  * Common Development and Distribution License (the "License").
      6  1517  jb145095  * You may not use this file except in compliance with the License.
      7     0    stevel  *
      8     0    stevel  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9     0    stevel  * or http://www.opensolaris.org/os/licensing.
     10     0    stevel  * See the License for the specific language governing permissions
     11     0    stevel  * and limitations under the License.
     12     0    stevel  *
     13     0    stevel  * When distributing Covered Code, include this CDDL HEADER in each
     14     0    stevel  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15     0    stevel  * If applicable, add the following below this CDDL HEADER, with the
     16     0    stevel  * fields enclosed by brackets "[]" replaced with your own identifying
     17     0    stevel  * information: Portions Copyright [yyyy] [name of copyright owner]
     18     0    stevel  *
     19     0    stevel  * CDDL HEADER END
     20     0    stevel  */
     21     0    stevel /*
     22  6461  cb222892  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23     0    stevel  * Use is subject to license terms.
     24     0    stevel  */
     25     0    stevel 
     26     0    stevel #pragma ident	"%Z%%M%	%I%	%E% SMI"
     27     0    stevel 
     28     0    stevel #include <sys/types.h>
     29     0    stevel #include <sys/systm.h>
     30     0    stevel #include <sys/archsystm.h>
     31     0    stevel #include <sys/machsystm.h>
     32     0    stevel #include <sys/t_lock.h>
     33     0    stevel #include <sys/vmem.h>
     34     0    stevel #include <sys/mman.h>
     35     0    stevel #include <sys/vm.h>
     36     0    stevel #include <sys/cpu.h>
     37     0    stevel #include <sys/cmn_err.h>
     38     0    stevel #include <sys/cpuvar.h>
     39     0    stevel #include <sys/atomic.h>
     40     0    stevel #include <vm/as.h>
     41     0    stevel #include <vm/hat.h>
     42     0    stevel #include <vm/as.h>
     43     0    stevel #include <vm/page.h>
     44     0    stevel #include <vm/seg.h>
     45     0    stevel #include <vm/seg_kmem.h>
     46  6461  cb222892 #include <vm/seg_kpm.h>
     47     0    stevel #include <vm/hat_sfmmu.h>
     48     0    stevel #include <sys/debug.h>
     49     0    stevel #include <sys/cpu_module.h>
     50     0    stevel 
     51     0    stevel /*
     52     0    stevel  * A quick way to generate a cache consistent address to map in a page.
     53     0    stevel  * users: ppcopy, pagezero, /proc, dev/mem
     54     0    stevel  *
     55     0    stevel  * The ppmapin/ppmapout routines provide a quick way of generating a cache
     56     0    stevel  * consistent address by reserving a given amount of kernel address space.
     57     0    stevel  * The base is PPMAPBASE and its size is PPMAPSIZE.  This memory is divided
     58     0    stevel  * into x number of sets, where x is the number of colors for the virtual
     59     0    stevel  * cache. The number of colors is how many times a page can be mapped
     60     0    stevel  * simultaneously in the cache.  For direct map caches this translates to
     61     0    stevel  * the number of pages in the cache.
     62     0    stevel  * Each set will be assigned a group of virtual pages from the reserved memory
     63     0    stevel  * depending on its virtual color.
     64     0    stevel  * When trying to assign a virtual address we will find out the color for the
     65     0    stevel  * physical page in question (if applicable).  Then we will try to find an
     66     0    stevel  * available virtual page from the set of the appropriate color.
     67     0    stevel  */
     68     0    stevel 
     69     0    stevel int pp_slots = 4;		/* small default, tuned by cpu module */
     70     0    stevel 
     71     0    stevel /* tuned by cpu module, default is "safe" */
     72     0    stevel int pp_consistent_coloring = PPAGE_STORES_POLLUTE | PPAGE_LOADS_POLLUTE;
     73     0    stevel 
     74     0    stevel static caddr_t	ppmap_vaddrs[PPMAPSIZE / MMU_PAGESIZE];
     75     0    stevel static int	nsets;			/* number of sets */
     76     0    stevel static int	ppmap_shift;		/* set selector */
     77     0    stevel 
     78     0    stevel #ifdef PPDEBUG
     79     0    stevel #define		MAXCOLORS	16	/* for debug only */
     80     0    stevel static int	ppalloc_noslot = 0;	/* # of allocations from kernelmap */
     81  2296  ae112802 static int	align_hits;
     82     0    stevel static int	pp_allocs;		/* # of ppmapin requests */
     83     0    stevel #endif /* PPDEBUG */
     84     0    stevel 
     85     0    stevel /*
     86     0    stevel  * There are only 64 TLB entries on spitfire, 16 on cheetah
     87     0    stevel  * (fully-associative TLB) so we allow the cpu module to tune the
     88     0    stevel  * number to use here via pp_slots.
     89     0    stevel  */
     90     0    stevel static struct ppmap_va {
     91     0    stevel 	caddr_t	ppmap_slots[MAXPP_SLOTS];
     92     0    stevel } ppmap_va[NCPU];
     93     0    stevel 
     94  2296  ae112802 /* prevent compilation with VAC defined */
     95  2296  ae112802 #ifdef VAC
     96  2296  ae112802 #error "sun4v ppmapin and ppmapout do not support VAC"
     97  2296  ae112802 #endif
     98  2296  ae112802 
     99     0    stevel void
    100     0    stevel ppmapinit(void)
    101     0    stevel {
    102  2296  ae112802 	int nset;
    103     0    stevel 	caddr_t va;
    104     0    stevel 
    105     0    stevel 	ASSERT(pp_slots <= MAXPP_SLOTS);
    106     0    stevel 
    107     0    stevel 	va = (caddr_t)PPMAPBASE;
    108     0    stevel 
    109  2296  ae112802 	/*
    110  2296  ae112802 	 * sun4v does not have a virtual indexed cache and simply
    111  2296  ae112802 	 * has only one set containing all pages.
    112  2296  ae112802 	 */
    113  2296  ae112802 	nsets = mmu_btop(PPMAPSIZE);
    114  2296  ae112802 	ppmap_shift = MMU_PAGESHIFT;
    115  2296  ae112802 
    116  2296  ae112802 	for (nset = 0; nset < nsets; nset++) {
    117  2296  ae112802 		ppmap_vaddrs[nset] =
    118  2296  ae112802 		    (caddr_t)((uintptr_t)va + (nset * MMU_PAGESIZE));
    119     0    stevel 	}
    120     0    stevel }
    121     0    stevel 
    122     0    stevel /*
    123     0    stevel  * Allocate a cache consistent virtual address to map a page, pp,
    124     0    stevel  * with protection, vprot; and map it in the MMU, using the most
    125     0    stevel  * efficient means possible.  The argument avoid is a virtual address
    126     0    stevel  * hint which when masked yields an offset into a virtual cache
    127     0    stevel  * that should be avoided when allocating an address to map in a
    128     0    stevel  * page.  An avoid arg of -1 means you don't care, for instance pagezero.
    129     0    stevel  *
    130     0    stevel  * machine dependent, depends on virtual address space layout,
    131     0    stevel  * understands that all kernel addresses have bit 31 set.
    132     0    stevel  *
    133     0    stevel  * NOTE: For sun4 platforms the meaning of the hint argument is opposite from
    134     0    stevel  * that found in other architectures.  In other architectures the hint
    135     0    stevel  * (called avoid) was used to ask ppmapin to NOT use the specified cache color.
    136     0    stevel  * This was used to avoid virtual cache trashing in the bcopy.  Unfortunately
    137     0    stevel  * in the case of a COW,  this later on caused a cache aliasing conflict.  In
    138     0    stevel  * sun4, the bcopy routine uses the block ld/st instructions so we don't have
    139     0    stevel  * to worry about virtual cache trashing.  Actually, by using the hint to choose
    140     0    stevel  * the right color we can almost guarantee a cache conflict will not occur.
    141     0    stevel  */
    142     0    stevel 
    143  2296  ae112802 /*ARGSUSED2*/
    144     0    stevel caddr_t
    145     0    stevel ppmapin(page_t *pp, uint_t vprot, caddr_t hint)
    146     0    stevel {
    147  2296  ae112802 	int nset;
    148     0    stevel 	caddr_t va;
    149     0    stevel 
    150     0    stevel #ifdef PPDEBUG
    151     0    stevel 	pp_allocs++;
    152     0    stevel #endif /* PPDEBUG */
    153  2296  ae112802 
    154  2296  ae112802 	/*
    155  2296  ae112802 	 * For sun4v caches are physical caches, we can pick any address
    156  2296  ae112802 	 * we want.
    157  2296  ae112802 	 */
    158  2296  ae112802 	for (nset = 0; nset < nsets; nset++) {
    159  2296  ae112802 		va = ppmap_vaddrs[nset];
    160  2296  ae112802 		if (va != NULL) {
    161  2296  ae112802 #ifdef PPDEBUG
    162  2296  ae112802 			align_hits++;
    163  2296  ae112802 #endif /* PPDEBUG */
    164  2296  ae112802 			if (casptr(&ppmap_vaddrs[nset], va, NULL) == va) {
    165  2296  ae112802 				hat_memload(kas.a_hat, va, pp,
    166  6461  cb222892 				    vprot | HAT_NOSYNC,
    167  6461  cb222892 				    HAT_LOAD_LOCK);
    168  2296  ae112802 				return (va);
    169     0    stevel 			}
    170     0    stevel 		}
    171     0    stevel 	}
    172     0    stevel 
    173     0    stevel #ifdef PPDEBUG
    174     0    stevel 	ppalloc_noslot++;
    175     0    stevel #endif /* PPDEBUG */
    176     0    stevel 
    177     0    stevel 	/*
    178     0    stevel 	 * No free slots; get a random one from the kernel heap area.
    179     0    stevel 	 */
    180     0    stevel 	va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
    181     0    stevel 
    182     0    stevel 	hat_memload(kas.a_hat, va, pp, vprot | HAT_NOSYNC, HAT_LOAD_LOCK);
    183     0    stevel 
    184     0    stevel 	return (va);
    185     0    stevel 
    186     0    stevel }
    187     0    stevel 
    188     0    stevel void
    189     0    stevel ppmapout(caddr_t va)
    190     0    stevel {
    191  2296  ae112802 	int nset;
    192     0    stevel 
    193     0    stevel 	if (va >= kernelheap && va < ekernelheap) {
    194     0    stevel 		/*
    195     0    stevel 		 * Space came from kernelmap, flush the page and
    196     0    stevel 		 * return the space.
    197     0    stevel 		 */
    198     0    stevel 		hat_unload(kas.a_hat, va, PAGESIZE,
    199     0    stevel 		    (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));
    200     0    stevel 		vmem_free(heap_arena, va, PAGESIZE);
    201     0    stevel 	} else {
    202     0    stevel 		/*
    203     0    stevel 		 * Space came from ppmap_vaddrs[], give it back.
    204     0    stevel 		 */
    205     0    stevel 		nset = ((uintptr_t)va >> ppmap_shift) & (nsets - 1);
    206     0    stevel 		hat_unload(kas.a_hat, va, PAGESIZE,
    207     0    stevel 		    (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));
    208     0    stevel 
    209  2296  ae112802 		ASSERT(ppmap_vaddrs[nset] == NULL);
    210  2296  ae112802 		ppmap_vaddrs[nset] = va;
    211     0    stevel 	}
    212     0    stevel }
    213     0    stevel 
    214     0    stevel #ifdef DEBUG
    215     0    stevel #define	PP_STAT_ADD(stat)	(stat)++
    216     0    stevel uint_t pload, ploadfail;
    217     0    stevel uint_t ppzero, ppzero_short;
    218     0    stevel #else
    219     0    stevel #define	PP_STAT_ADD(stat)
    220     0    stevel #endif /* DEBUG */
    221     0    stevel 
    222     0    stevel static void
    223     0    stevel pp_unload_tlb(caddr_t *pslot, caddr_t va)
    224     0    stevel {
    225     0    stevel 	ASSERT(*pslot == va);
    226     0    stevel 
    227  2241      huah 	vtag_flushpage(va, (uint64_t)ksfmmup);
    228     0    stevel 	*pslot = NULL;				/* release the slot */
    229     0    stevel }
    230     0    stevel 
    231     0    stevel /*
    232     0    stevel  * Routine to copy kernel pages during relocation.  It will copy one
    233     0    stevel  * PAGESIZE page to another PAGESIZE page.  This function may be called
    234     0    stevel  * above LOCK_LEVEL so it should not grab any locks.
    235     0    stevel  */
    236     0    stevel void
    237     0    stevel ppcopy_kernel__relocatable(page_t *fm_pp, page_t *to_pp)
    238     0    stevel {
    239     0    stevel 	uint64_t fm_pa, to_pa;
    240     0    stevel 	size_t nbytes;
    241     0    stevel 
    242     0    stevel 	fm_pa = (uint64_t)(fm_pp->p_pagenum) << MMU_PAGESHIFT;
    243     0    stevel 	to_pa = (uint64_t)(to_pp->p_pagenum) << MMU_PAGESHIFT;
    244     0    stevel 
    245     0    stevel 	nbytes = MMU_PAGESIZE;
    246     0    stevel 
    247     0    stevel 	for (; nbytes > 0; fm_pa += 32, to_pa += 32, nbytes -= 32)
    248     0    stevel 		hw_pa_bcopy32(fm_pa, to_pa);
    249     0    stevel }
    250     0    stevel 
    251     0    stevel /*
    252     0    stevel  * Copy the data from the physical page represented by "frompp" to
    253     0    stevel  * that represented by "topp".
    254     0    stevel  *
    255     0    stevel  * Try to use per cpu mapping first, if that fails then call pp_mapin
    256     0    stevel  * to load it.
    257  3253       mec  * Returns one on success or zero on some sort of fault while doing the copy.
    258     0    stevel  */
    259  3253       mec int
    260     0    stevel ppcopy(page_t *fm_pp, page_t *to_pp)
    261     0    stevel {
    262  6461  cb222892 	caddr_t fm_va = NULL;
    263  1517  jb145095 	caddr_t to_va;
    264  1517  jb145095 	boolean_t fast;
    265  3253       mec 	label_t ljb;
    266  3253       mec 	int ret = 1;
    267     0    stevel 
    268  1517  jb145095 	ASSERT(PAGE_LOCKED(fm_pp));
    269  1517  jb145095 	ASSERT(PAGE_LOCKED(to_pp));
    270  1517  jb145095 
    271  1517  jb145095 	/*
    272  6461  cb222892 	 * Try to map using KPM if enabled.  If it fails, fall
    273  6461  cb222892 	 * back to ppmapin/ppmapout.
    274  1517  jb145095 	 */
    275  6461  cb222892 	if ((kpm_enable == 0) ||
    276  6461  cb222892 	    (fm_va = hat_kpm_mapin(fm_pp, NULL)) == NULL ||
    277  1517  jb145095 	    (to_va = hat_kpm_mapin(to_pp, NULL)) == NULL) {
    278  1517  jb145095 		if (fm_va != NULL)
    279  1517  jb145095 			hat_kpm_mapout(fm_pp, NULL, fm_va);
    280  1517  jb145095 		fm_va = ppmapin(fm_pp, PROT_READ, (caddr_t)-1);
    281  1517  jb145095 		to_va = ppmapin(to_pp, PROT_READ | PROT_WRITE, fm_va);
    282  1517  jb145095 		fast = B_FALSE;
    283  1517  jb145095 	} else
    284  1517  jb145095 		fast = B_TRUE;
    285  1517  jb145095 
    286  3253       mec 	if (on_fault(&ljb)) {
    287  3253       mec 		ret = 0;
    288  3253       mec 		goto faulted;
    289  3253       mec 	}
    290     0    stevel 	bcopy(fm_va, to_va, PAGESIZE);
    291  3253       mec 	no_fault();
    292  3253       mec faulted:
    293  1517  jb145095 
    294  1517  jb145095 	/* Unmap */
    295  1517  jb145095 	if (fast) {
    296  1517  jb145095 		hat_kpm_mapout(fm_pp, NULL, fm_va);
    297  1517  jb145095 		hat_kpm_mapout(to_pp, NULL, to_va);
    298  1517  jb145095 	} else {
    299  1517  jb145095 		ppmapout(fm_va);
    300  1517  jb145095 		ppmapout(to_va);
    301  1517  jb145095 	}
    302  3253       mec 	return (ret);
    303     0    stevel }
    304     0    stevel 
    305     0    stevel /*
    306     0    stevel  * Zero the physical page from off to off + len given by `pp'
    307     0    stevel  * without changing the reference and modified bits of page.
    308     0    stevel  *
    309     0    stevel  * Again, we'll try per cpu mapping first.
    310     0    stevel  */
    311  1517  jb145095 
    312     0    stevel void
    313     0    stevel pagezero(page_t *pp, uint_t off, uint_t len)
    314     0    stevel {
    315     0    stevel 	caddr_t va;
    316     0    stevel 	extern int hwblkclr(void *, size_t);
    317     0    stevel 	extern int use_hw_bzero;
    318  1517  jb145095 	boolean_t fast;
    319     0    stevel 
    320     0    stevel 	ASSERT((int)len > 0 && (int)off >= 0 && off + len <= PAGESIZE);
    321     0    stevel 	ASSERT(PAGE_LOCKED(pp));
    322     0    stevel 
    323     0    stevel 	PP_STAT_ADD(ppzero);
    324     0    stevel 
    325     0    stevel 	if (len != MMU_PAGESIZE || !use_hw_bzero) {
    326     0    stevel 		PP_STAT_ADD(ppzero_short);
    327     0    stevel 	}
    328     0    stevel 
    329     0    stevel 	kpreempt_disable();
    330     0    stevel 
    331  1517  jb145095 	/*
    332  6461  cb222892 	 * Try to use KPM if enabled.  If that fails, fall back to
    333  1517  jb145095 	 * ppmapin/ppmapout.
    334  1517  jb145095 	 */
    335  6461  cb222892 
    336  6461  cb222892 	if (kpm_enable != 0) {
    337  6461  cb222892 		fast = B_TRUE;
    338  6461  cb222892 		va = hat_kpm_mapin(pp, NULL);
    339  6461  cb222892 	} else
    340  6461  cb222892 		va = NULL;
    341  6461  cb222892 
    342  1517  jb145095 	if (va == NULL) {
    343  1517  jb145095 		fast = B_FALSE;
    344  1517  jb145095 		va = ppmapin(pp, PROT_READ | PROT_WRITE, (caddr_t)-1);
    345  1517  jb145095 	}
    346     0    stevel 
    347     0    stevel 	if (!use_hw_bzero) {
    348     0    stevel 		bzero(va + off, len);
    349     0    stevel 		sync_icache(va + off, len);
    350     0    stevel 	} else if (hwblkclr(va + off, len)) {
    351     0    stevel 		/*
    352     0    stevel 		 * We may not have used block commit asi.
    353     0    stevel 		 * So flush the I-$ manually
    354     0    stevel 		 */
    355     0    stevel 		sync_icache(va + off, len);
    356     0    stevel 	} else {
    357     0    stevel 		/*
    358  1517  jb145095 		 * We have used blk commit, and flushed the I-$.
    359  1517  jb145095 		 * However we still may have an instruction in the
    360  1517  jb145095 		 * pipeline. Only a flush will invalidate that.
    361     0    stevel 		 */
    362     0    stevel 		doflush(va);
    363     0    stevel 	}
    364     0    stevel 
    365  1517  jb145095 	if (fast) {
    366  1517  jb145095 		hat_kpm_mapout(pp, NULL, va);
    367  1517  jb145095 	} else {
    368  1517  jb145095 		ppmapout(va);
    369  1517  jb145095 	}
    370     0    stevel 	kpreempt_enable();
    371     0    stevel }
    372