1 0 stevel /* 2 0 stevel * CDDL HEADER START 3 0 stevel * 4 0 stevel * The contents of this file are subject to the terms of the 5 1517 jb145095 * Common Development and Distribution License (the "License"). 6 1517 jb145095 * You may not use this file except in compliance with the License. 7 0 stevel * 8 0 stevel * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 0 stevel * or http://www.opensolaris.org/os/licensing. 10 0 stevel * See the License for the specific language governing permissions 11 0 stevel * and limitations under the License. 12 0 stevel * 13 0 stevel * When distributing Covered Code, include this CDDL HEADER in each 14 0 stevel * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 0 stevel * If applicable, add the following below this CDDL HEADER, with the 16 0 stevel * fields enclosed by brackets "[]" replaced with your own identifying 17 0 stevel * information: Portions Copyright [yyyy] [name of copyright owner] 18 0 stevel * 19 0 stevel * CDDL HEADER END 20 0 stevel */ 21 0 stevel /* 22 6461 cb222892 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 0 stevel * Use is subject to license terms. 24 0 stevel */ 25 0 stevel 26 0 stevel #pragma ident "%Z%%M% %I% %E% SMI" 27 0 stevel 28 0 stevel #include <sys/types.h> 29 0 stevel #include <sys/systm.h> 30 0 stevel #include <sys/archsystm.h> 31 0 stevel #include <sys/machsystm.h> 32 0 stevel #include <sys/t_lock.h> 33 0 stevel #include <sys/vmem.h> 34 0 stevel #include <sys/mman.h> 35 0 stevel #include <sys/vm.h> 36 0 stevel #include <sys/cpu.h> 37 0 stevel #include <sys/cmn_err.h> 38 0 stevel #include <sys/cpuvar.h> 39 0 stevel #include <sys/atomic.h> 40 0 stevel #include <vm/as.h> 41 0 stevel #include <vm/hat.h> 42 0 stevel #include <vm/as.h> 43 0 stevel #include <vm/page.h> 44 0 stevel #include <vm/seg.h> 45 0 stevel #include <vm/seg_kmem.h> 46 6461 cb222892 #include <vm/seg_kpm.h> 47 0 stevel #include <vm/hat_sfmmu.h> 48 0 stevel #include <sys/debug.h> 49 0 stevel #include <sys/cpu_module.h> 50 0 stevel 51 0 stevel /* 52 0 stevel * A quick way to generate a cache consistent address to map in a page. 53 0 stevel * users: ppcopy, pagezero, /proc, dev/mem 54 0 stevel * 55 0 stevel * The ppmapin/ppmapout routines provide a quick way of generating a cache 56 0 stevel * consistent address by reserving a given amount of kernel address space. 57 0 stevel * The base is PPMAPBASE and its size is PPMAPSIZE. This memory is divided 58 0 stevel * into x number of sets, where x is the number of colors for the virtual 59 0 stevel * cache. The number of colors is how many times a page can be mapped 60 0 stevel * simulatenously in the cache. For direct map caches this translates to 61 0 stevel * the number of pages in the cache. 62 0 stevel * Each set will be assigned a group of virtual pages from the reserved memory 63 0 stevel * depending on its virtual color. 64 0 stevel * When trying to assign a virtual address we will find out the color for the 65 0 stevel * physical page in question (if applicable). Then we will try to find an 66 0 stevel * available virtual page from the set of the appropiate color. 67 0 stevel */ 68 0 stevel 69 0 stevel int pp_slots = 4; /* small default, tuned by cpu module */ 70 0 stevel 71 0 stevel /* tuned by cpu module, default is "safe" */ 72 0 stevel int pp_consistent_coloring = PPAGE_STORES_POLLUTE | PPAGE_LOADS_POLLUTE; 73 0 stevel 74 0 stevel static caddr_t ppmap_vaddrs[PPMAPSIZE / MMU_PAGESIZE]; 75 0 stevel static int nsets; /* number of sets */ 76 0 stevel static int ppmap_shift; /* set selector */ 77 0 stevel 78 0 stevel #ifdef PPDEBUG 79 0 stevel #define MAXCOLORS 16 /* for debug only */ 80 0 stevel static int ppalloc_noslot = 0; /* # of allocations from kernelmap */ 81 2296 ae112802 static int align_hits; 82 0 stevel static int pp_allocs; /* # of ppmapin requests */ 83 0 stevel #endif /* PPDEBUG */ 84 0 stevel 85 0 stevel /* 86 0 stevel * There are only 64 TLB entries on spitfire, 16 on cheetah 87 0 stevel * (fully-associative TLB) so we allow the cpu module to tune the 88 0 stevel * number to use here via pp_slots. 89 0 stevel */ 90 0 stevel static struct ppmap_va { 91 0 stevel caddr_t ppmap_slots[MAXPP_SLOTS]; 92 0 stevel } ppmap_va[NCPU]; 93 0 stevel 94 2296 ae112802 /* prevent compilation with VAC defined */ 95 2296 ae112802 #ifdef VAC 96 2296 ae112802 #error "sun4v ppmapin and ppmapout do not support VAC" 97 2296 ae112802 #endif 98 2296 ae112802 99 0 stevel void 100 0 stevel ppmapinit(void) 101 0 stevel { 102 2296 ae112802 int nset; 103 0 stevel caddr_t va; 104 0 stevel 105 0 stevel ASSERT(pp_slots <= MAXPP_SLOTS); 106 0 stevel 107 0 stevel va = (caddr_t)PPMAPBASE; 108 0 stevel 109 2296 ae112802 /* 110 2296 ae112802 * sun4v does not have a virtual indexed cache and simply 111 2296 ae112802 * has only one set containing all pages. 112 2296 ae112802 */ 113 2296 ae112802 nsets = mmu_btop(PPMAPSIZE); 114 2296 ae112802 ppmap_shift = MMU_PAGESHIFT; 115 2296 ae112802 116 2296 ae112802 for (nset = 0; nset < nsets; nset++) { 117 2296 ae112802 ppmap_vaddrs[nset] = 118 2296 ae112802 (caddr_t)((uintptr_t)va + (nset * MMU_PAGESIZE)); 119 0 stevel } 120 0 stevel } 121 0 stevel 122 0 stevel /* 123 0 stevel * Allocate a cache consistent virtual address to map a page, pp, 124 0 stevel * with protection, vprot; and map it in the MMU, using the most 125 0 stevel * efficient means possible. The argument avoid is a virtual address 126 0 stevel * hint which when masked yields an offset into a virtual cache 127 0 stevel * that should be avoided when allocating an address to map in a 128 0 stevel * page. An avoid arg of -1 means you don't care, for instance pagezero. 129 0 stevel * 130 0 stevel * machine dependent, depends on virtual address space layout, 131 0 stevel * understands that all kernel addresses have bit 31 set. 132 0 stevel * 133 0 stevel * NOTE: For sun4 platforms the meaning of the hint argument is opposite from 134 0 stevel * that found in other architectures. In other architectures the hint 135 0 stevel * (called avoid) was used to ask ppmapin to NOT use the specified cache color. 136 0 stevel * This was used to avoid virtual cache trashing in the bcopy. Unfortunately 137 0 stevel * in the case of a COW, this later on caused a cache aliasing conflict. In 138 0 stevel * sun4, the bcopy routine uses the block ld/st instructions so we don't have 139 0 stevel * to worry about virtual cache trashing. Actually, by using the hint to choose 140 0 stevel * the right color we can almost guarantee a cache conflict will not occur. 141 0 stevel */ 142 0 stevel 143 2296 ae112802 /*ARGSUSED2*/ 144 0 stevel caddr_t 145 0 stevel ppmapin(page_t *pp, uint_t vprot, caddr_t hint) 146 0 stevel { 147 2296 ae112802 int nset; 148 0 stevel caddr_t va; 149 0 stevel 150 0 stevel #ifdef PPDEBUG 151 0 stevel pp_allocs++; 152 0 stevel #endif /* PPDEBUG */ 153 2296 ae112802 154 2296 ae112802 /* 155 2296 ae112802 * For sun4v caches are physical caches, we can pick any address 156 2296 ae112802 * we want. 157 2296 ae112802 */ 158 2296 ae112802 for (nset = 0; nset < nsets; nset++) { 159 2296 ae112802 va = ppmap_vaddrs[nset]; 160 2296 ae112802 if (va != NULL) { 161 2296 ae112802 #ifdef PPDEBUG 162 2296 ae112802 align_hits++; 163 2296 ae112802 #endif /* PPDEBUG */ 164 2296 ae112802 if (casptr(&ppmap_vaddrs[nset], va, NULL) == va) { 165 2296 ae112802 hat_memload(kas.a_hat, va, pp, 166 6461 cb222892 vprot | HAT_NOSYNC, 167 6461 cb222892 HAT_LOAD_LOCK); 168 2296 ae112802 return (va); 169 0 stevel } 170 0 stevel } 171 0 stevel } 172 0 stevel 173 0 stevel #ifdef PPDEBUG 174 0 stevel ppalloc_noslot++; 175 0 stevel #endif /* PPDEBUG */ 176 0 stevel 177 0 stevel /* 178 0 stevel * No free slots; get a random one from the kernel heap area. 179 0 stevel */ 180 0 stevel va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP); 181 0 stevel 182 0 stevel hat_memload(kas.a_hat, va, pp, vprot | HAT_NOSYNC, HAT_LOAD_LOCK); 183 0 stevel 184 0 stevel return (va); 185 0 stevel 186 0 stevel } 187 0 stevel 188 0 stevel void 189 0 stevel ppmapout(caddr_t va) 190 0 stevel { 191 2296 ae112802 int nset; 192 0 stevel 193 0 stevel if (va >= kernelheap && va < ekernelheap) { 194 0 stevel /* 195 0 stevel * Space came from kernelmap, flush the page and 196 0 stevel * return the space. 197 0 stevel */ 198 0 stevel hat_unload(kas.a_hat, va, PAGESIZE, 199 0 stevel (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK)); 200 0 stevel vmem_free(heap_arena, va, PAGESIZE); 201 0 stevel } else { 202 0 stevel /* 203 0 stevel * Space came from ppmap_vaddrs[], give it back. 204 0 stevel */ 205 0 stevel nset = ((uintptr_t)va >> ppmap_shift) & (nsets - 1); 206 0 stevel hat_unload(kas.a_hat, va, PAGESIZE, 207 0 stevel (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK)); 208 0 stevel 209 2296 ae112802 ASSERT(ppmap_vaddrs[nset] == NULL); 210 2296 ae112802 ppmap_vaddrs[nset] = va; 211 0 stevel } 212 0 stevel } 213 0 stevel 214 0 stevel #ifdef DEBUG 215 0 stevel #define PP_STAT_ADD(stat) (stat)++ 216 0 stevel uint_t pload, ploadfail; 217 0 stevel uint_t ppzero, ppzero_short; 218 0 stevel #else 219 0 stevel #define PP_STAT_ADD(stat) 220 0 stevel #endif /* DEBUG */ 221 0 stevel 222 0 stevel static void 223 0 stevel pp_unload_tlb(caddr_t *pslot, caddr_t va) 224 0 stevel { 225 0 stevel ASSERT(*pslot == va); 226 0 stevel 227 2241 huah vtag_flushpage(va, (uint64_t)ksfmmup); 228 0 stevel *pslot = NULL; /* release the slot */ 229 0 stevel } 230 0 stevel 231 0 stevel /* 232 0 stevel * Routine to copy kernel pages during relocation. It will copy one 233 0 stevel * PAGESIZE page to another PAGESIZE page. This function may be called 234 0 stevel * above LOCK_LEVEL so it should not grab any locks. 235 0 stevel */ 236 0 stevel void 237 0 stevel ppcopy_kernel__relocatable(page_t *fm_pp, page_t *to_pp) 238 0 stevel { 239 0 stevel uint64_t fm_pa, to_pa; 240 0 stevel size_t nbytes; 241 0 stevel 242 0 stevel fm_pa = (uint64_t)(fm_pp->p_pagenum) << MMU_PAGESHIFT; 243 0 stevel to_pa = (uint64_t)(to_pp->p_pagenum) << MMU_PAGESHIFT; 244 0 stevel 245 0 stevel nbytes = MMU_PAGESIZE; 246 0 stevel 247 0 stevel for (; nbytes > 0; fm_pa += 32, to_pa += 32, nbytes -= 32) 248 0 stevel hw_pa_bcopy32(fm_pa, to_pa); 249 0 stevel } 250 0 stevel 251 0 stevel /* 252 0 stevel * Copy the data from the physical page represented by "frompp" to 253 0 stevel * that represented by "topp". 254 0 stevel * 255 0 stevel * Try to use per cpu mapping first, if that fails then call pp_mapin 256 0 stevel * to load it. 257 3253 mec * Returns one on success or zero on some sort of fault while doing the copy. 258 0 stevel */ 259 3253 mec int 260 0 stevel ppcopy(page_t *fm_pp, page_t *to_pp) 261 0 stevel { 262 6461 cb222892 caddr_t fm_va = NULL; 263 1517 jb145095 caddr_t to_va; 264 1517 jb145095 boolean_t fast; 265 3253 mec label_t ljb; 266 3253 mec int ret = 1; 267 0 stevel 268 1517 jb145095 ASSERT(PAGE_LOCKED(fm_pp)); 269 1517 jb145095 ASSERT(PAGE_LOCKED(to_pp)); 270 1517 jb145095 271 1517 jb145095 /* 272 6461 cb222892 * Try to map using KPM if enabled. If it fails, fall 273 6461 cb222892 * back to ppmapin/ppmapout. 274 1517 jb145095 */ 275 6461 cb222892 if ((kpm_enable == 0) || 276 6461 cb222892 (fm_va = hat_kpm_mapin(fm_pp, NULL)) == NULL || 277 1517 jb145095 (to_va = hat_kpm_mapin(to_pp, NULL)) == NULL) { 278 1517 jb145095 if (fm_va != NULL) 279 1517 jb145095 hat_kpm_mapout(fm_pp, NULL, fm_va); 280 1517 jb145095 fm_va = ppmapin(fm_pp, PROT_READ, (caddr_t)-1); 281 1517 jb145095 to_va = ppmapin(to_pp, PROT_READ | PROT_WRITE, fm_va); 282 1517 jb145095 fast = B_FALSE; 283 1517 jb145095 } else 284 1517 jb145095 fast = B_TRUE; 285 1517 jb145095 286 3253 mec if (on_fault(&ljb)) { 287 3253 mec ret = 0; 288 3253 mec goto faulted; 289 3253 mec } 290 0 stevel bcopy(fm_va, to_va, PAGESIZE); 291 3253 mec no_fault(); 292 3253 mec faulted: 293 1517 jb145095 294 1517 jb145095 /* Unmap */ 295 1517 jb145095 if (fast) { 296 1517 jb145095 hat_kpm_mapout(fm_pp, NULL, fm_va); 297 1517 jb145095 hat_kpm_mapout(to_pp, NULL, to_va); 298 1517 jb145095 } else { 299 1517 jb145095 ppmapout(fm_va); 300 1517 jb145095 ppmapout(to_va); 301 1517 jb145095 } 302 3253 mec return (ret); 303 0 stevel } 304 0 stevel 305 0 stevel /* 306 0 stevel * Zero the physical page from off to off + len given by `pp' 307 0 stevel * without changing the reference and modified bits of page. 308 0 stevel * 309 0 stevel * Again, we'll try per cpu mapping first. 310 0 stevel */ 311 1517 jb145095 312 0 stevel void 313 0 stevel pagezero(page_t *pp, uint_t off, uint_t len) 314 0 stevel { 315 0 stevel caddr_t va; 316 0 stevel extern int hwblkclr(void *, size_t); 317 0 stevel extern int use_hw_bzero; 318 1517 jb145095 boolean_t fast; 319 0 stevel 320 0 stevel ASSERT((int)len > 0 && (int)off >= 0 && off + len <= PAGESIZE); 321 0 stevel ASSERT(PAGE_LOCKED(pp)); 322 0 stevel 323 0 stevel PP_STAT_ADD(ppzero); 324 0 stevel 325 0 stevel if (len != MMU_PAGESIZE || !use_hw_bzero) { 326 0 stevel PP_STAT_ADD(ppzero_short); 327 0 stevel } 328 0 stevel 329 0 stevel kpreempt_disable(); 330 0 stevel 331 1517 jb145095 /* 332 6461 cb222892 * Try to use KPM if enabled. If that fails, fall back to 333 1517 jb145095 * ppmapin/ppmapout. 334 1517 jb145095 */ 335 6461 cb222892 336 6461 cb222892 if (kpm_enable != 0) { 337 6461 cb222892 fast = B_TRUE; 338 6461 cb222892 va = hat_kpm_mapin(pp, NULL); 339 6461 cb222892 } else 340 6461 cb222892 va = NULL; 341 6461 cb222892 342 1517 jb145095 if (va == NULL) { 343 1517 jb145095 fast = B_FALSE; 344 1517 jb145095 va = ppmapin(pp, PROT_READ | PROT_WRITE, (caddr_t)-1); 345 1517 jb145095 } 346 0 stevel 347 0 stevel if (!use_hw_bzero) { 348 0 stevel bzero(va + off, len); 349 0 stevel sync_icache(va + off, len); 350 0 stevel } else if (hwblkclr(va + off, len)) { 351 0 stevel /* 352 0 stevel * We may not have used block commit asi. 353 0 stevel * So flush the I-$ manually 354 0 stevel */ 355 0 stevel sync_icache(va + off, len); 356 0 stevel } else { 357 0 stevel /* 358 1517 jb145095 * We have used blk commit, and flushed the I-$. 359 1517 jb145095 * However we still may have an instruction in the 360 1517 jb145095 * pipeline. Only a flush will invalidate that. 361 0 stevel */ 362 0 stevel doflush(va); 363 0 stevel } 364 0 stevel 365 1517 jb145095 if (fast) { 366 1517 jb145095 hat_kpm_mapout(pp, NULL, va); 367 1517 jb145095 } else { 368 1517 jb145095 ppmapout(va); 369 1517 jb145095 } 370 0 stevel kpreempt_enable(); 371 0 stevel } 372