1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/systm.h> 29 #include <sys/archsystm.h> 30 #include <sys/machparam.h> 31 #include <sys/machsystm.h> 32 #include <sys/cpu.h> 33 #include <sys/elf_SPARC.h> 34 #include <vm/page.h> 35 #include <vm/vm_dep.h> 36 #include <sys/cpuvar.h> 37 #include <sys/async.h> 38 #include <sys/cmn_err.h> 39 #include <sys/debug.h> 40 #include <sys/dditypes.h> 41 #include <sys/sunddi.h> 42 #include <sys/cpu_module.h> 43 #include <sys/prom_debug.h> 44 #include <sys/vmsystm.h> 45 #include <sys/prom_plat.h> 46 #include <sys/sysmacros.h> 47 #include <sys/intreg.h> 48 #include <sys/machtrap.h> 49 #include <sys/ontrap.h> 50 #include <sys/ivintr.h> 51 #include <sys/atomic.h> 52 #include <sys/panic.h> 53 #include <sys/dtrace.h> 54 #include <vm/seg_spt.h> 55 #include <sys/hypervisor_api.h> 56 #include <sys/rock_hypervisor_api.h> 57 #include <sys/hsvc.h> 58 #include <vm/hat_sfmmu.h> 59 #include <sys/mutex_impl.h> 60 61 uint_t root_phys_addr_lo_mask = 0xffffffffU; 62 uint8_t enable_tm = 1; 63 64 char cpu_module_name[] = "SUNW,UltraSPARC-AT10"; 65 boolean_t hsvc_tm_available = B_TRUE; 66 67 static hsvc_info_t rock_tm_hsvc = { 68 HSVC_REV_1, /* HSVC rev num */ 69 NULL, /* Private */ 70 HSVC_GROUP_TM, /* Requested API Group */ 71 ROCK_HSVC_MAJOR, /* Requested Major */ 72 ROCK_HSVC_MINOR, /* Requested Minor */ 73 cpu_module_name /* Module name */ 74 }; 75 76 boolean_t hsvc_mmu_ext_available = B_TRUE; 77 78 static hsvc_info_t rock_mmu_ext_hsvc = { 79 HSVC_REV_1, /* HSVC rev num */ 80 NULL, /* Private */ 81 HSVC_GROUP_RKMMU_EXT, /* Requested API Group */ 82 ROCK_HSVC_MAJOR, /* Requested Major */ 83 ROCK_HSVC_MINOR, /* Requested Minor */ 84 cpu_module_name /* Module name */ 85 }; 86 87 static void encode_pgsz_order(uint64_t, int, int, uint16_t *, uchar_t *); 88 static void set_pgsz_order(uchar_t, uchar_t, uint64_t *, int *, int *, 89 sfmmu_t *); 90 91 extern void rock_mutex_delay(void); 92 93 /* 94 * External /etc/system tunable, for controlling whether shared or private pages 95 * come first in the pagesize order register. 96 */ 97 int pgsz_order_shared_first = 1; 98 99 #define MCOREID_MASK 0x1E 100 #define MCOREID_SHIFT 1 101 102 static uint_t mmu_disable_large_pages = ((1 << TTE512K) | (1 << TTE32M) | 103 (1 << TTE2G) | (1 << TTE16G)); 104 static uint_t mmu_disable_ism_large_pages = ((1 << TTE512K) | (1 << TTE32M) | 105 (1 << TTE2G) | (1 << TTE16G)); 106 static uint_t mmu_disable_auto_data_large_pages = ((1 << TTE512K) | 107 (1 << TTE32M) | (1 << TTE2G) | (1 << TTE16G)); 108 static uint_t mmu_disable_auto_text_large_pages = ((1 << TTE512K) | 109 (1 << TTE32M) | (1 << TTE2G) | (1 << TTE16G)); 110 111 void 112 cpu_setup(void) 113 { 114 extern int cpc_has_overflow_intr; 115 uint64_t sup_minor; 116 int status; 117 118 /* 119 * The setup common to all CPU modules is done in cpu_setup_common 120 * routine. 121 */ 122 cpu_setup_common(NULL); 123 124 /* 125 * Rock's max nctxs is 64K. Set it accordingly. 126 */ 127 nctxs = MAX_NCTXS; 128 129 /* 130 * Rock I$ is non-coherent. 131 */ 132 mach_setup_icache(0); 133 134 #ifdef DEBUG 135 /* 136 * These should always be present on Rock 137 */ 138 if (cpu_hwcap_flags == 0) 139 cmn_err(CE_WARN, "hwcap-list missing from MD"); 140 #endif 141 cpu_hwcap_flags |= AV_SPARC_ASI_CACHE_SPARING; 142 143 cache |= (CACHE_PTAG | CACHE_IOCOHERENT); 144 145 if (use_page_coloring) { 146 do_pg_coloring = 1; 147 } 148 149 /* 150 * Rock generates hpriv performance event trap instead of pic overflow 151 * trap. To get the attention of the guest hv in-turn generates pic 152 * overflow trap. Therefore enable support for that. 153 */ 154 cpc_has_overflow_intr = 1; 155 156 /* 157 * Enable 4M pages for OOB. 158 */ 159 max_uheap_lpsize = MMU_PAGESIZE4M; 160 max_ustack_lpsize = MMU_PAGESIZE4M; 161 max_privmap_lpsize = MMU_PAGESIZE4M; 162 163 /* 164 * hv_tm_enable is a part of TM group. We need to 165 * negotiate that API group before we can use it. 166 */ 167 status = hsvc_register(&rock_tm_hsvc, &sup_minor); 168 if ((status != 0) || (sup_minor < (uint64_t)ROCK_HSVC_MINOR)) { 169 cmn_err(CE_WARN, "%s cannot negotiate hypervisor services: " 170 "major: 0x%lx minor: 0x%lx group: 0x%x errno: %d", 171 cpu_module_name, rock_tm_hsvc.hsvc_major, 172 rock_tm_hsvc.hsvc_minor, HSVC_GROUP_TM, status); 173 hsvc_tm_available = B_FALSE; 174 } 175 176 /* 177 * Negotiate API group for rock mmu extensions. 178 */ 179 status = hsvc_register(&rock_mmu_ext_hsvc, &sup_minor); 180 if ((status != 0) || (sup_minor < 181 (uint64_t)ROCK_HSVC_MINOR)) { 182 cmn_err(CE_WARN, "%s cannot negotiate hypervisor services: " 183 "major: 0x%lx minor: 0x%lx group: 0x%x errno: %d", 184 cpu_module_name, rock_mmu_ext_hsvc.hsvc_major, 185 rock_mmu_ext_hsvc.hsvc_minor, HSVC_GROUP_RKMMU_EXT, 186 status); 187 hsvc_mmu_ext_available = B_FALSE; 188 } 189 } 190 191 /* 192 * Set the magic constants of the implementation. 193 */ 194 void 195 cpu_fiximp(struct cpu_node *cpunode) 196 { 197 /* 198 * The Cache node is optional in MD. Therefore in case it 199 * does not exist, use hardcoded values. 200 */ 201 #ifdef DEBUG 202 /* 203 * ...that said, we do want this info to come from the MD. 204 */ 205 if (cpunode->ecache_size == 0 || cpunode->ecache_linesize == 0 || 206 cpunode->ecache_associativity == 0) { 207 cmn_err(CE_WARN, "ecache info missing from MD"); 208 } 209 #endif 210 if (cpunode->ecache_size == 0) 211 cpunode->ecache_size = 2 * 1024 * 1024; 212 if (cpunode->ecache_linesize == 0) 213 cpunode->ecache_linesize = 64; 214 if (cpunode->ecache_associativity == 0) 215 cpunode->ecache_associativity = 8; 216 } 217 218 void 219 dtrace_flush_sec(uintptr_t addr) 220 { 221 pfn_t pfn; 222 proc_t *procp = ttoproc(curthread); 223 page_t *pp; 224 caddr_t va; 225 226 pfn = hat_getpfnum(procp->p_as->a_hat, (void *)addr); 227 if (pfn != -1) { 228 ASSERT(pf_is_memory(pfn)); 229 pp = page_numtopp_noreclaim(pfn, SE_SHARED); 230 if (pp != NULL) { 231 va = ppmapin(pp, PROT_READ | PROT_WRITE, (void *)addr); 232 /* sparc needs 8-byte align */ 233 doflush((caddr_t)((uintptr_t)va & -8l)); 234 ppmapout(va); 235 page_unlock(pp); 236 } 237 } 238 } 239 240 void 241 cpu_map_exec_units(struct cpu *cp) 242 { 243 ASSERT(MUTEX_HELD(&cpu_lock)); 244 245 /* 246 * The cpu_ipipe and cpu_fpu fields are initialized based on 247 * the execution unit sharing information from the MD. They 248 * default to the CPU id in the absence of such information. 249 */ 250 cp->cpu_m.cpu_ipipe = cpunodes[cp->cpu_id].exec_unit_mapping; 251 if (cp->cpu_m.cpu_ipipe == NO_EU_MAPPING_FOUND) 252 cp->cpu_m.cpu_ipipe = (id_t)(cp->cpu_id); 253 254 cp->cpu_m.cpu_fpu = cpunodes[cp->cpu_id].fpu_mapping; 255 if (cp->cpu_m.cpu_fpu == NO_EU_MAPPING_FOUND) 256 cp->cpu_m.cpu_fpu = (id_t)(cp->cpu_id); 257 258 cp->cpu_m.cpu_core = (cp->cpu_id & MCOREID_MASK) >> MCOREID_SHIFT; 259 260 /* 261 * The cpu_chip field is initialized based on the information 262 * in the MD and assume that all cpus within a chip 263 * share the same L2 cache. If no such info is available, we 264 * set the cpu to CPU_CHIPID_INVALID. 265 */ 266 cp->cpu_m.cpu_mpipe = cpunodes[cp->cpu_id].l2_cache_mapping; 267 if (cp->cpu_m.cpu_mpipe == NO_L2_CACHE_MAPPING_FOUND) 268 cp->cpu_m.cpu_mpipe = CPU_L2_CACHEID_INVALID; 269 270 cp->cpu_m.cpu_chip = cpunodes[cp->cpu_id].l2_cache_mapping; 271 if (cp->cpu_m.cpu_chip == NO_L2_CACHE_MAPPING_FOUND) 272 cp->cpu_m.cpu_chip = CPU_CHIPID_INVALID; 273 } 274 275 void 276 cpu_init_private(struct cpu *cp) 277 { 278 cpu_map_exec_units(cp); 279 mutex_delay = rock_mutex_delay; 280 } 281 282 /*ARGSUSED*/ 283 void 284 cpu_uninit_private(struct cpu *cp) 285 { 286 } 287 288 /* 289 * cpu_feature_init 290 * 291 * This function is called once per strand. 292 */ 293 void 294 cpu_feature_init(void) 295 { 296 static int set_mutex_backoff_tunables = 0; 297 /* 298 * Set constants for mutex_backoff only once. 299 * On Rock, setting this to 8 gives the best performance, 300 * even for multi-chip systems. 301 */ 302 if (! set_mutex_backoff_tunables) { 303 mutex_backoff_base = 1; 304 mutex_cap_factor = 8; 305 set_mutex_backoff_tunables = 1; 306 } 307 308 /* 309 * Enable or disable for each cpu if hypervisor API is negotiated. 310 */ 311 if (hsvc_tm_available == B_TRUE) 312 (void) hv_tm_enable((uint64_t)enable_tm); 313 } 314 315 /* 316 * Flush specified address range from I$ via hv_mem_iflush interface 317 * Note that the hypervisor interface expects physical address range 318 * and can flush less than the requested size. 319 */ 320 321 void 322 rock_sync_icache(caddr_t addr, size_t size) 323 { 324 uint64_t pa, i, flushlen, flushed; 325 326 if (!force_sync_icache_after_bcopy) 327 /* 328 * Do not clear the I-cache after bcopy. 329 * The default value is 0. This flag made be 330 * set via /etc/system. 331 */ 332 return; 333 334 if (!tba_taken_over) 335 /* 336 * Very early in boot, va_to_pa() will try to call back 337 * into OBP. Very *very* early in boot, this will fail 338 * because we haven't set up the OBP callback handler. 339 * (Without this check, kmdb boot will fail.) 340 */ 341 return; 342 343 for (i = 0; i < size; i += flushed) { 344 pa = va_to_pa(addr + i); 345 ASSERT(pa != -1); 346 347 /* 348 * Only flush the required length up to a PAGESIZE. 349 */ 350 351 flushlen = MIN((size - i), (PAGESIZE - (pa & MMU_PAGEOFFSET))); 352 353 /* 354 * Flush I$ up to the page bounday. This call should never 355 * fail. If it does, we panic the system as I$ may contain 356 * stale instructions, which can result in silent data 357 * corruption. 358 */ 359 360 if (hv_mem_iflush(pa, flushlen, &flushed) != H_EOK) { 361 cmn_err(CE_PANIC, "Flushing the Icache failed"); 362 } 363 364 } 365 } 366 367 /* 368 * There are no Hypervisor trapstat(1m) interfaces for Rock 369 * If trapstat(1m) wants to do its thing, it will have to 370 * take over all TLB miss handling. 371 */ 372 int 373 cpu_trapstat_conf(int cmd) 374 { 375 int status; 376 377 switch (cmd) { 378 case CPU_TSTATCONF_INIT: 379 case CPU_TSTATCONF_FINI: 380 case CPU_TSTATCONF_ENABLE: 381 case CPU_TSTATCONF_DISABLE: 382 status = ENOTSUP; 383 break; 384 default: 385 status = EINVAL; 386 break; 387 } 388 return (status); 389 } 390 391 /*ARGSUSED*/ 392 void 393 cpu_trapstat_data(void *buf, uint_t tstat_pgszs) 394 { 395 } 396 397 #define MAX_PAGE_COLORS (1 << MAX_PAGE_COLORS_SHIFT) 398 #define MAX_PAGE_COLORS_SHIFT (5) 399 400 /*ARGSUSED*/ 401 uint_t 402 page_pfn_2_color_cpu(pfn_t pfn, uchar_t szc, void *cookie) 403 { 404 uint_t color; 405 406 pfn = PFN_BASE(pfn, szc); 407 color = pfn ^ (pfn >> 20); 408 color = color ^ (color >> 10); 409 return ((color ^ (color >> 5)) & 0x1f); 410 } 411 412 /* 413 * this macro rotates value "x" n steps to the right 414 * mask consists of "n + m" bits 415 * ASSERT(x < (1 << (n + m)); 416 */ 417 #define ROTATE_BITS(x, n, m) (((x) >> (n)) | (((x) & ((1 << (n)) - 1)) << m)) 418 419 420 uchar_t clr2sqnclr_table[MMU_PAGE_SIZES][MAX_PAGE_COLORS]; 421 422 /* 423 * on Rock, the hash cache index is calculated as follows: 424 * pa[47:43]^pa[42:38]^pa[37:33]^pa[32:28]^ 425 * pa[27:23]^pa[22:18]^pa[17:13].pa[12:6] 426 * That is, every 5 bits is folded and XORd together. Page sizes 427 * differ by 3 bits, which is a factor of 8. This function computes 428 * the next sequential color by rotating by 3 steps within a field of 5 bits 429 * for every page size. 430 */ 431 void 432 clr2sqnclr_table_init() 433 { 434 uchar_t szc; 435 uint_t color; 436 uint_t rot = 0; 437 438 for (szc = 0; szc < MMU_PAGE_SIZES; szc++) { 439 rot = (szc * 3) % MAX_PAGE_COLORS_SHIFT; 440 for (color = 0; color < MAX_PAGE_COLORS; color++) { 441 clr2sqnclr_table[szc][color] = 442 ROTATE_BITS(color, rot, 443 (MAX_PAGE_COLORS_SHIFT - rot)); 444 } 445 } 446 } 447 448 uint_t 449 clr2sqnclr(uchar_t szc, uint_t color) 450 { 451 ASSERT(szc < MMU_PAGE_SIZES); 452 ASSERT(color < MAX_PAGE_COLORS); 453 454 return (clr2sqnclr_table[szc][color]); 455 } 456 457 #if MMU_PAGE_SIZES > 8 458 #error MMU_PAGE_SIZES can be at most 8 459 #endif 460 461 uint_t 462 page_get_nsz_color_mask_cpu(uchar_t szc, uint_t mask) 463 { 464 static uint_t rock_color_masks[7] = {0x18, 6, 0x11, 0xc, 3, 0x18, 6}; 465 466 ASSERT(szc < MMU_PAGE_SIZES - 1); 467 return (mask & rock_color_masks[szc]); 468 } 469 470 /*ARGSUSED*/ 471 uint_t 472 page_get_nsz_color_cpu(uchar_t szc, uint_t color) 473 { 474 return (color); 475 } 476 477 uint_t 478 page_get_color_shift_cpu(uchar_t szc, uchar_t nszc) 479 { 480 ASSERT(nszc >= szc); 481 return (0); 482 } 483 484 /*ARGSUSED*/ 485 pfn_t 486 page_next_pfn_for_color_cpu(pfn_t pfn, uchar_t szc, uint_t color, 487 uint_t ceq_mask, uint_t color_mask, void *cookie) 488 { 489 uint_t sqn_ceq_mask = clr2sqnclr(szc, ceq_mask); 490 uint_t sqn_color = clr2sqnclr(szc, color); 491 uint_t pfn_shift = PNUM_SHIFT(szc); 492 pfn_t cpfn, npfn, base_pfn = pfn & (~(pfn_t)color_mask << pfn_shift); 493 uint_t base_sqn_color, nsqn_color, wrap = 0; 494 495 ASSERT((color & ~ceq_mask) == 0); 496 497 base_sqn_color = clr2sqnclr(szc, 498 page_pfn_2_color_cpu(base_pfn, szc, NULL)) ^ sqn_color; 499 nsqn_color = base_sqn_color; 500 501 cpfn = (pfn_t)-1L; 502 do { 503 npfn = base_pfn | (nsqn_color << pfn_shift); 504 505 ASSERT(((page_pfn_2_color_cpu(npfn, szc, NULL) ^ color) & 506 ceq_mask) == 0); 507 508 if (npfn > pfn && npfn < cpfn) 509 cpfn = npfn; 510 511 nsqn_color = INC_MASKED(nsqn_color, sqn_ceq_mask, color_mask); 512 if (nsqn_color != base_sqn_color) 513 continue; 514 515 if (cpfn != (pfn_t)-1L) 516 break; 517 518 base_pfn += ((pfn_t)color_mask + 1) << pfn_shift; 519 520 base_sqn_color = clr2sqnclr(szc, 521 page_pfn_2_color_cpu(base_pfn, szc, NULL)) ^ sqn_color; 522 nsqn_color = base_sqn_color; 523 wrap++; 524 525 } while (nsqn_color != base_sqn_color || wrap < 2); 526 527 ASSERT(cpfn != (pfn_t)-1L); 528 529 return (cpfn); 530 } 531 532 void 533 page_coloring_init_cpu() 534 { 535 int i; 536 uint_t colors = 1 << MAX_PAGE_COLORS_SHIFT; 537 538 for (i = 0; i < mmu_page_sizes; i++) { 539 hw_page_array[i].hp_colors = colors; 540 } 541 542 /* 543 * initialise conversion table between page colors and 544 * sequential colors 545 */ 546 clr2sqnclr_table_init(); 547 548 } 549 550 /* 551 * group colorequiv colors on Rock by low order bits of the color first 552 */ 553 void 554 page_set_colorequiv_arr_cpu(void) 555 { 556 static uint_t nequiv_shades_log2[MMU_PAGE_SIZES] = {0, 3, 0, 0, 0, 0}; 557 558 if (colorequiv > 1) { 559 int i; 560 uint_t sv_a = lowbit(colorequiv) - 1; 561 562 if (sv_a > 15) 563 sv_a = 15; 564 565 for (i = 0; i < MMU_PAGE_SIZES; i++) { 566 uint_t colors; 567 uint_t a = sv_a; 568 569 if ((colors = hw_page_array[i].hp_colors) <= 1) 570 continue; 571 while ((colors >> a) == 0) 572 a--; 573 if (a > (colorequivszc[i] & 0xf) + 574 (colorequivszc[i] >> 4)) { 575 if (a <= nequiv_shades_log2[i]) { 576 colorequivszc[i] = (uchar_t)a; 577 } else { 578 colorequivszc[i] = 579 ((a - nequiv_shades_log2[i]) << 4) | 580 nequiv_shades_log2[i]; 581 } 582 } 583 } 584 } 585 } 586 587 /* 588 * Calculate the page sizes needed to program Rock TLB page size register. 589 * The invctx parameter is a flag which indicates that it will be necessary to 590 * synchronize by invalidating contexts if the sfmmu pagesize register is 591 * updated. 592 */ 593 void 594 mmu_set_pgsz_order(sfmmu_t *sfmmup, int invctx) 595 { 596 uchar_t private_pgsz_mask; 597 uchar_t shared_pgsz_mask; 598 uint16_t pgsz_order_hv[MAX_PGSZ_SEARCH_ORDER]; 599 uint64_t pgsz_order = 0; 600 uchar_t pgsz_map = 0; 601 int private_pgsz_num = 0; 602 int shared_pgsz_num = 0; 603 int tot_pgsz_num; 604 sf_scd_t *scdp; 605 int ret; 606 int i; 607 608 /* 609 * The hatlock must be held in all cases except when the sfmmu is 610 * being initialized by hat_alloc() or we are calling hat_dup(), in 611 * these cases no other thread will be using the sfmmu yet. 612 */ 613 614 ASSERT(!invctx || sfmmu_hat_lock_held(sfmmup)); 615 616 if (pgsz_search_on == 0) 617 return; 618 619 /* Always enable 8K private mappings */ 620 private_pgsz_mask = 1 << TTE8K; 621 622 /* Enable 64K private mappings unless specifically disabled */ 623 if (!(disable_large_pages & (1 << TTE64K))) { 624 private_pgsz_mask |= 1 << TTE64K; 625 } 626 627 /* 628 * First check for ISM segments not in an SCD. The algorithm for 629 * creating an SCD is to create one when an (D)ISM segment is attached 630 * unless the process's shared segments are a subset of an SCD which 631 * already exists. 632 * 633 * This situation also arises when we attach to more than the maximum 634 * number of (D)ISM segments defined in the region bit map 635 * (currently 64). 636 * 637 * We have set mmu_disable_ism_large_pages to force ISM segments to use 638 * only 4M and 256M pages. 639 */ 640 if (SFMMU_FLAGS_ISSET(sfmmup, HAT_ISMNOTINSCD)) { 641 private_pgsz_mask |= 1 << TTE4M; 642 if (SFMMU_FLAGS_ISSET(sfmmup, HAT_256M_ISM)) { 643 private_pgsz_mask |= 1 << TTE256M; 644 } 645 } 646 647 /* Now check for regions not included in the SCD. */ 648 if ((scdp = sfmmup->sfmmu_scdp) != NULL) { 649 SF_RGNMAP_EQUAL(&scdp->scd_hmeregion_map, 650 &sfmmup->sfmmu_hmeregion_map, 651 SFMMU_HMERGNMAP_WORDS, ret); 652 if (!ret) { 653 private_pgsz_mask |= sfmmup->sfmmu_rtteflags; 654 } 655 } else { 656 private_pgsz_mask |= sfmmup->sfmmu_rtteflags; 657 } 658 659 private_pgsz_mask |= sfmmup->sfmmu_tteflags; 660 661 /* 662 * If the process is part of an SCD then enable 4M and 256M shared 663 * page sizes - unless these are specifically disabled. If the 4M 664 * shared page size is specifically disabled and the process has (D)ISM 665 * segments attached or 4M regions then enable the private 4M page size. 666 * If the 256M shared page size is disabled and the process has a 256M 667 * page size region then enable the 256M private page size. The trap 668 * handler looks at the shared page sizes enabled and if a shared 669 * mapping does not correspond to one these sizes then it is treated 670 * as a private mapping. 671 * 672 * The SCD includes the process's main text segment and (D)ISM segments 673 * but we only enable the 4M shared page size so an 8K main text 674 * segment will be treated as private due to the trap handler support. 675 * 676 * Note that for simplicity the ordering of the shared page sizes is 677 * hard coded. 678 */ 679 shared_pgsz_mask = 0; 680 if (sfmmup->sfmmu_scdp != NULL) { 681 if (!(disable_shctx_large_pages & (1 << TTE4M))) { 682 shared_pgsz_mask |= 1 << TTE4M; 683 } else if (sfmmup->sfmmu_iblk != NULL || 684 (sfmmup->sfmmu_rtteflags & 685 (1 << TTE4M))) { 686 private_pgsz_mask |= 1 << TTE4M; 687 } 688 689 if (SFMMU_FLAGS_ISSET(sfmmup, HAT_256M_ISM) || 690 (sfmmup->sfmmu_rtteflags & (1 << TTE256M))) { 691 if (!(disable_shctx_large_pages & (1 << TTE256M))) { 692 shared_pgsz_mask |= 1 << TTE256M; 693 } else { 694 private_pgsz_mask |= 1 << TTE256M; 695 } 696 } 697 } 698 699 set_pgsz_order(private_pgsz_mask, shared_pgsz_mask, &pgsz_order, 700 &private_pgsz_num, &shared_pgsz_num, sfmmup); 701 702 encode_pgsz_order(pgsz_order, private_pgsz_num, shared_pgsz_num, 703 pgsz_order_hv, &pgsz_map); 704 705 tot_pgsz_num = private_pgsz_num + shared_pgsz_num; 706 ASSERT(tot_pgsz_num <= MAX_PGSZ_SEARCH_ORDER); 707 708 for (i = 0; i < tot_pgsz_num; i++) { 709 if (pgsz_order_hv[i] != sfmmup->sfmmu_pgsz_order_hv[i]) 710 break; 711 } 712 713 /* 714 * If either we've reached the maximum number of page sizes or the 715 * next element is 0, indicating the end of the list, then both the 716 * entries and their number in both arrays is the same and we return. 717 */ 718 if ((i == tot_pgsz_num) && (i == MAX_PGSZ_SEARCH_ORDER || 719 sfmmup->sfmmu_pgsz_order_hv[i] == 0)) { 720 ASSERT(pgsz_map == sfmmup->sfmmu_pgsz_map); 721 return; 722 } 723 724 /* Otherwise update the sw page size register setting */ 725 if (invctx) { 726 sfmmu_invalidate_ctx(sfmmup); 727 } 728 729 for (i = 0; i < tot_pgsz_num; i++) { 730 sfmmup->sfmmu_pgsz_order_hv[i] = pgsz_order_hv[i]; 731 } 732 733 /* Disable next entry in search list to mark the end */ 734 if (i < MAX_PGSZ_SEARCH_ORDER) { 735 sfmmup->sfmmu_pgsz_order_hv[i] = 0; 736 } 737 sfmmup->sfmmu_pgsz_map = pgsz_map; 738 } 739 740 /* 741 * Encode the Rock TLB page size register. 742 * 743 * Input: 744 * pgsz_order, ordered list of page sizes, private and shared, the order 745 * between these depends on the pgsz_order_shared_first config variable. 746 * private_pgsz_num, number of private page sizes. 747 * shared_pgsz_num, number of shared page sizes. 748 * Output: 749 * pgsz_order_hv contains the encoded pagesize search order for the hv 750 * pgsz_map field contains the page size bit map used by the trap 751 * handler to prevent unauthorized shared page sizes being used. 752 */ 753 754 static void 755 encode_pgsz_order(uint64_t pgsz_order, int private_pgsz_num, 756 int shared_pgsz_num, uint16_t *pgsz_order_hv, uchar_t *pgsz_map) 757 { 758 int i; 759 int tot_pgsz_num; 760 uint16_t pgsz_entry; 761 uint16_t first_entry_mask, second_entry_mask; 762 int first_pgsz_num; 763 764 ASSERT(private_pgsz_num < MMU_PAGE_SIZES); 765 ASSERT(shared_pgsz_num < MMU_PAGE_SIZES); 766 ASSERT(private_pgsz_num > 0); 767 768 if (pgsz_order_shared_first) { 769 first_entry_mask = TLB_PGSZ_CONTEXT1_ENABLE; 770 second_entry_mask = TLB_PGSZ_ENABLE; 771 first_pgsz_num = shared_pgsz_num; 772 } else { 773 first_entry_mask = TLB_PGSZ_ENABLE; 774 second_entry_mask = TLB_PGSZ_CONTEXT1_ENABLE; 775 first_pgsz_num = private_pgsz_num; 776 } 777 778 tot_pgsz_num = private_pgsz_num + shared_pgsz_num; 779 for (i = 0; i < tot_pgsz_num; i++) { 780 pgsz_entry = pgsz_order & TTE_SZ_BITS; 781 if (i < first_pgsz_num) { 782 if (pgsz_order_shared_first) { 783 *pgsz_map |= (1 << pgsz_entry); 784 } 785 pgsz_entry |= first_entry_mask; 786 } else { 787 if (!pgsz_order_shared_first) { 788 *pgsz_map |= (1 << pgsz_entry); 789 } 790 pgsz_entry |= second_entry_mask; 791 } 792 pgsz_order >>= 4; 793 pgsz_order_hv[i] = pgsz_entry; 794 } 795 } 796 797 /* 798 * The function returns the mmu-specific values for the 799 * hat's disable_large_pages, disable_ism_large_pages, and 800 * disable_auto_data_large_pages and 801 * disable_text_data_large_pages variables. 802 */ 803 uint_t 804 mmu_large_pages_disabled(uint_t flag) 805 { 806 uint_t pages_disable = 0; 807 808 if (flag == HAT_LOAD) { 809 pages_disable = mmu_disable_large_pages; 810 } else if (flag == HAT_LOAD_SHARE) { 811 pages_disable = mmu_disable_ism_large_pages; 812 } else if (flag == HAT_AUTO_DATA) { 813 pages_disable = mmu_disable_auto_data_large_pages; 814 } else if (flag == HAT_AUTO_TEXT) { 815 pages_disable = mmu_disable_auto_text_large_pages; 816 } 817 return (pages_disable); 818 } 819 820 /* 821 * Uses private and shared page size bitmaps to produce an ordered list 822 * of page sizes and counts to be passed to encode_pgsz_order(). 823 * 824 * Input: 825 * private_pgsz_mask, bit map of private page sizes. 826 * shared_pgsz_mask, bit map of private page sizes. 827 * sfmmup, pointer to hat structure. 828 * 829 * Output: 830 * pgsz_order, ordered list of page sizes. 831 * private_pgsz_num, number of private page sizes in pgsz_order. 832 * shared_pgsz_num, number of shared page sizes in pgsz_order. 833 */ 834 static void 835 set_pgsz_order(uchar_t private_pgsz_mask, uchar_t shared_pgsz_mask, 836 uint64_t *pgsz_order, int *private_pgsz_num, int *shared_pgsz_num, 837 sfmmu_t *sfmmup) 838 { 839 int64_t sortcnt[MMU_PAGE_SIZES]; 840 int8_t tmp_pgsz[MMU_PAGE_SIZES]; 841 ulong_t tmp; 842 uint8_t i, j, max; 843 844 *private_pgsz_num = 0; 845 *shared_pgsz_num = 0; 846 *pgsz_order = 0; 847 848 /* Sort pages by area mapped */ 849 for (i = 0; i < mmu_page_sizes; i++) { 850 tmp = sfmmup->sfmmu_ttecnt[i] + sfmmup->sfmmu_ismttecnt[i]; 851 sortcnt[i] = tmp << TTE_PAGE_SHIFT(i); 852 } 853 854 for (j = 0; j < mmu_page_sizes; j++) { 855 for (i = mmu_page_sizes - 1, max = 0; i > 0; i--) { 856 if (sortcnt[i] > sortcnt[max]) 857 max = i; 858 } 859 tmp_pgsz[j] = max; 860 sortcnt[max] = -1; 861 } 862 863 /* Add shared page sizes to page order if these come first */ 864 if (pgsz_order_shared_first) { 865 if (shared_pgsz_mask & (1 << TTE256M)) { 866 *pgsz_order = TTE256M; 867 (*shared_pgsz_num)++; 868 } 869 if (shared_pgsz_mask & (1 << TTE4M)) { 870 *pgsz_order |= (TTE4M << (*shared_pgsz_num * 4)); 871 (*shared_pgsz_num)++; 872 } 873 } 874 875 876 /* Add private page sizes to page order */ 877 for (i = 0; i < mmu_page_sizes; i++) { 878 if (private_pgsz_mask & (1 << tmp_pgsz[i])) { 879 *pgsz_order |= (tmp_pgsz[i] << 880 ((*private_pgsz_num + *shared_pgsz_num) * 4)); 881 (*private_pgsz_num)++; 882 } 883 } 884 885 /* Add shared page sizes to page order if these come last */ 886 if (!pgsz_order_shared_first) { 887 if (shared_pgsz_mask & (1 << TTE256M)) { 888 *pgsz_order |= (TTE256M << 889 ((*private_pgsz_num + *shared_pgsz_num) * 4)); 890 (*shared_pgsz_num)++; 891 } 892 if (shared_pgsz_mask & (1 << TTE4M)) { 893 *pgsz_order |= (TTE4M << 894 ((*private_pgsz_num + *shared_pgsz_num) * 4)); 895 (*shared_pgsz_num)++; 896 } 897 } 898 899 ASSERT(*pgsz_order); 900 ASSERT(*private_pgsz_num); 901 ASSERT((*private_pgsz_num + *shared_pgsz_num) 902 <= MAX_PGSZ_SEARCH_ORDER); 903 } 904 905 /* 906 * This routine is called without holding the hat lock to determine 907 * whether the process's optimal page size order has changed significantly 908 * since the page size register was last set. If it has changed we get the 909 * hat lock and call mmu_set_pgsz_order() to update the effective pagesize 910 * order. 911 */ 912 void 913 mmu_check_page_sizes(sfmmu_t *sfmmup, uint64_t *ttecnt) 914 { 915 int64_t sortcnt[MMU_PAGE_SIZES]; 916 int8_t tmp_pgsz[MMU_PAGE_SIZES]; 917 ulong_t tmp; 918 int8_t i, j, max; 919 uint_t pgsz; 920 uint16_t *pgsz_order_hv; 921 int page_order_changed; 922 hatlock_t *hatlockp; 923 int pgsz_count = 0; 924 925 ASSERT(!sfmmu_hat_lock_held(sfmmup)); 926 927 if (pgsz_search_on == 0) 928 return; 929 930 /* 931 * Check if ttecnt has changed significantly, since the last time we 932 * were called. If the shared page sizes have changed then this is 933 * handled by mmu_set_pgsz_order() being called directly when we join 934 * the SCD. 935 */ 936 for (i = 0; i < mmu_page_sizes; i++) { 937 if (ttecnt[i] > (sfmmup->sfmmu_mmuttecnt[i] << 1) || 938 ttecnt[i] < (sfmmup->sfmmu_mmuttecnt[i] >> 1)) 939 break; 940 } 941 942 if (i == mmu_page_sizes) { 943 return; 944 } 945 946 /* Sort pages by area mapped */ 947 for (i = 0; i < mmu_page_sizes; i++) { 948 tmp = ttecnt[i]; 949 sortcnt[i] = tmp << TTE_PAGE_SHIFT(i); 950 } 951 952 for (j = 0; j < mmu_page_sizes; j++) { 953 for (i = mmu_page_sizes - 1, max = 0; i > 0; i--) { 954 if (sortcnt[i] > sortcnt[max]) 955 max = i; 956 } 957 tmp_pgsz[j] = max; 958 sortcnt[max] = -1; 959 } 960 961 /* 962 * Check if the order of the private page sizes has changed. We call 963 * mmu_set_pgsz_order() directly if additional page sizes are used, 964 * so we can assume that the number of entries is unchanged. 965 */ 966 pgsz_order_hv = sfmmup->sfmmu_pgsz_order_hv; 967 if (pgsz_order_shared_first) { 968 /* skip over shared pgsz entries */ 969 while ((pgsz_order_hv[pgsz_count] & TLB_PGSZ_CONTEXT1_ENABLE) == 970 TLB_PGSZ_CONTEXT1_ENABLE) { 971 pgsz_count++; 972 } 973 } 974 975 i = 0; 976 page_order_changed = 0; 977 while ((pgsz_order_hv[pgsz_count] & TLB_PGSZ_ENABLE) && 978 !(pgsz_order_hv[pgsz_count] & TLB_PGSZ_CONTEXT1) && 979 (pgsz_count < MAX_PGSZ_SEARCH_ORDER)) { 980 pgsz = (pgsz_order_hv[pgsz_count] & TTE_SZ_BITS); 981 ASSERT(pgsz < MMU_PAGE_SIZES); 982 983 if (pgsz != tmp_pgsz[i]) { 984 page_order_changed = 1; 985 break; 986 } 987 pgsz_count++; 988 i++; 989 } 990 991 if (page_order_changed) { 992 hatlockp = sfmmu_hat_enter(sfmmup); 993 /* Save old values of ttecnt */ 994 for (i = 0; i < mmu_page_sizes; i++) { 995 sfmmup->sfmmu_mmuttecnt[i] = ttecnt[i]; 996 } 997 mmu_set_pgsz_order(sfmmup, 1); 998 sfmmu_hat_exit(hatlockp); 999 } 1000 } 1001 1002 /* 1003 * If the mmu extension API is supported and pgsz_search_on is set, 1004 * patch out the instruction to branch over the hypervisor call in 1005 * sfmmu_load_mmustate(). 1006 */ 1007 void 1008 mmu_enable_pgsz_search() 1009 { 1010 if ((hsvc_mmu_ext_available == B_TRUE) && pgsz_search_on) { 1011 /* patch in hcall to set pgsz order */ 1012 sfmmu_patch_pgsz_reg(); 1013 } 1014 } 1015