Home | History | Annotate | Download | only in vm
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #ifndef	_VM_HTABLE_H
     27 #define	_VM_HTABLE_H
     28 
     29 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     30 
     31 #ifdef	__cplusplus
     32 extern "C" {
     33 #endif
     34 
     35 #if defined(__GNUC__) && defined(_ASM_INLINES) && defined(_KERNEL)
     36 #include <asm/htable.h>
     37 #endif
     38 
     39 extern void atomic_andb(uint8_t *addr, uint8_t value);
     40 extern void atomic_orb(uint8_t *addr, uint8_t value);
     41 extern void atomic_inc16(uint16_t *addr);
     42 extern void atomic_dec16(uint16_t *addr);
     43 extern void mmu_tlbflush_entry(caddr_t addr);
     44 
     45 /*
     46  * Each hardware page table has an htable_t describing it.
     47  *
     48  * We use a reference counter mechanism to detect when we can free an htable.
     49  * In the implementation the reference count is split into 2 separate counters:
     50  *
     51  *	ht_busy is a traditional reference count of uses of the htable pointer
     52  *
     53  *	ht_valid_cnt is a count of how many references are implied by valid
     54  *	         PTE/PTP entries in the pagetable
     55  *
     56  * ht_busy is only incremented by htable_lookup() or htable_create()
     57  * while holding the appropriate hash_table mutex. While installing a new
     58  * valid PTE or PTP, in order to increment ht_valid_cnt a thread must have
     59  * done an htable_lookup() or htable_create() but not the htable_release yet.
     60  *
     61  * htable_release(), while holding the mutex, can know that if
     62  * busy == 1 and valid_cnt == 0, the htable can be free'd.
     63  *
     64  * The fields have been ordered to make htable_lookup() fast. Hence,
     65  * ht_hat, ht_vaddr, ht_level and ht_next need to be clustered together.
     66  */
     67 struct htable {
     68 	struct htable	*ht_next;	/* forward link for hash table */
     69 	struct hat	*ht_hat;	/* hat this mapping comes from */
     70 	uintptr_t	ht_vaddr;	/* virt addr at start of this table */
     71 	int8_t		ht_level;	/* page table level: 0=4K, 1=2M, ... */
     72 	uint8_t		ht_flags;	/* see below */
     73 	int16_t		ht_busy;	/* implements locking protocol */
     74 	int16_t		ht_valid_cnt;	/* # of valid entries in this table */
     75 	uint32_t	ht_lock_cnt;	/* # of locked entries in this table */
     76 					/* never used for kernel hat */
     77 	pfn_t		ht_pfn;		/* pfn of page of the pagetable */
     78 	struct htable	*ht_prev;	/* backward link for hash table */
     79 	struct htable	*ht_parent;	/* htable that points to this htable */
     80 	struct htable	*ht_shares;	/* for HTABLE_SHARED_PFN only */
     81 };
     82 typedef struct htable htable_t;
     83 
     84 /*
     85  * Flags values for htable ht_flags field:
     86  *
     87  * HTABLE_VLP - this is the top level htable of a VLP HAT.
     88  *
     89  * HTABLE_SHARED_PFN - this htable had its PFN assigned from sharing another
     90  * 	htable. Used by hat_share() for ISM.
     91  */
     92 #define	HTABLE_VLP		(0x01)
     93 #define	HTABLE_SHARED_PFN	(0x02)
     94 
     95 /*
     96  * The htable hash table hashing function.  The 28 is so that high
     97  * order bits are included in the hash index to skew the wrap
     98  * around of addresses. Even though the hash buckets are stored per
     99  * hat we include the value of hat pointer in the hash function so
    100  * that the secondary hash for the htable mutex winds up being different in
    101  * every address space.
    102  */
    103 #define	HTABLE_HASH(hat, va, lvl)					\
    104 	((((va) >> LEVEL_SHIFT(1)) + ((va) >> 28) + (lvl) +		\
    105 	((uintptr_t)(hat) >> 4)) & ((hat)->hat_num_hash - 1))
    106 
    107 /*
    108  * Each CPU gets a unique hat_cpu_info structure in cpu_hat_info.
    109  */
    110 struct hat_cpu_info {
    111 	kmutex_t hci_mutex;		/* mutex to ensure sequential usage */
    112 #if defined(__amd64)
    113 	pfn_t	hci_vlp_pfn;		/* pfn of hci_vlp_l3ptes */
    114 	x86pte_t *hci_vlp_l3ptes;	/* VLP Level==3 pagetable (top) */
    115 	x86pte_t *hci_vlp_l2ptes;	/* VLP Level==2 pagetable */
    116 #endif	/* __amd64 */
    117 };
    118 
    119 
    120 /*
    121  * HTABLE_LAST_PAGE: compute the last page aligned VA mapped by an htable.
    122  *
    123  * NEXT_ENTRY_VA: given a va and a level, compute the virtual address of the
    124  * start of the next page at that level.
    125  *
    126  * XX64 - The check for the VA hole needs to be better generalized.
    127  */
    128 #if defined(__amd64)
    129 #define	HTABLE_NUM_PTES(ht)	(((ht)->ht_flags & HTABLE_VLP) ? 4 : 512)
    130 
    131 #define	HTABLE_LAST_PAGE(ht)						\
    132 	((ht)->ht_level == mmu.max_level ? ((uintptr_t)0UL - MMU_PAGESIZE) :\
    133 	((ht)->ht_vaddr - MMU_PAGESIZE +				\
    134 	((uintptr_t)HTABLE_NUM_PTES(ht) << LEVEL_SHIFT((ht)->ht_level))))
    135 
    136 #define	NEXT_ENTRY_VA(va, l)	\
    137 	((va & LEVEL_MASK(l)) + LEVEL_SIZE(l) == mmu.hole_start ?	\
    138 	mmu.hole_end : (va & LEVEL_MASK(l)) + LEVEL_SIZE(l))
    139 
    140 #elif defined(__i386)
    141 
    142 #define	HTABLE_NUM_PTES(ht)	\
    143 	(!mmu.pae_hat ? 1024 : ((ht)->ht_level == 2 ? 4 : 512))
    144 
    145 #define	HTABLE_LAST_PAGE(ht)	((ht)->ht_vaddr - MMU_PAGESIZE + \
    146 	((uintptr_t)HTABLE_NUM_PTES(ht) << LEVEL_SHIFT((ht)->ht_level)))
    147 
    148 #define	NEXT_ENTRY_VA(va, l) ((va & LEVEL_MASK(l)) + LEVEL_SIZE(l))
    149 
    150 #endif
    151 
    152 #if defined(_KERNEL)
    153 
    154 /*
    155  * initialization function called from hat_init()
    156  */
    157 extern void htable_init(void);
    158 
    159 /*
    160  * Functions to lookup, or "lookup and create", the htable corresponding
    161  * to the virtual address "vaddr"  in the "hat" at the given "level" of
    162  * page tables. htable_lookup() may return NULL if no such entry exists.
    163  *
    164  * On return the given htable is marked busy (a shared lock) - this prevents
    165  * the htable from being stolen or freed until htable_release() is called.
    166  *
    167  * If kalloc_flag is set on an htable_create() we can't call kmem allocation
    168  * routines for this htable, since it's for the kernel hat itself.
    169  *
    170  * htable_acquire() is used when an htable pointer has been extracted from
    171  * an hment and we need to get a reference to the htable.
    172  */
    173 extern htable_t *htable_lookup(struct hat *hat, uintptr_t vaddr, level_t level);
    174 extern htable_t *htable_create(struct hat *hat, uintptr_t vaddr, level_t level,
    175 	htable_t *shared);
    176 extern void htable_acquire(htable_t *);
    177 
    178 extern void htable_release(htable_t *ht);
    179 extern void htable_destroy(htable_t *ht);
    180 
    181 /*
    182  * Code to free all remaining htables for a hat. Called after the hat is no
    183  * longer in use by any thread.
    184  */
    185 extern void htable_purge_hat(struct hat *hat);
    186 
    187 /*
    188  * Find the htable, page table entry index, and PTE of the given virtual
    189  * address.  If not found returns NULL. When found, returns the htable_t *,
    190  * sets entry, and has a hold on the htable.
    191  */
    192 extern htable_t *htable_getpte(struct hat *, uintptr_t, uint_t *, x86pte_t *,
    193 	level_t);
    194 
    195 /*
    196  * Similar to htable_getpte(), except that this only succeeds if a valid
    197  * page mapping is present.
    198  */
    199 extern htable_t *htable_getpage(struct hat *hat, uintptr_t va, uint_t *entry);
    200 
    201 /*
    202  * Called to allocate initial/additional htables for reserve.
    203  */
    204 extern void htable_initial_reserve(uint_t);
    205 extern void htable_reserve(uint_t);
    206 
    207 /*
    208  * Used to readjust the htable reserve after the reserve list has been used.
    209  * Also called after boot to release left over boot reserves.
    210  */
    211 extern void htable_adjust_reserve(void);
    212 
    213 /*
    214  * return number of bytes mapped by all the htables in a given hat
    215  */
    216 extern size_t htable_mapped(struct hat *);
    217 
    218 
    219 /*
    220  * Attach initial pagetables as htables
    221  */
    222 extern void htable_attach(struct hat *, uintptr_t, level_t, struct htable *,
    223     pfn_t);
    224 
    225 /*
    226  * Routine to find the next populated htable at or above a given virtual
    227  * address. Can specify an upper limit, or HTABLE_WALK_TO_END to indicate
    228  * that it should search the entire address space.  Similar to
    229  * hat_getpte(), but used for walking through address ranges. It can be
    230  * used like this:
    231  *
    232  *	va = ...
    233  *	ht = NULL;
    234  *	while (va < end_va) {
    235  *		pte = htable_walk(hat, &ht, &va, end_va);
    236  *		if (!pte)
    237  *			break;
    238  *
    239  *		... code to operate on page at va ...
    240  *
    241  *		va += LEVEL_SIZE(ht->ht_level);
    242  *	}
    243  *	if (ht)
    244  *		htable_release(ht);
    245  *
    246  */
    247 extern x86pte_t htable_walk(struct hat *hat, htable_t **ht, uintptr_t *va,
    248 	uintptr_t eaddr);
    249 
    250 #define	HTABLE_WALK_TO_END ((uintptr_t)-1)
    251 
    252 /*
    253  * Utilities convert between virtual addresses and page table entry indeces.
    254  */
    255 extern uint_t htable_va2entry(uintptr_t va, htable_t *ht);
    256 extern uintptr_t htable_e2va(htable_t *ht, uint_t entry);
    257 
    258 /*
    259  * Interfaces that provide access to page table entries via the htable.
    260  *
    261  * Note that all accesses except x86pte_copy() and x86pte_zero() are atomic.
    262  */
    263 extern void	x86pte_cpu_init(cpu_t *);
    264 extern void	x86pte_cpu_fini(cpu_t *);
    265 
    266 extern x86pte_t	x86pte_get(htable_t *, uint_t entry);
    267 
    268 /*
    269  * x86pte_set returns LPAGE_ERROR if it's asked to overwrite a page table
    270  * link with a large page mapping.
    271  */
    272 #define	LPAGE_ERROR (-(x86pte_t)1)
    273 extern x86pte_t	x86pte_set(htable_t *, uint_t entry, x86pte_t new, void *);
    274 
    275 extern x86pte_t x86pte_inval(htable_t *ht, uint_t entry,
    276 	x86pte_t old, x86pte_t *ptr);
    277 
    278 extern x86pte_t x86pte_update(htable_t *ht, uint_t entry,
    279 	x86pte_t old, x86pte_t new);
    280 
    281 extern void	x86pte_copy(htable_t *src, htable_t *dest, uint_t entry,
    282 	uint_t cnt);
    283 
    284 /*
    285  * access to a pagetable knowing only the pfn
    286  */
    287 extern x86pte_t *x86pte_mapin(pfn_t, uint_t, htable_t *);
    288 extern void x86pte_mapout(void);
    289 
    290 /*
    291  * these are actually inlines for "lock; incw", "lock; decw", etc. instructions.
    292  */
    293 #define	HTABLE_INC(x)	atomic_inc16((uint16_t *)&x)
    294 #define	HTABLE_DEC(x)	atomic_dec16((uint16_t *)&x)
    295 #define	HTABLE_LOCK_INC(ht)	atomic_add_32(&(ht)->ht_lock_cnt, 1)
    296 #define	HTABLE_LOCK_DEC(ht)	atomic_add_32(&(ht)->ht_lock_cnt, -1)
    297 
    298 #ifdef __xpv
    299 extern void xen_flush_va(caddr_t va);
    300 extern void xen_gflush_va(caddr_t va, cpuset_t);
    301 extern void xen_flush_tlb(void);
    302 extern void xen_gflush_tlb(cpuset_t);
    303 extern void xen_pin(pfn_t, level_t);
    304 extern void xen_unpin(pfn_t);
    305 extern int xen_kpm_page(pfn_t, uint_t);
    306 
    307 /*
    308  * The hypervisor maps all page tables into our address space read-only.
    309  * Under normal circumstances, the hypervisor then handles all updates to
    310  * the page tables underneath the covers for us.  However, when we are
    311  * trying to dump core after a hypervisor panic, the hypervisor is no
    312  * longer available to do these updates.  To work around the protection
    313  * problem, we simply disable write-protect checking for the duration of a
    314  * pagetable update operation.
    315  */
    316 #define	XPV_ALLOW_PAGETABLE_UPDATES()					\
    317 	{								\
    318 		if (IN_XPV_PANIC())					\
    319 			setcr0((getcr0() & ~CR0_WP) & 0xffffffff); 	\
    320 	}
    321 #define	XPV_DISALLOW_PAGETABLE_UPDATES()				\
    322 	{								\
    323 		if (IN_XPV_PANIC() > 0)					\
    324 			setcr0((getcr0() | CR0_WP) & 0xffffffff);	\
    325 	}
    326 
    327 #else /* __xpv */
    328 
    329 #define	XPV_ALLOW_PAGETABLE_UPDATES()
    330 #define	XPV_DISALLOW_PAGETABLE_UPDATES()
    331 
    332 #endif
    333 
    334 #endif	/* _KERNEL */
    335 
    336 
    337 #ifdef	__cplusplus
    338 }
    339 #endif
    340 
    341 #endif	/* _VM_HTABLE_H */
    342