Home | History | Annotate | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     27 
     28 #include <sys/types.h>
     29 #include <sys/sysmacros.h>
     30 #include <sys/systm.h>
     31 #include <sys/mman.h>
     32 #include <sys/buf.h>
     33 #include <sys/vmem.h>
     34 #include <sys/cmn_err.h>
     35 #include <sys/debug.h>
     36 #include <sys/machparam.h>
     37 #include <vm/page.h>
     38 #include <vm/seg_kmem.h>
     39 #include <vm/seg_kpm.h>
     40 
     41 #ifdef __sparc
     42 #include <sys/cpu_module.h>
     43 #define	BP_FLUSH(addr, size)	flush_instr_mem((void *)addr, size);
     44 #else
     45 #define	BP_FLUSH(addr, size)
     46 #endif
     47 
     48 int bp_force_copy = 0;
     49 typedef enum {
     50 	BP_COPYIN	= 0,
     51 	BP_COPYOUT	= 1
     52 } bp_copydir_t;
     53 static int bp_copy_common(bp_copydir_t dir, struct buf *bp, void *driverbuf,
     54     offset_t offset, size_t size);
     55 
     56 static vmem_t *bp_map_arena;
     57 static size_t bp_align;
     58 static uint_t bp_devload_flags = PROT_READ | PROT_WRITE | HAT_NOSYNC;
     59 int	bp_max_cache = 1 << 17;		/* 128K default; tunable */
     60 int	bp_mapin_kpm_enable = 1;	/* enable default; tunable */
     61 
     62 static void *
     63 bp_vmem_alloc(vmem_t *vmp, size_t size, int vmflag)
     64 {
     65 	return (vmem_xalloc(vmp, size, bp_align, 0, 0, NULL, NULL, vmflag));
     66 }
     67 
     68 void
     69 bp_init(size_t align, uint_t devload_flags)
     70 {
     71 	bp_align = MAX(align, PAGESIZE);
     72 	bp_devload_flags |= devload_flags;
     73 
     74 	if (bp_align <= bp_max_cache)
     75 		bp_map_arena = vmem_create("bp_map", NULL, 0, bp_align,
     76 		    bp_vmem_alloc, vmem_free, heap_arena,
     77 		    MIN(8 * bp_align, bp_max_cache), VM_SLEEP);
     78 }
     79 
     80 /*
     81  * common routine so can be called with/without VM_SLEEP
     82  */
     83 void *
     84 bp_mapin_common(struct buf *bp, int flag)
     85 {
     86 	struct as	*as;
     87 	pfn_t		pfnum;
     88 	page_t		*pp;
     89 	page_t		**pplist;
     90 	caddr_t		kaddr;
     91 	caddr_t		addr;
     92 	uintptr_t	off;
     93 	size_t		size;
     94 	pgcnt_t		npages;
     95 	int		color;
     96 
     97 	/* return if already mapped in, no pageio/physio, or physio to kas */
     98 	if ((bp->b_flags & B_REMAPPED) ||
     99 	    !(bp->b_flags & (B_PAGEIO | B_PHYS)) ||
    100 	    (((bp->b_flags & (B_PAGEIO | B_PHYS)) == B_PHYS) &&
    101 	    ((bp->b_proc == NULL) || (bp->b_proc->p_as == &kas))))
    102 		return (bp->b_un.b_addr);
    103 
    104 	ASSERT((bp->b_flags & (B_PAGEIO | B_PHYS)) != (B_PAGEIO | B_PHYS));
    105 
    106 	addr = (caddr_t)bp->b_un.b_addr;
    107 	off = (uintptr_t)addr & PAGEOFFSET;
    108 	size = P2ROUNDUP(bp->b_bcount + off, PAGESIZE);
    109 	npages = btop(size);
    110 
    111 	/* Fastpath single page IO to locked memory by using kpm. */
    112 	if ((bp->b_flags & (B_SHADOW | B_PAGEIO)) && (npages == 1) &&
    113 	    kpm_enable && bp_mapin_kpm_enable) {
    114 		if (bp->b_flags & B_SHADOW)
    115 			pp = *bp->b_shadow;
    116 		else
    117 			pp = bp->b_pages;
    118 		kaddr = hat_kpm_mapin(pp, NULL);
    119 		bp->b_un.b_addr = kaddr + off;
    120 		bp->b_flags |= B_REMAPPED;
    121 		return (bp->b_un.b_addr);
    122 	}
    123 
    124 	/*
    125 	 * Allocate kernel virtual space for remapping.
    126 	 */
    127 	color = bp_color(bp);
    128 	ASSERT(color < bp_align);
    129 
    130 	if (bp_map_arena != NULL) {
    131 		kaddr = (caddr_t)vmem_alloc(bp_map_arena,
    132 		    P2ROUNDUP(color + size, bp_align), flag);
    133 		if (kaddr == NULL)
    134 			return (NULL);
    135 		kaddr += color;
    136 	} else {
    137 		kaddr = vmem_xalloc(heap_arena, size, bp_align, color,
    138 		    0, NULL, NULL, flag);
    139 		if (kaddr == NULL)
    140 			return (NULL);
    141 	}
    142 
    143 	ASSERT(P2PHASE((uintptr_t)kaddr, bp_align) == color);
    144 
    145 	/*
    146 	 * Map bp into the virtual space we just allocated.
    147 	 */
    148 	if (bp->b_flags & B_PAGEIO) {
    149 		pp = bp->b_pages;
    150 		pplist = NULL;
    151 	} else if (bp->b_flags & B_SHADOW) {
    152 		pp = NULL;
    153 		pplist = bp->b_shadow;
    154 	} else {
    155 		pp = NULL;
    156 		pplist = NULL;
    157 		if (bp->b_proc == NULL || (as = bp->b_proc->p_as) == NULL)
    158 			as = &kas;
    159 	}
    160 
    161 	bp->b_flags |= B_REMAPPED;
    162 	bp->b_un.b_addr = kaddr + off;
    163 
    164 	while (npages-- != 0) {
    165 		if (pp) {
    166 			pfnum = pp->p_pagenum;
    167 			pp = pp->p_next;
    168 		} else if (pplist == NULL) {
    169 			pfnum = hat_getpfnum(as->a_hat,
    170 			    (caddr_t)((uintptr_t)addr & MMU_PAGEMASK));
    171 			if (pfnum == PFN_INVALID)
    172 				panic("bp_mapin_common: hat_getpfnum for"
    173 				    " addr %p failed\n", (void *)addr);
    174 			addr += PAGESIZE;
    175 		} else {
    176 			pfnum = (*pplist)->p_pagenum;
    177 			pplist++;
    178 		}
    179 
    180 		hat_devload(kas.a_hat, kaddr, PAGESIZE, pfnum,
    181 		    bp_devload_flags, HAT_LOAD_LOCK);
    182 
    183 		kaddr += PAGESIZE;
    184 	}
    185 	return (bp->b_un.b_addr);
    186 }
    187 
    188 /*
    189  * Convert bp for pageio/physio to a kernel addressable location.
    190  */
    191 void
    192 bp_mapin(struct buf *bp)
    193 {
    194 	(void) bp_mapin_common(bp, VM_SLEEP);
    195 }
    196 
    197 /*
    198  * Release all the resources associated with a previous bp_mapin() call.
    199  */
    200 void
    201 bp_mapout(struct buf *bp)
    202 {
    203 	caddr_t		addr;
    204 	uintptr_t	off;
    205 	uintptr_t	base;
    206 	uintptr_t	color;
    207 	size_t		size;
    208 	pgcnt_t		npages;
    209 	page_t		*pp;
    210 
    211 	if ((bp->b_flags & B_REMAPPED) == 0)
    212 		return;
    213 
    214 	addr = bp->b_un.b_addr;
    215 	off = (uintptr_t)addr & PAGEOFFSET;
    216 	size = P2ROUNDUP(bp->b_bcount + off, PAGESIZE);
    217 	npages = btop(size);
    218 
    219 	bp->b_un.b_addr = (caddr_t)off;		/* debugging aid */
    220 
    221 	if ((bp->b_flags & (B_SHADOW | B_PAGEIO)) && (npages == 1) &&
    222 	    kpm_enable && bp_mapin_kpm_enable) {
    223 		if (bp->b_flags & B_SHADOW)
    224 			pp = *bp->b_shadow;
    225 		else
    226 			pp = bp->b_pages;
    227 		addr = (caddr_t)((uintptr_t)addr & MMU_PAGEMASK);
    228 		hat_kpm_mapout(pp, NULL, addr);
    229 		bp->b_flags &= ~B_REMAPPED;
    230 		return;
    231 	}
    232 
    233 	base = (uintptr_t)addr & MMU_PAGEMASK;
    234 	BP_FLUSH(base, size);
    235 	hat_unload(kas.a_hat, (void *)base, size,
    236 	    HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK);
    237 	if (bp_map_arena != NULL) {
    238 		color = P2PHASE(base, bp_align);
    239 		vmem_free(bp_map_arena, (void *)(base - color),
    240 		    P2ROUNDUP(color + size, bp_align));
    241 	} else
    242 		vmem_free(heap_arena, (void *)base, size);
    243 	bp->b_flags &= ~B_REMAPPED;
    244 }
    245 
    246 /*
    247  * copy data from a KVA into a buf_t which may not be mapped in. offset
    248  * is relative to the buf_t only.
    249  */
    250 int
    251 bp_copyout(void *driverbuf, struct buf *bp, offset_t offset, size_t size)
    252 {
    253 	return (bp_copy_common(BP_COPYOUT, bp, driverbuf, offset, size));
    254 }
    255 
    256 /*
    257  * copy data from a buf_t which may not be mapped in, into a KVA.. offset
    258  * is relative to the buf_t only.
    259  */
    260 int
    261 bp_copyin(struct buf *bp, void *driverbuf, offset_t offset, size_t size)
    262 {
    263 	return (bp_copy_common(BP_COPYIN, bp, driverbuf, offset, size));
    264 }
    265 
    266 
    267 #define	BP_COPY(dir, driverbuf, baddr, sz)	\
    268 	(dir == BP_COPYIN) ? \
    269 	bcopy(baddr, driverbuf, sz) :  bcopy(driverbuf, baddr, sz)
    270 
    271 static int
    272 bp_copy_common(bp_copydir_t dir, struct buf *bp, void *driverbuf,
    273     offset_t offset, size_t size)
    274 {
    275 	page_t **pplist;
    276 	uintptr_t poff;
    277 	uintptr_t voff;
    278 	struct as *as;
    279 	caddr_t kaddr;
    280 	caddr_t addr;
    281 	page_t *page;
    282 	size_t psize;
    283 	page_t *pp;
    284 	pfn_t pfn;
    285 
    286 
    287 	ASSERT((offset + size) <= bp->b_bcount);
    288 
    289 	/* if the buf_t already has a KVA, just do a bcopy */
    290 	if (!(bp->b_flags & (B_PHYS | B_PAGEIO))) {
    291 		BP_COPY(dir, driverbuf, bp->b_un.b_addr + offset, size);
    292 		return (0);
    293 	}
    294 
    295 	/* if we don't have kpm enabled, we need to do the slow path */
    296 	if (!kpm_enable || bp_force_copy) {
    297 		bp_mapin(bp);
    298 		BP_COPY(dir, driverbuf, bp->b_un.b_addr + offset, size);
    299 		bp_mapout(bp);
    300 		return (0);
    301 	}
    302 
    303 	/*
    304 	 * kpm is enabled, and we need to map in the buf_t for the copy
    305 	 */
    306 
    307 	/* setup pp, plist, and make sure 'as' is right */
    308 	if (bp->b_flags & B_PAGEIO) {
    309 		pp = bp->b_pages;
    310 		pplist = NULL;
    311 	} else if (bp->b_flags & B_SHADOW) {
    312 		pp = NULL;
    313 		pplist = bp->b_shadow;
    314 	} else {
    315 		pp = NULL;
    316 		pplist = NULL;
    317 		if (bp->b_proc == NULL || (as = bp->b_proc->p_as) == NULL) {
    318 			as = &kas;
    319 		}
    320 	}
    321 
    322 	/*
    323 	 * locals for the address, the offset into the first page, and the
    324 	 * size of the first page we are going to copy.
    325 	 */
    326 	addr = (caddr_t)bp->b_un.b_addr;
    327 	poff = (uintptr_t)addr & PAGEOFFSET;
    328 	psize = MIN(PAGESIZE - poff, size);
    329 
    330 	/*
    331 	 * we always start with a 0 offset into the driverbuf provided. The
    332 	 * offset passed in only applies to the buf_t.
    333 	 */
    334 	voff = 0;
    335 
    336 	/* Loop until we've copied al the data */
    337 	while (size > 0) {
    338 
    339 		/*
    340 		 * for a pp or pplist, get the pfn, then go to the next page_t
    341 		 * for the next time around the loop.
    342 		 */
    343 		if (pp) {
    344 			page = pp;
    345 			pp = pp->p_next;
    346 		} else if (pplist != NULL) {
    347 			page = (*pplist);
    348 			pplist++;
    349 
    350 		/*
    351 		 * We have a user VA. If we are going to copy this page, (e.g.
    352 		 * the offset into the buf_t where we start to copy is
    353 		 * within this page), get the pfn. Don't waste the cycles
    354 		 * getting the pfn if we're not copying this page.
    355 		 */
    356 		} else if (offset < psize) {
    357 			pfn = hat_getpfnum(as->a_hat,
    358 			    (caddr_t)((uintptr_t)addr & PAGEMASK));
    359 			if (pfn == PFN_INVALID) {
    360 				return (-1);
    361 			}
    362 			page = page_numtopp_nolock(pfn);
    363 			addr += psize - offset;
    364 		} else {
    365 			addr += psize;
    366 		}
    367 
    368 		/*
    369 		 * if we have an initial offset into the buf_t passed in,
    370 		 * and it falls within the current page, account for it in
    371 		 * the page size (how much we will copy) and the offset into the
    372 		 * page (where we'll start copying from).
    373 		 */
    374 		if ((offset > 0) && (offset < psize)) {
    375 			psize -= offset;
    376 			poff += offset;
    377 			offset = 0;
    378 
    379 		/*
    380 		 * if we have an initial offset into the buf_t passed in,
    381 		 * and it's not within the current page, skip this page.
    382 		 * We don't have to worry about the first page offset and size
    383 		 * anymore. psize will normally be PAGESIZE now unless we are
    384 		 * on the last page.
    385 		 */
    386 		} else if (offset >= psize) {
    387 			offset -= psize;
    388 			psize = MIN(PAGESIZE, size);
    389 			poff = 0;
    390 			continue;
    391 		}
    392 
    393 		/*
    394 		 * get a kpm mapping to the page, them copy in/out of the
    395 		 * page. update size left and offset into the driverbuf passed
    396 		 * in for the next time around the loop.
    397 		 */
    398 		kaddr = hat_kpm_mapin(page, NULL) + poff;
    399 		BP_COPY(dir, (void *)((uintptr_t)driverbuf + voff), kaddr,
    400 		    psize);
    401 		hat_kpm_mapout(page, NULL, kaddr - poff);
    402 
    403 		size -= psize;
    404 		voff += psize;
    405 
    406 		poff = 0;
    407 		psize = MIN(PAGESIZE, size);
    408 	}
    409 
    410 	return (0);
    411 }
    412