Home | History | Annotate | Download | only in io
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  * Memory special file
     28  */
     29 
     30 #include <sys/types.h>
     31 #include <sys/param.h>
     32 #include <sys/user.h>
     33 #include <sys/buf.h>
     34 #include <sys/systm.h>
     35 #include <sys/cred.h>
     36 #include <sys/vm.h>
     37 #include <sys/uio.h>
     38 #include <sys/mman.h>
     39 #include <sys/kmem.h>
     40 #include <vm/seg.h>
     41 #include <vm/page.h>
     42 #include <sys/stat.h>
     43 #include <sys/vmem.h>
     44 #include <sys/memlist.h>
     45 #include <sys/bootconf.h>
     46 
     47 #include <vm/seg_vn.h>
     48 #include <vm/seg_dev.h>
     49 #include <vm/seg_kmem.h>
     50 #include <vm/seg_kp.h>
     51 #include <vm/seg_kpm.h>
     52 #include <vm/hat.h>
     53 
     54 #include <sys/conf.h>
     55 #include <sys/mem.h>
     56 #include <sys/types.h>
     57 #include <sys/conf.h>
     58 #include <sys/param.h>
     59 #include <sys/systm.h>
     60 #include <sys/errno.h>
     61 #include <sys/modctl.h>
     62 #include <sys/memlist.h>
     63 #include <sys/ddi.h>
     64 #include <sys/sunddi.h>
     65 #include <sys/debug.h>
     66 #include <sys/fm/protocol.h>
     67 
/*
 * Platform hooks used by the memory-name/info/sid ioctls below.  On sparc
 * they are declared here by hand; on x86 <sys/cpu_module.h> is included
 * instead.
 */
#if defined(__sparc)
extern int cpu_get_mem_name(uint64_t, uint64_t *, uint64_t, char *, int, int *);
extern int cpu_get_mem_info(uint64_t, uint64_t, uint64_t *, uint64_t *,
    uint64_t *, int *, int *, int *);
extern size_t cpu_get_name_bufsize(void);
extern int cpu_get_mem_sid(char *, char *, int, int *);
extern int cpu_get_mem_addr(char *, char *, uint64_t, uint64_t *);
#elif defined(__x86)
#include <sys/cpu_module.h>
#endif	/* __sparc */
     78 
/*
 * Turn a byte length into a page count.  The DDI btop takes a
 * 32-bit size on 32-bit machines; this macro shifts its argument in
 * whatever width it is given, so it also handles 64-bit sizes on
 * large physical-memory 32-bit machines.  The result is cast to pgcnt_t.
 */
#define	BTOP(x)	((pgcnt_t)((x) >> _pageshift))
     85 
/*
 * mm_lock is held by mmio() for the whole map/copy/unmap sequence and is
 * also installed as the lock of the "phys_installed" kstat.
 * mm_map is one page of kernel virtual address space, allocated from
 * heap_arena in mm_attach(), that mmio() uses as a temporary mapping window.
 */
static kmutex_t mm_lock;
static caddr_t mm_map;

static dev_info_t *mm_dip;	/* private copy of devinfo pointer */

/*
 * Value of the "kmem_io_access" property read in mm_attach() (default 0).
 * When non-zero, mmrw() lets /dev/kmem access pages that are not memory.
 */
static int mm_kmem_io_access;

/* kstat callbacks for the "phys_installed" kstat; defined below. */
static int mm_kstat_update(kstat_t *ksp, int rw);
static int mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw);

/* Copies a mem_name_t in from user space; defined below. */
static int mm_read_mem_name(intptr_t data, mem_name_t *mem_name);
     97 
     98 /*ARGSUSED1*/
     99 static int
    100 mm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
    101 {
    102 	int i;
    103 	struct mem_minor {
    104 		char *name;
    105 		minor_t minor;
    106 		int privonly;
    107 		const char *rdpriv;
    108 		const char *wrpriv;
    109 		mode_t priv_mode;
    110 	} mm[] = {
    111 		{ "mem",	M_MEM,		0,	NULL,	"all",	0640 },
    112 		{ "kmem",	M_KMEM,		0,	NULL,	"all",	0640 },
    113 		{ "allkmem",	M_ALLKMEM,	0,	"all",	"all",	0600 },
    114 		{ "null",	M_NULL,	PRIVONLY_DEV,	NULL,	NULL,	0666 },
    115 		{ "zero",	M_ZERO, PRIVONLY_DEV,	NULL,	NULL,	0666 },
    116 	};
    117 	kstat_t *ksp;
    118 
    119 	mutex_init(&mm_lock, NULL, MUTEX_DEFAULT, NULL);
    120 	mm_map = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
    121 
    122 	for (i = 0; i < (sizeof (mm) / sizeof (mm[0])); i++) {
    123 		if (ddi_create_priv_minor_node(devi, mm[i].name, S_IFCHR,
    124 		    mm[i].minor, DDI_PSEUDO, mm[i].privonly,
    125 		    mm[i].rdpriv, mm[i].wrpriv, mm[i].priv_mode) ==
    126 		    DDI_FAILURE) {
    127 			ddi_remove_minor_node(devi, NULL);
    128 			return (DDI_FAILURE);
    129 		}
    130 	}
    131 
    132 	mm_dip = devi;
    133 
    134 	ksp = kstat_create("mm", 0, "phys_installed", "misc",
    135 	    KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE | KSTAT_FLAG_VIRTUAL);
    136 	if (ksp != NULL) {
    137 		ksp->ks_update = mm_kstat_update;
    138 		ksp->ks_snapshot = mm_kstat_snapshot;
    139 		ksp->ks_lock = &mm_lock; /* XXX - not really needed */
    140 		kstat_install(ksp);
    141 	}
    142 
    143 	mm_kmem_io_access = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
    144 	    "kmem_io_access", 0);
    145 
    146 	return (DDI_SUCCESS);
    147 }
    148 
    149 /*ARGSUSED*/
    150 static int
    151 mm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
    152 {
    153 	register int error;
    154 
    155 	switch (infocmd) {
    156 	case DDI_INFO_DEVT2DEVINFO:
    157 		*result = (void *)mm_dip;
    158 		error = DDI_SUCCESS;
    159 		break;
    160 	case DDI_INFO_DEVT2INSTANCE:
    161 		*result = (void *)0;
    162 		error = DDI_SUCCESS;
    163 		break;
    164 	default:
    165 		error = DDI_FAILURE;
    166 	}
    167 	return (error);
    168 }
    169 
    170 /*ARGSUSED1*/
    171 static int
    172 mmopen(dev_t *devp, int flag, int typ, struct cred *cred)
    173 {
    174 	switch (getminor(*devp)) {
    175 	case M_NULL:
    176 	case M_ZERO:
    177 	case M_MEM:
    178 	case M_KMEM:
    179 	case M_ALLKMEM:
    180 		/* standard devices */
    181 		break;
    182 
    183 	default:
    184 		/* Unsupported or unknown type */
    185 		return (EINVAL);
    186 	}
    187 	/* must be character device */
    188 	if (typ != OTYP_CHR)
    189 		return (EINVAL);
    190 	return (0);
    191 }
    192 
/* Pollhead handed back by mmchpoll() when a caller polls for no events. */
struct pollhead	mm_pollhd;
    194 
    195 /*ARGSUSED*/
    196 static int
    197 mmchpoll(dev_t dev, short events, int anyyet, short *reventsp,
    198     struct pollhead **phpp)
    199 {
    200 	switch (getminor(dev)) {
    201 	case M_NULL:
    202 	case M_ZERO:
    203 	case M_MEM:
    204 	case M_KMEM:
    205 	case M_ALLKMEM:
    206 		*reventsp = events & (POLLIN | POLLOUT | POLLPRI | POLLRDNORM |
    207 		    POLLWRNORM | POLLRDBAND | POLLWRBAND);
    208 		/*
    209 		 * A non NULL pollhead pointer should be returned in case
    210 		 * user polls for 0 events.
    211 		 */
    212 		*phpp = !anyyet && !*reventsp ?
    213 		    &mm_pollhd : (struct pollhead *)NULL;
    214 		return (0);
    215 	default:
    216 		/* no other devices currently support polling */
    217 		return (ENXIO);
    218 	}
    219 }
    220 
    221 static int
    222 mmpropop(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags,
    223     char *name, caddr_t valuep, int *lengthp)
    224 {
    225 	/*
    226 	 * implement zero size to reduce overhead (avoid two failing
    227 	 * property lookups per stat).
    228 	 */
    229 	return (ddi_prop_op_size(dev, dip, prop_op,
    230 	    flags, name, valuep, lengthp, 0));
    231 }
    232 
/*
 * Transfer data between the caller's uio and the single physical page
 * identified by pfn.
 *
 *	uio	 - describes the user buffer; only the current iovec is used
 *	rw	 - UIO_READ or UIO_WRITE
 *	pfn	 - page frame to access
 *	pageoff	 - byte offset within that page at which to start
 *	allowio	 - non-zero to permit access when pfn is not memory
 *	pp	 - page_t for the frame if the caller has one, else NULL
 *
 * Returns 0 or an errno: EIO when pfn is not memory and allowio is zero,
 * EFAULT when ddi_peekpokeio() fails, otherwise whatever uiomove() returns.
 * The whole map/copy/unmap sequence runs with mm_lock held.
 */
static int
mmio(struct uio *uio, enum uio_rw rw, pfn_t pfn, off_t pageoff, int allowio,
    page_t *pp)
{
	int error = 0;
	int devload = 0;	/* set when the mm_map window was loaded */
	int is_memory = pf_is_memory(pfn);
	/* Never copy past the end of the page or of the current iovec. */
	size_t nbytes = MIN((size_t)(PAGESIZE - pageoff),
	    (size_t)uio->uio_iov->iov_len);
	caddr_t va = NULL;

	mutex_enter(&mm_lock);

	/* Prefer a kpm mapping for real memory when kpm is enabled. */
	if (is_memory && kpm_enable) {
		if (pp)
			va = hat_kpm_mapin(pp, NULL);
		else
			va = hat_kpm_mapin_pfn(pfn);
	}

	/*
	 * No kpm mapping was obtained: load the page at the mm_map window,
	 * read-only for reads and read/write for writes.
	 */
	if (va == NULL) {
		hat_devload(kas.a_hat, mm_map, PAGESIZE, pfn,
		    (uint_t)(rw == UIO_READ ? PROT_READ : PROT_READ|PROT_WRITE),
		    HAT_LOAD_NOCONSIST|HAT_LOAD_LOCK);
		va = mm_map;
		devload = 1;
	}

	if (!is_memory) {
		if (allowio) {
			/*
			 * Non-memory access goes through ddi_peekpokeio()
			 * using the uio offset as the address and the full
			 * length of the current iovec.
			 */
			size_t c = uio->uio_iov->iov_len;

			if (ddi_peekpokeio(NULL, uio, rw,
			    (caddr_t)(uintptr_t)uio->uio_loffset, c,
			    sizeof (int32_t)) != DDI_SUCCESS)
				error = EFAULT;
		} else
			error = EIO;
	} else
		error = uiomove(va + pageoff, nbytes, rw, uio);

	/* Tear down whichever mapping was established above. */
	if (devload)
		hat_unload(kas.a_hat, mm_map, PAGESIZE, HAT_UNLOAD_UNLOCK);
	else if (pp)
		hat_kpm_mapout(pp, NULL, va);
	else
		hat_kpm_mapout_pfn(pfn);

	mutex_exit(&mm_lock);
	return (error);
}
    284 
    285 static int
    286 mmpagelock(struct as *as, caddr_t va)
    287 {
    288 	struct seg *seg;
    289 	int i;
    290 
    291 	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
    292 	seg = as_segat(as, va);
    293 	i = (seg != NULL)? SEGOP_CAPABLE(seg, S_CAPABILITY_NOMINFLT) : 0;
    294 	AS_LOCK_EXIT(as, &as->a_lock);
    295 
    296 	return (i);
    297 }
    298 
/*
 * NEED_LOCK_KVADDR(kva) tells mmrw() whether it should try as_pagelock()
 * on a kernel virtual address before accessing it.  On sparc the answer
 * comes from mmpagelock() on the kernel address space; on other platforms
 * it is always 0.
 */
#ifdef	__sparc

#define	NEED_LOCK_KVADDR(kva)	mmpagelock(&kas, kva)

#else	/* __i386, __amd64 */

#define	NEED_LOCK_KVADDR(va)	0

#endif	/* __sparc */
    308 
    309 /*ARGSUSED3*/
    310 static int
    311 mmrw(dev_t dev, struct uio *uio, enum uio_rw rw, cred_t *cred)
    312 {
    313 	pfn_t v;
    314 	struct iovec *iov;
    315 	int error = 0;
    316 	size_t c;
    317 	ssize_t oresid = uio->uio_resid;
    318 	minor_t minor = getminor(dev);
    319 
    320 	while (uio->uio_resid > 0 && error == 0) {
    321 		iov = uio->uio_iov;
    322 		if (iov->iov_len == 0) {
    323 			uio->uio_iov++;
    324 			uio->uio_iovcnt--;
    325 			if (uio->uio_iovcnt < 0)
    326 				panic("mmrw");
    327 			continue;
    328 		}
    329 		switch (minor) {
    330 
    331 		case M_MEM:
    332 			memlist_read_lock();
    333 			if (!address_in_memlist(phys_install,
    334 			    (uint64_t)uio->uio_loffset, 1)) {
    335 				memlist_read_unlock();
    336 				error = EFAULT;
    337 				break;
    338 			}
    339 			memlist_read_unlock();
    340 
    341 			v = BTOP((u_offset_t)uio->uio_loffset);
    342 			error = mmio(uio, rw, v,
    343 			    uio->uio_loffset & PAGEOFFSET, 0, NULL);
    344 			break;
    345 
    346 		case M_KMEM:
    347 		case M_ALLKMEM:
    348 			{
    349 			page_t **ppp = NULL;
    350 			caddr_t vaddr = (caddr_t)uio->uio_offset;
    351 			int try_lock = NEED_LOCK_KVADDR(vaddr);
    352 			int locked = 0;
    353 
    354 			if ((error = plat_mem_do_mmio(uio, rw)) != ENOTSUP)
    355 				break;
    356 
    357 			/*
    358 			 * If vaddr does not map a valid page, as_pagelock()
    359 			 * will return failure. Hence we can't check the
    360 			 * return value and return EFAULT here as we'd like.
    361 			 * seg_kp and seg_kpm do not properly support
    362 			 * as_pagelock() for this context so we avoid it
    363 			 * using the try_lock set check above.  Some day when
    364 			 * the kernel page locking gets redesigned all this
    365 			 * muck can be cleaned up.
    366 			 */
    367 			if (try_lock)
    368 				locked = (as_pagelock(&kas, &ppp, vaddr,
    369 				    PAGESIZE, S_WRITE) == 0);
    370 
    371 			v = hat_getpfnum(kas.a_hat,
    372 			    (caddr_t)(uintptr_t)uio->uio_loffset);
    373 			if (v == PFN_INVALID) {
    374 				if (locked)
    375 					as_pageunlock(&kas, ppp, vaddr,
    376 					    PAGESIZE, S_WRITE);
    377 				error = EFAULT;
    378 				break;
    379 			}
    380 
    381 			error = mmio(uio, rw, v, uio->uio_loffset & PAGEOFFSET,
    382 			    minor == M_ALLKMEM || mm_kmem_io_access,
    383 			    (locked && ppp) ? *ppp : NULL);
    384 			if (locked)
    385 				as_pageunlock(&kas, ppp, vaddr, PAGESIZE,
    386 				    S_WRITE);
    387 			}
    388 
    389 			break;
    390 
    391 		case M_ZERO:
    392 			if (rw == UIO_READ) {
    393 				label_t ljb;
    394 
    395 				if (on_fault(&ljb)) {
    396 					no_fault();
    397 					error = EFAULT;
    398 					break;
    399 				}
    400 				uzero(iov->iov_base, iov->iov_len);
    401 				no_fault();
    402 				uio->uio_resid -= iov->iov_len;
    403 				uio->uio_loffset += iov->iov_len;
    404 				break;
    405 			}
    406 			/* else it's a write, fall through to NULL case */
    407 			/*FALLTHROUGH*/
    408 
    409 		case M_NULL:
    410 			if (rw == UIO_READ)
    411 				return (0);
    412 			c = iov->iov_len;
    413 			iov->iov_base += c;
    414 			iov->iov_len -= c;
    415 			uio->uio_loffset += c;
    416 			uio->uio_resid -= c;
    417 			break;
    418 
    419 		}
    420 	}
    421 	return (uio->uio_resid == oresid ? error : 0);
    422 }
    423 
    424 static int
    425 mmread(dev_t dev, struct uio *uio, cred_t *cred)
    426 {
    427 	return (mmrw(dev, uio, UIO_READ, cred));
    428 }
    429 
    430 static int
    431 mmwrite(dev_t dev, struct uio *uio, cred_t *cred)
    432 {
    433 	return (mmrw(dev, uio, UIO_WRITE, cred));
    434 }
    435 
/*
 * Private ioctl for libkvm to support kvm_physaddr().
 * Given an address space and a VA, compute the PA.
 *
 * `data' is the user address of a mem_vtop_t (or a mem_vtop32_t for a
 * non-native caller).  On success the structure is copied back out with
 * m_pfn filled in and 0 is returned.  Errors: EFAULT if the structure
 * cannot be copied in or out, EINVAL if a non-native caller names an
 * address space, EIO if the address space cannot be matched to a process
 * or the address has no valid translation.
 */
static int
mmioctl_vtop(intptr_t data)
{
#ifdef _SYSCALL32
	mem_vtop32_t vtop32;
#endif
	mem_vtop_t mem_vtop;
	proc_t *p;
	pfn_t pfn = (pfn_t)PFN_INVALID;
	pid_t pid = 0;
	struct as *as;
	struct seg *seg;

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyin((void *)data, &mem_vtop, sizeof (mem_vtop_t)))
			return (EFAULT);
	}
#ifdef _SYSCALL32
	else {
		if (copyin((void *)data, &vtop32, sizeof (mem_vtop32_t)))
			return (EFAULT);
		mem_vtop.m_as = (struct as *)(uintptr_t)vtop32.m_as;
		mem_vtop.m_va = (void *)(uintptr_t)vtop32.m_va;

		/* Non-native callers may only ask about their own as. */
		if (mem_vtop.m_as != NULL)
			return (EINVAL);
	}
#endif

	if (mem_vtop.m_as == &kas) {
		/* Kernel address space: translate directly. */
		pfn = hat_getpfnum(kas.a_hat, mem_vtop.m_va);
	} else {
		if (mem_vtop.m_as == NULL) {
			/*
			 * Assume the calling process's address space if the
			 * caller didn't specify one.
			 */
			p = curthread->t_procp;
			if (p == NULL)
				return (EIO);
			mem_vtop.m_as = p->p_as;
		}

		/*
		 * Find the process that owns this address space by walking
		 * the active process list under pidlock, remembering only
		 * its pid.
		 */
		mutex_enter(&pidlock);
		for (p = practive; p != NULL; p = p->p_next) {
			if (p->p_as == mem_vtop.m_as) {
				pid = p->p_pid;
				break;
			}
		}
		mutex_exit(&pidlock);
		if (p == NULL)
			return (EIO);
		/*
		 * Look the process up again by pid with sprlock() and
		 * re-check that it still has the requested address space.
		 */
		p = sprlock(pid);
		if (p == NULL)
			return (EIO);
		as = p->p_as;
		if (as == mem_vtop.m_as) {
			/*
			 * p_lock is dropped while the address space lock is
			 * held and re-taken before sprunlock().
			 */
			mutex_exit(&p->p_lock);
			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
			/*
			 * Only translate addresses that fall inside some
			 * segment; the unsigned subtraction checks
			 * s_base <= va < s_base + s_size in one compare.
			 */
			for (seg = AS_SEGFIRST(as); seg != NULL;
			    seg = AS_SEGNEXT(as, seg))
				if ((uintptr_t)mem_vtop.m_va -
				    (uintptr_t)seg->s_base < seg->s_size)
					break;
			if (seg != NULL)
				pfn = hat_getpfnum(as->a_hat, mem_vtop.m_va);
			AS_LOCK_EXIT(as, &as->a_lock);
			mutex_enter(&p->p_lock);
		}
		sprunlock(p);
	}
	mem_vtop.m_pfn = pfn;
	if (pfn == PFN_INVALID)
		return (EIO);

	/* Return the result in the same layout the caller passed in. */
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyout(&mem_vtop, (void *)data, sizeof (mem_vtop_t)))
			return (EFAULT);
	}
#ifdef _SYSCALL32
	else {
		vtop32.m_pfn = mem_vtop.m_pfn;
		if (copyout(&vtop32, (void *)data, sizeof (mem_vtop32_t)))
			return (EFAULT);
	}
#endif

	return (0);
}
    530 
    531 /*
    532  * Given a PA, execute the given page retire command on it.
    533  */
    534 static int
    535 mmioctl_page_retire(int cmd, intptr_t data)
    536 {
    537 	extern int page_retire_test(void);
    538 	uint64_t pa;
    539 
    540 	if (copyin((void *)data, &pa, sizeof (uint64_t))) {
    541 		return (EFAULT);
    542 	}
    543 
    544 	switch (cmd) {
    545 	case MEM_PAGE_ISRETIRED:
    546 		return (page_retire_check(pa, NULL));
    547 
    548 	case MEM_PAGE_UNRETIRE:
    549 		return (page_unretire(pa));
    550 
    551 	case MEM_PAGE_RETIRE:
    552 		return (page_retire(pa, PR_FMA));
    553 
    554 	case MEM_PAGE_RETIRE_MCE:
    555 		return (page_retire(pa, PR_MCE));
    556 
    557 	case MEM_PAGE_RETIRE_UE:
    558 		return (page_retire(pa, PR_UE));
    559 
    560 	case MEM_PAGE_GETERRORS:
    561 		{
    562 			uint64_t page_errors;
    563 			int rc = page_retire_check(pa, &page_errors);
    564 			if (copyout(&page_errors, (void *)data,
    565 			    sizeof (uint64_t))) {
    566 				return (EFAULT);
    567 			}
    568 			return (rc);
    569 		}
    570 
    571 	case MEM_PAGE_RETIRE_TEST:
    572 		return (page_retire_test());
    573 
    574 	}
    575 
    576 	return (EINVAL);
    577 }
    578 
    579 #ifdef __sparc
    580 /*
    581  * Given a syndrome, syndrome type, and address return the
    582  * associated memory name in the provided data buffer.
    583  */
    584 static int
    585 mmioctl_get_mem_name(intptr_t data)
    586 {
    587 	mem_name_t mem_name;
    588 	void *buf;
    589 	size_t bufsize;
    590 	int len, err;
    591 
    592 	if ((bufsize = cpu_get_name_bufsize()) == 0)
    593 		return (ENOTSUP);
    594 
    595 	if ((err = mm_read_mem_name(data, &mem_name)) < 0)
    596 		return (err);
    597 
    598 	buf = kmem_alloc(bufsize, KM_SLEEP);
    599 
    600 	/*
    601 	 * Call into cpu specific code to do the lookup.
    602 	 */
    603 	if ((err = cpu_get_mem_name(mem_name.m_synd, mem_name.m_type,
    604 	    mem_name.m_addr, buf, bufsize, &len)) != 0) {
    605 		kmem_free(buf, bufsize);
    606 		return (err);
    607 	}
    608 
    609 	if (len >= mem_name.m_namelen) {
    610 		kmem_free(buf, bufsize);
    611 		return (ENOSPC);
    612 	}
    613 
    614 	if (copyoutstr(buf, (char *)mem_name.m_name,
    615 	    mem_name.m_namelen, NULL) != 0) {
    616 		kmem_free(buf, bufsize);
    617 		return (EFAULT);
    618 	}
    619 
    620 	kmem_free(buf, bufsize);
    621 	return (0);
    622 }
    623 
    624 /*
    625  * Given a syndrome and address return information about the associated memory.
    626  */
    627 static int
    628 mmioctl_get_mem_info(intptr_t data)
    629 {
    630 	mem_info_t mem_info;
    631 	int err;
    632 
    633 	if (copyin((void *)data, &mem_info, sizeof (mem_info_t)))
    634 		return (EFAULT);
    635 
    636 	if ((err = cpu_get_mem_info(mem_info.m_synd, mem_info.m_addr,
    637 	    &mem_info.m_mem_size, &mem_info.m_seg_size, &mem_info.m_bank_size,
    638 	    &mem_info.m_segments, &mem_info.m_banks, &mem_info.m_mcid)) != 0)
    639 		return (err);
    640 
    641 	if (copyout(&mem_info, (void *)data, sizeof (mem_info_t)) != 0)
    642 		return (EFAULT);
    643 
    644 	return (0);
    645 }
    646 
    647 /*
    648  * Given a memory name, return its associated serial id
    649  */
    650 static int
    651 mmioctl_get_mem_sid(intptr_t data)
    652 {
    653 	mem_name_t mem_name;
    654 	void *buf;
    655 	void *name;
    656 	size_t	name_len;
    657 	size_t bufsize;
    658 	int len, err;
    659 
    660 	if ((bufsize = cpu_get_name_bufsize()) == 0)
    661 		return (ENOTSUP);
    662 
    663 	if ((err = mm_read_mem_name(data, &mem_name)) < 0)
    664 		return (err);
    665 
    666 	buf = kmem_alloc(bufsize, KM_SLEEP);
    667 
    668 	if (mem_name.m_namelen > 1024)
    669 		mem_name.m_namelen = 1024; /* cap at 1024 bytes */
    670 
    671 	name = kmem_alloc(mem_name.m_namelen, KM_SLEEP);
    672 
    673 	if ((err = copyinstr((char *)mem_name.m_name, (char *)name,
    674 	    mem_name.m_namelen, &name_len)) != 0) {
    675 		kmem_free(buf, bufsize);
    676 		kmem_free(name, mem_name.m_namelen);
    677 		return (err);
    678 	}
    679 
    680 	/*
    681 	 * Call into cpu specific code to do the lookup.
    682 	 */
    683 	if ((err = cpu_get_mem_sid(name, buf, bufsize, &len)) != 0) {
    684 		kmem_free(buf, bufsize);
    685 		kmem_free(name, mem_name.m_namelen);
    686 		return (err);
    687 	}
    688 
    689 	if (len > mem_name.m_sidlen) {
    690 		kmem_free(buf, bufsize);
    691 		kmem_free(name, mem_name.m_namelen);
    692 		return (ENAMETOOLONG);
    693 	}
    694 
    695 	if (copyoutstr(buf, (char *)mem_name.m_sid,
    696 	    mem_name.m_sidlen, NULL) != 0) {
    697 		kmem_free(buf, bufsize);
    698 		kmem_free(name, mem_name.m_namelen);
    699 		return (EFAULT);
    700 	}
    701 
    702 	kmem_free(buf, bufsize);
    703 	kmem_free(name, mem_name.m_namelen);
    704 	return (0);
    705 }
    706 #endif	/* __sparc */
    707 
    708 /*
    709  * Private ioctls for
    710  *	libkvm to support kvm_physaddr().
    711  *	FMA support for page_retire() and memory attribute information.
    712  */
    713 /*ARGSUSED*/
    714 static int
    715 mmioctl(dev_t dev, int cmd, intptr_t data, int flag, cred_t *cred, int *rvalp)
    716 {
    717 	if ((cmd == MEM_VTOP && getminor(dev) != M_KMEM) ||
    718 	    (cmd != MEM_VTOP && getminor(dev) != M_MEM))
    719 		return (ENXIO);
    720 
    721 	switch (cmd) {
    722 	case MEM_VTOP:
    723 		return (mmioctl_vtop(data));
    724 
    725 	case MEM_PAGE_RETIRE:
    726 	case MEM_PAGE_ISRETIRED:
    727 	case MEM_PAGE_UNRETIRE:
    728 	case MEM_PAGE_RETIRE_MCE:
    729 	case MEM_PAGE_RETIRE_UE:
    730 	case MEM_PAGE_GETERRORS:
    731 	case MEM_PAGE_RETIRE_TEST:
    732 		return (mmioctl_page_retire(cmd, data));
    733 
    734 #ifdef __sparc
    735 	case MEM_NAME:
    736 		return (mmioctl_get_mem_name(data));
    737 
    738 	case MEM_INFO:
    739 		return (mmioctl_get_mem_info(data));
    740 
    741 	case MEM_SID:
    742 		return (mmioctl_get_mem_sid(data));
    743 #else
    744 	case MEM_NAME:
    745 	case MEM_INFO:
    746 	case MEM_SID:
    747 		return (ENOTSUP);
    748 #endif	/* __sparc */
    749 	}
    750 	return (ENXIO);
    751 }
    752 
    753 /*ARGSUSED2*/
    754 static int
    755 mmmmap(dev_t dev, off_t off, int prot)
    756 {
    757 	pfn_t pf;
    758 	struct memlist *pmem;
    759 	minor_t minor = getminor(dev);
    760 
    761 	switch (minor) {
    762 	case M_MEM:
    763 		pf = btop(off);
    764 		memlist_read_lock();
    765 		for (pmem = phys_install; pmem != NULL; pmem = pmem->next) {
    766 			if (pf >= BTOP(pmem->address) &&
    767 			    pf < BTOP(pmem->address + pmem->size)) {
    768 				memlist_read_unlock();
    769 				return (impl_obmem_pfnum(pf));
    770 			}
    771 		}
    772 		memlist_read_unlock();
    773 		break;
    774 
    775 	case M_KMEM:
    776 	case M_ALLKMEM:
    777 		/* no longer supported with KPR */
    778 		return (-1);
    779 
    780 	case M_ZERO:
    781 		/*
    782 		 * We shouldn't be mmap'ing to /dev/zero here as
    783 		 * mmsegmap() should have already converted
    784 		 * a mapping request for this device to a mapping
    785 		 * using seg_vn for anonymous memory.
    786 		 */
    787 		break;
    788 
    789 	}
    790 	return (-1);
    791 }
    792 
    793 /*
    794  * This function is called when a memory device is mmap'ed.
    795  * Set up the mapping to the correct device driver.
    796  */
    797 static int
    798 mmsegmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
    799     uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
    800 {
    801 	struct segvn_crargs vn_a;
    802 	struct segdev_crargs dev_a;
    803 	int error;
    804 	minor_t minor;
    805 	off_t i;
    806 
    807 	minor = getminor(dev);
    808 
    809 	as_rangelock(as);
    810 	/*
    811 	 * No need to worry about vac alignment on /dev/zero
    812 	 * since this is a "clone" object that doesn't yet exist.
    813 	 */
    814 	error = choose_addr(as, addrp, len, off,
    815 	    (minor == M_MEM) || (minor == M_KMEM), flags);
    816 	if (error != 0) {
    817 		as_rangeunlock(as);
    818 		return (error);
    819 	}
    820 
    821 	switch (minor) {
    822 	case M_MEM:
    823 		/* /dev/mem cannot be mmap'ed with MAP_PRIVATE */
    824 		if ((flags & MAP_TYPE) != MAP_SHARED) {
    825 			as_rangeunlock(as);
    826 			return (EINVAL);
    827 		}
    828 
    829 		/*
    830 		 * Check to ensure that the entire range is
    831 		 * legal and we are not trying to map in
    832 		 * more than the device will let us.
    833 		 */
    834 		for (i = 0; i < len; i += PAGESIZE) {
    835 			if (mmmmap(dev, off + i, maxprot) == -1) {
    836 				as_rangeunlock(as);
    837 				return (ENXIO);
    838 			}
    839 		}
    840 
    841 		/*
    842 		 * Use seg_dev segment driver for /dev/mem mapping.
    843 		 */
    844 		dev_a.mapfunc = mmmmap;
    845 		dev_a.dev = dev;
    846 		dev_a.offset = off;
    847 		dev_a.type = (flags & MAP_TYPE);
    848 		dev_a.prot = (uchar_t)prot;
    849 		dev_a.maxprot = (uchar_t)maxprot;
    850 		dev_a.hat_attr = 0;
    851 
    852 		/*
    853 		 * Make /dev/mem mappings non-consistent since we can't
    854 		 * alias pages that don't have page structs behind them,
    855 		 * such as kernel stack pages. If someone mmap()s a kernel
    856 		 * stack page and if we give him a tte with cv, a line from
    857 		 * that page can get into both pages of the spitfire d$.
    858 		 * But snoop from another processor will only invalidate
    859 		 * the first page. This later caused kernel (xc_attention)
    860 		 * to go into an infinite loop at pil 13 and no interrupts
    861 		 * could come in. See 1203630.
    862 		 *
    863 		 */
    864 		dev_a.hat_flags = HAT_LOAD_NOCONSIST;
    865 		dev_a.devmap_data = NULL;
    866 
    867 		error = as_map(as, *addrp, len, segdev_create, &dev_a);
    868 		break;
    869 
    870 	case M_ZERO:
    871 		/*
    872 		 * Use seg_vn segment driver for /dev/zero mapping.
    873 		 * Passing in a NULL amp gives us the "cloning" effect.
    874 		 */
    875 		vn_a.vp = NULL;
    876 		vn_a.offset = 0;
    877 		vn_a.type = (flags & MAP_TYPE);
    878 		vn_a.prot = prot;
    879 		vn_a.maxprot = maxprot;
    880 		vn_a.flags = flags & ~MAP_TYPE;
    881 		vn_a.cred = cred;
    882 		vn_a.amp = NULL;
    883 		vn_a.szc = 0;
    884 		vn_a.lgrp_mem_policy_flags = 0;
    885 		error = as_map(as, *addrp, len, segvn_create, &vn_a);
    886 		break;
    887 
    888 	case M_KMEM:
    889 	case M_ALLKMEM:
    890 		/* No longer supported with KPR. */
    891 		error = ENXIO;
    892 		break;
    893 
    894 	case M_NULL:
    895 		/*
    896 		 * Use seg_dev segment driver for /dev/null mapping.
    897 		 */
    898 		dev_a.mapfunc = mmmmap;
    899 		dev_a.dev = dev;
    900 		dev_a.offset = off;
    901 		dev_a.type = 0;		/* neither PRIVATE nor SHARED */
    902 		dev_a.prot = dev_a.maxprot = (uchar_t)PROT_NONE;
    903 		dev_a.hat_attr = 0;
    904 		dev_a.hat_flags = 0;
    905 		error = as_map(as, *addrp, len, segdev_create, &dev_a);
    906 		break;
    907 
    908 	default:
    909 		error = ENXIO;
    910 	}
    911 
    912 	as_rangeunlock(as);
    913 	return (error);
    914 }
    915 
/*
 * Character device entry points.  Close is a no-op (nulldev); strategy,
 * print, dump and devmap are not supported (nodev).
 */
static struct cb_ops mm_cb_ops = {
	mmopen,			/* open */
	nulldev,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	mmread,			/* read */
	mmwrite,		/* write */
	mmioctl,		/* ioctl */
	nodev,			/* devmap */
	mmmmap,			/* mmap */
	mmsegmap,		/* segmap */
	mmchpoll,		/* poll */
	mmpropop,		/* prop_op */
	0,			/* streamtab  */
	D_NEW | D_MP | D_64BIT | D_U64BIT
};
    933 
/*
 * Device operations.  Only getinfo and attach are implemented; the detach
 * and reset slots hold nodev.
 */
static struct dev_ops mm_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt  */
	mm_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	mm_attach,		/* attach */
	nodev,			/* detach */
	nodev,			/* reset */
	&mm_cb_ops,		/* driver operations */
	(struct bus_ops *)0,	/* bus operations */
	NULL,			/* power */
	ddi_quiesce_not_needed,		/* quiesce */
};
    948 
/* Module linkage: a single driver module described as "memory driver". */
static struct modldrv modldrv = {
	&mod_driverops, "memory driver", &mm_ops,
};

/* Passed to mod_install(), mod_info() and mod_remove() below. */
static struct modlinkage modlinkage = {
	MODREV_1, &modldrv, NULL
};
    956 
    957 int
    958 _init(void)
    959 {
    960 	return (mod_install(&modlinkage));
    961 }
    962 
    963 int
    964 _info(struct modinfo *modinfop)
    965 {
    966 	return (mod_info(&modlinkage, modinfop));
    967 }
    968 
    969 int
    970 _fini(void)
    971 {
    972 	return (mod_remove(&modlinkage));
    973 }
    974 
    975 static int
    976 mm_kstat_update(kstat_t *ksp, int rw)
    977 {
    978 	struct memlist *pmem;
    979 	uint_t count;
    980 
    981 	if (rw == KSTAT_WRITE)
    982 		return (EACCES);
    983 
    984 	count = 0;
    985 	memlist_read_lock();
    986 	for (pmem = phys_install; pmem != NULL; pmem = pmem->next) {
    987 		count++;
    988 	}
    989 	memlist_read_unlock();
    990 
    991 	ksp->ks_ndata = count;
    992 	ksp->ks_data_size = count * 2 * sizeof (uint64_t);
    993 
    994 	return (0);
    995 }
    996 
    997 static int
    998 mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
    999 {
   1000 	struct memlist *pmem;
   1001 	struct memunit {
   1002 		uint64_t address;
   1003 		uint64_t size;
   1004 	} *kspmem;
   1005 
   1006 	if (rw == KSTAT_WRITE)
   1007 		return (EACCES);
   1008 
   1009 	ksp->ks_snaptime = gethrtime();
   1010 
   1011 	kspmem = (struct memunit *)buf;
   1012 	memlist_read_lock();
   1013 	for (pmem = phys_install; pmem != NULL; pmem = pmem->next, kspmem++) {
   1014 		if ((caddr_t)kspmem >= (caddr_t)buf + ksp->ks_data_size)
   1015 			break;
   1016 		kspmem->address = pmem->address;
   1017 		kspmem->size = pmem->size;
   1018 	}
   1019 	memlist_read_unlock();
   1020 
   1021 	return (0);
   1022 }
   1023 
   1024 /*
   1025  * Read a mem_name_t from user-space and store it in the mem_name_t
   1026  * pointed to by the mem_name argument.
   1027  */
   1028 static int
   1029 mm_read_mem_name(intptr_t data, mem_name_t *mem_name)
   1030 {
   1031 	if (get_udatamodel() == DATAMODEL_NATIVE) {
   1032 		if (copyin((void *)data, mem_name, sizeof (mem_name_t)))
   1033 			return (EFAULT);
   1034 	}
   1035 #ifdef	_SYSCALL32
   1036 	else {
   1037 		mem_name32_t mem_name32;
   1038 
   1039 		if (copyin((void *)data, &mem_name32, sizeof (mem_name32_t)))
   1040 			return (EFAULT);
   1041 		mem_name->m_addr = mem_name32.m_addr;
   1042 		mem_name->m_synd = mem_name32.m_synd;
   1043 		mem_name->m_type[0] = mem_name32.m_type[0];
   1044 		mem_name->m_type[1] = mem_name32.m_type[1];
   1045 		mem_name->m_name = (caddr_t)(uintptr_t)mem_name32.m_name;
   1046 		mem_name->m_namelen = (size_t)mem_name32.m_namelen;
   1047 		mem_name->m_sid = (caddr_t)(uintptr_t)mem_name32.m_sid;
   1048 		mem_name->m_sidlen = (size_t)mem_name32.m_sidlen;
   1049 	}
   1050 #endif	/* _SYSCALL32 */
   1051 
   1052 	return (0);
   1053 }
   1054