Home | History | Annotate | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 /*
     28  * PC specific DDI implementation
     29  */
     30 #include <sys/types.h>
     31 #include <sys/autoconf.h>
     32 #include <sys/avintr.h>
     33 #include <sys/bootconf.h>
     34 #include <sys/conf.h>
     35 #include <sys/cpuvar.h>
     36 #include <sys/ddi_impldefs.h>
     37 #include <sys/ddi_subrdefs.h>
     38 #include <sys/ethernet.h>
     39 #include <sys/fp.h>
     40 #include <sys/instance.h>
     41 #include <sys/kmem.h>
     42 #include <sys/machsystm.h>
     43 #include <sys/modctl.h>
     44 #include <sys/promif.h>
     45 #include <sys/prom_plat.h>
     46 #include <sys/sunndi.h>
     47 #include <sys/ndi_impldefs.h>
     48 #include <sys/ddi_impldefs.h>
     49 #include <sys/sysmacros.h>
     50 #include <sys/systeminfo.h>
     51 #include <sys/utsname.h>
     52 #include <sys/atomic.h>
     53 #include <sys/spl.h>
     54 #include <sys/archsystm.h>
     55 #include <vm/seg_kmem.h>
     56 #include <sys/ontrap.h>
     57 #include <sys/fm/protocol.h>
     58 #include <sys/ramdisk.h>
     59 #include <sys/sunndi.h>
     60 #include <sys/vmem.h>
     61 #include <sys/pci_impl.h>
     62 #if defined(__xpv)
     63 #include <sys/hypervisor.h>
     64 #endif
     65 #include <sys/mach_intr.h>
     66 #include <vm/hat_i86.h>
     67 #include <sys/x86_archext.h>
     68 
     69 /*
     70  * DDI Boot Configuration
     71  */
     72 
     73 /*
     74  * Platform drivers on this platform
     75  */
     76 char *platform_module_list[] = {
     77 	"acpippm",
     78 	"ppm",
     79 	(char *)0
     80 };
     81 
     82 /* pci bus resource maps */
     83 struct pci_bus_resource *pci_bus_res;
     84 
     85 size_t dma_max_copybuf_size = 0x101000;		/* 1M + 4K */
     86 
     87 uint64_t ramdisk_start, ramdisk_end;
     88 
     89 int pseudo_isa = 0;
     90 
     91 /*
     92  * Forward declarations
     93  */
     94 static int getlongprop_buf();
     95 static void get_boot_properties(void);
     96 static void impl_bus_initialprobe(void);
     97 static void impl_bus_reprobe(void);
     98 
     99 static int poke_mem(peekpoke_ctlops_t *in_args);
    100 static int peek_mem(peekpoke_ctlops_t *in_args);
    101 
    102 static int kmem_override_cache_attrs(caddr_t, size_t, uint_t);
    103 
    104 #if defined(__amd64) && !defined(__xpv)
    105 extern void immu_init(void);
    106 #endif
    107 
    108 #define	CTGENTRIES	15
    109 
    110 static struct ctgas {
    111 	struct ctgas	*ctg_next;
    112 	int		ctg_index;
    113 	void		*ctg_addr[CTGENTRIES];
    114 	size_t		ctg_size[CTGENTRIES];
    115 } ctglist;
    116 
    117 static kmutex_t		ctgmutex;
    118 #define	CTGLOCK()	mutex_enter(&ctgmutex)
    119 #define	CTGUNLOCK()	mutex_exit(&ctgmutex)
    120 
    121 /*
    122  * Minimum pfn value of page_t's put on the free list.  This is to simplify
    123  * support of ddi dma memory requests which specify small, non-zero addr_lo
    124  * values.
    125  *
    126  * The default value of 2, which corresponds to the only known non-zero addr_lo
    127  * value used, means a single page will be sacrificed (pfn typically starts
    128  * at 1).  ddiphysmin can be set to 0 to disable. It cannot be set above 0x100
    129  * otherwise mp startup panics.
    130  */
    131 pfn_t	ddiphysmin = 2;
    132 
    133 static void
    134 check_driver_disable(void)
    135 {
    136 	int proplen = 128;
    137 	char *prop_name;
    138 	char *drv_name, *propval;
    139 	major_t major;
    140 
    141 	prop_name = kmem_alloc(proplen, KM_SLEEP);
    142 	for (major = 0; major < devcnt; major++) {
    143 		drv_name = ddi_major_to_name(major);
    144 		if (drv_name == NULL)
    145 			continue;
    146 		(void) snprintf(prop_name, proplen, "disable-%s", drv_name);
    147 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(),
    148 		    DDI_PROP_DONTPASS, prop_name, &propval) == DDI_SUCCESS) {
    149 			if (strcmp(propval, "true") == 0) {
    150 				devnamesp[major].dn_flags |= DN_DRIVER_REMOVED;
    151 				cmn_err(CE_NOTE, "driver %s disabled",
    152 				    drv_name);
    153 			}
    154 			ddi_prop_free(propval);
    155 		}
    156 	}
    157 	kmem_free(prop_name, proplen);
    158 }
    159 
    160 
    161 /*
    162  * Configure the hardware on the system.
    163  * Called before the rootfs is mounted
    164  */
    165 void
    166 configure(void)
    167 {
    168 	extern void i_ddi_init_root();
    169 
    170 #if defined(__i386)
    171 	extern int fpu_pentium_fdivbug;
    172 #endif	/* __i386 */
    173 	extern int fpu_ignored;
    174 
    175 	/*
    176 	 * Determine if an FPU is attached
    177 	 */
    178 
    179 	fpu_probe();
    180 
    181 #if defined(__i386)
    182 	if (fpu_pentium_fdivbug) {
    183 		printf("\
    184 FP hardware exhibits Pentium floating point divide problem\n");
    185 	}
    186 #endif	/* __i386 */
    187 
    188 	if (fpu_ignored) {
    189 		printf("FP hardware will not be used\n");
    190 	} else if (!fpu_exists) {
    191 		printf("No FPU in configuration\n");
    192 	}
    193 
    194 	/*
    195 	 * Initialize devices on the machine.
    196 	 * Uses configuration tree built by the PROMs to determine what
    197 	 * is present, and builds a tree of prototype dev_info nodes
    198 	 * corresponding to the hardware which identified itself.
    199 	 */
    200 
    201 	/*
    202 	 * Initialize root node.
    203 	 */
    204 	i_ddi_init_root();
    205 
    206 	/* reprogram devices not set up by firmware (BIOS) */
    207 	impl_bus_reprobe();
    208 
    209 #if defined(__amd64) && !defined(__xpv)
    210 	/*
    211 	 * Setup but don't startup the IOMMU
    212 	 * Startup happens later via a direct call
    213 	 * to IOMMU code by boot code.
    214 	 * At this point, all PCI bus renumbering
    215 	 * is done, so safe to init the IMMU
    216 	 * AKA Intel IOMMU.
    217 	 */
    218 	immu_init();
    219 #endif
    220 
    221 	/*
    222 	 * attach the isa nexus to get ACPI resource usage
    223 	 * isa is "kind of" a pseudo node
    224 	 */
    225 #if defined(__xpv)
    226 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
    227 		if (pseudo_isa)
    228 			(void) i_ddi_attach_pseudo_node("isa");
    229 		else
    230 			(void) i_ddi_attach_hw_nodes("isa");
    231 	}
    232 #else
    233 	if (pseudo_isa)
    234 		(void) i_ddi_attach_pseudo_node("isa");
    235 	else
    236 		(void) i_ddi_attach_hw_nodes("isa");
    237 #endif
    238 }
    239 
    240 /*
    241  * The "status" property indicates the operational status of a device.
    242  * If this property is present, the value is a string indicating the
    243  * status of the device as follows:
    244  *
    245  *	"okay"		operational.
    246  *	"disabled"	not operational, but might become operational.
    247  *	"fail"		not operational because a fault has been detected,
    248  *			and it is unlikely that the device will become
    249  *			operational without repair. no additional details
    250  *			are available.
    251  *	"fail-xxx"	not operational because a fault has been detected,
    252  *			and it is unlikely that the device will become
    253  *			operational without repair. "xxx" is additional
    254  *			human-readable information about the particular
    255  *			fault condition that was detected.
    256  *
    257  * The absence of this property means that the operational status is
    258  * unknown or okay.
    259  *
    260  * This routine checks the status property of the specified device node
    261  * and returns 0 if the operational status indicates failure, and 1 otherwise.
    262  *
    263  * The property may exist on plug-in cards the existed before IEEE 1275-1994.
    264  * And, in that case, the property may not even be a string. So we carefully
    265  * check for the value "fail", in the beginning of the string, noting
    266  * the property length.
    267  */
    268 int
    269 status_okay(int id, char *buf, int buflen)
    270 {
    271 	char status_buf[OBP_MAXPROPNAME];
    272 	char *bufp = buf;
    273 	int len = buflen;
    274 	int proplen;
    275 	static const char *status = "status";
    276 	static const char *fail = "fail";
    277 	int fail_len = (int)strlen(fail);
    278 
    279 	/*
    280 	 * Get the proplen ... if it's smaller than "fail",
    281 	 * or doesn't exist ... then we don't care, since
    282 	 * the value can't begin with the char string "fail".
    283 	 *
    284 	 * NB: proplen, if it's a string, includes the NULL in the
    285 	 * the size of the property, and fail_len does not.
    286 	 */
    287 	proplen = prom_getproplen((pnode_t)id, (caddr_t)status);
    288 	if (proplen <= fail_len)	/* nonexistant or uninteresting len */
    289 		return (1);
    290 
    291 	/*
    292 	 * if a buffer was provided, use it
    293 	 */
    294 	if ((buf == (char *)NULL) || (buflen <= 0)) {
    295 		bufp = status_buf;
    296 		len = sizeof (status_buf);
    297 	}
    298 	*bufp = (char)0;
    299 
    300 	/*
    301 	 * Get the property into the buffer, to the extent of the buffer,
    302 	 * and in case the buffer is smaller than the property size,
    303 	 * NULL terminate the buffer. (This handles the case where
    304 	 * a buffer was passed in and the caller wants to print the
    305 	 * value, but the buffer was too small).
    306 	 */
    307 	(void) prom_bounded_getprop((pnode_t)id, (caddr_t)status,
    308 	    (caddr_t)bufp, len);
    309 	*(bufp + len - 1) = (char)0;
    310 
    311 	/*
    312 	 * If the value begins with the char string "fail",
    313 	 * then it means the node is failed. We don't care
    314 	 * about any other values. We assume the node is ok
    315 	 * although it might be 'disabled'.
    316 	 */
    317 	if (strncmp(bufp, fail, fail_len) == 0)
    318 		return (0);
    319 
    320 	return (1);
    321 }
    322 
    323 /*
    324  * Check the status of the device node passed as an argument.
    325  *
    326  *	if ((status is OKAY) || (status is DISABLED))
    327  *		return DDI_SUCCESS
    328  *	else
    329  *		print a warning and return DDI_FAILURE
    330  */
    331 /*ARGSUSED1*/
    332 int
    333 check_status(int id, char *name, dev_info_t *parent)
    334 {
    335 	char status_buf[64];
    336 	char devtype_buf[OBP_MAXPROPNAME];
    337 	int retval = DDI_FAILURE;
    338 
    339 	/*
    340 	 * is the status okay?
    341 	 */
    342 	if (status_okay(id, status_buf, sizeof (status_buf)))
    343 		return (DDI_SUCCESS);
    344 
    345 	/*
    346 	 * a status property indicating bad memory will be associated
    347 	 * with a node which has a "device_type" property with a value of
    348 	 * "memory-controller". in this situation, return DDI_SUCCESS
    349 	 */
    350 	if (getlongprop_buf(id, OBP_DEVICETYPE, devtype_buf,
    351 	    sizeof (devtype_buf)) > 0) {
    352 		if (strcmp(devtype_buf, "memory-controller") == 0)
    353 			retval = DDI_SUCCESS;
    354 	}
    355 
    356 	/*
    357 	 * print the status property information
    358 	 */
    359 	cmn_err(CE_WARN, "status '%s' for '%s'", status_buf, name);
    360 	return (retval);
    361 }
    362 
    363 /*ARGSUSED*/
    364 uint_t
    365 softlevel1(caddr_t arg1, caddr_t arg2)
    366 {
    367 	softint();
    368 	return (1);
    369 }
    370 
    371 /*
    372  * Allow for implementation specific correction of PROM property values.
    373  */
    374 
    375 /*ARGSUSED*/
    376 void
    377 impl_fix_props(dev_info_t *dip, dev_info_t *ch_dip, char *name, int len,
    378     caddr_t buffer)
    379 {
    380 	/*
    381 	 * There are no adjustments needed in this implementation.
    382 	 */
    383 }
    384 
    385 static int
    386 getlongprop_buf(int id, char *name, char *buf, int maxlen)
    387 {
    388 	int size;
    389 
    390 	size = prom_getproplen((pnode_t)id, name);
    391 	if (size <= 0 || (size > maxlen - 1))
    392 		return (-1);
    393 
    394 	if (-1 == prom_getprop((pnode_t)id, name, buf))
    395 		return (-1);
    396 
    397 	if (strcmp("name", name) == 0) {
    398 		if (buf[size - 1] != '\0') {
    399 			buf[size] = '\0';
    400 			size += 1;
    401 		}
    402 	}
    403 
    404 	return (size);
    405 }
    406 
    407 static int
    408 get_prop_int_array(dev_info_t *di, char *pname, int **pval, uint_t *plen)
    409 {
    410 	int ret;
    411 
    412 	if ((ret = ddi_prop_lookup_int_array(DDI_DEV_T_ANY, di,
    413 	    DDI_PROP_DONTPASS, pname, pval, plen))
    414 	    == DDI_PROP_SUCCESS) {
    415 		*plen = (*plen) * (sizeof (int));
    416 	}
    417 	return (ret);
    418 }
    419 
    420 
    421 /*
    422  * Node Configuration
    423  */
    424 
    425 struct prop_ispec {
    426 	uint_t	pri, vec;
    427 };
    428 
    429 /*
    430  * For the x86, we're prepared to claim that the interrupt string
    431  * is in the form of a list of <ipl,vec> specifications.
    432  */
    433 
    434 #define	VEC_MIN	1
    435 #define	VEC_MAX	255
    436 
    437 static int
    438 impl_xlate_intrs(dev_info_t *child, int *in,
    439     struct ddi_parent_private_data *pdptr)
    440 {
    441 	size_t size;
    442 	int n;
    443 	struct intrspec *new;
    444 	caddr_t got_prop;
    445 	int *inpri;
    446 	int got_len;
    447 	extern int ignore_hardware_nodes;	/* force flag from ddi_impl.c */
    448 
    449 	static char bad_intr_fmt[] =
    450 	    "bad interrupt spec from %s%d - ipl %d, irq %d\n";
    451 
    452 	/*
    453 	 * determine if the driver is expecting the new style "interrupts"
    454 	 * property which just contains the IRQ, or the old style which
    455 	 * contains pairs of <IPL,IRQ>.  if it is the new style, we always
    456 	 * assign IPL 5 unless an "interrupt-priorities" property exists.
    457 	 * in that case, the "interrupt-priorities" property contains the
    458 	 * IPL values that match, one for one, the IRQ values in the
    459 	 * "interrupts" property.
    460 	 */
    461 	inpri = NULL;
    462 	if ((ddi_getprop(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS,
    463 	    "ignore-hardware-nodes", -1) != -1) || ignore_hardware_nodes) {
    464 		/* the old style "interrupts" property... */
    465 
    466 		/*
    467 		 * The list consists of <ipl,vec> elements
    468 		 */
    469 		if ((n = (*in++ >> 1)) < 1)
    470 			return (DDI_FAILURE);
    471 
    472 		pdptr->par_nintr = n;
    473 		size = n * sizeof (struct intrspec);
    474 		new = pdptr->par_intr = kmem_zalloc(size, KM_SLEEP);
    475 
    476 		while (n--) {
    477 			int level = *in++;
    478 			int vec = *in++;
    479 
    480 			if (level < 1 || level > MAXIPL ||
    481 			    vec < VEC_MIN || vec > VEC_MAX) {
    482 				cmn_err(CE_CONT, bad_intr_fmt,
    483 				    DEVI(child)->devi_name,
    484 				    DEVI(child)->devi_instance, level, vec);
    485 				goto broken;
    486 			}
    487 			new->intrspec_pri = level;
    488 			if (vec != 2)
    489 				new->intrspec_vec = vec;
    490 			else
    491 				/*
    492 				 * irq 2 on the PC bus is tied to irq 9
    493 				 * on ISA, EISA and MicroChannel
    494 				 */
    495 				new->intrspec_vec = 9;
    496 			new++;
    497 		}
    498 
    499 		return (DDI_SUCCESS);
    500 	} else {
    501 		/* the new style "interrupts" property... */
    502 
    503 		/*
    504 		 * The list consists of <vec> elements
    505 		 */
    506 		if ((n = (*in++)) < 1)
    507 			return (DDI_FAILURE);
    508 
    509 		pdptr->par_nintr = n;
    510 		size = n * sizeof (struct intrspec);
    511 		new = pdptr->par_intr = kmem_zalloc(size, KM_SLEEP);
    512 
    513 		/* XXX check for "interrupt-priorities" property... */
    514 		if (ddi_getlongprop(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS,
    515 		    "interrupt-priorities", (caddr_t)&got_prop, &got_len)
    516 		    == DDI_PROP_SUCCESS) {
    517 			if (n != (got_len / sizeof (int))) {
    518 				cmn_err(CE_CONT,
    519 				    "bad interrupt-priorities length"
    520 				    " from %s%d: expected %d, got %d\n",
    521 				    DEVI(child)->devi_name,
    522 				    DEVI(child)->devi_instance, n,
    523 				    (int)(got_len / sizeof (int)));
    524 				goto broken;
    525 			}
    526 			inpri = (int *)got_prop;
    527 		}
    528 
    529 		while (n--) {
    530 			int level;
    531 			int vec = *in++;
    532 
    533 			if (inpri == NULL)
    534 				level = 5;
    535 			else
    536 				level = *inpri++;
    537 
    538 			if (level < 1 || level > MAXIPL ||
    539 			    vec < VEC_MIN || vec > VEC_MAX) {
    540 				cmn_err(CE_CONT, bad_intr_fmt,
    541 				    DEVI(child)->devi_name,
    542 				    DEVI(child)->devi_instance, level, vec);
    543 				goto broken;
    544 			}
    545 			new->intrspec_pri = level;
    546 			if (vec != 2)
    547 				new->intrspec_vec = vec;
    548 			else
    549 				/*
    550 				 * irq 2 on the PC bus is tied to irq 9
    551 				 * on ISA, EISA and MicroChannel
    552 				 */
    553 				new->intrspec_vec = 9;
    554 			new++;
    555 		}
    556 
    557 		if (inpri != NULL)
    558 			kmem_free(got_prop, got_len);
    559 		return (DDI_SUCCESS);
    560 	}
    561 
    562 broken:
    563 	kmem_free(pdptr->par_intr, size);
    564 	pdptr->par_intr = NULL;
    565 	pdptr->par_nintr = 0;
    566 	if (inpri != NULL)
    567 		kmem_free(got_prop, got_len);
    568 
    569 	return (DDI_FAILURE);
    570 }
    571 
    572 /*
    573  * Create a ddi_parent_private_data structure from the ddi properties of
    574  * the dev_info node.
    575  *
    576  * The "reg" and either an "intr" or "interrupts" properties are required
    577  * if the driver wishes to create mappings or field interrupts on behalf
    578  * of the device.
    579  *
    580  * The "reg" property is assumed to be a list of at least one triple
    581  *
    582  *	<bustype, address, size>*1
    583  *
    584  * The "intr" property is assumed to be a list of at least one duple
    585  *
    586  *	<SPARC ipl, vector#>*1
    587  *
    588  * The "interrupts" property is assumed to be a list of at least one
    589  * n-tuples that describes the interrupt capabilities of the bus the device
    590  * is connected to.  For SBus, this looks like
    591  *
    592  *	<SBus-level>*1
    593  *
    594  * (This property obsoletes the 'intr' property).
    595  *
    596  * The "ranges" property is optional.
    597  */
    598 void
    599 make_ddi_ppd(dev_info_t *child, struct ddi_parent_private_data **ppd)
    600 {
    601 	struct ddi_parent_private_data *pdptr;
    602 	int n;
    603 	int *reg_prop, *rng_prop, *intr_prop, *irupts_prop;
    604 	uint_t reg_len, rng_len, intr_len, irupts_len;
    605 
    606 	*ppd = pdptr = kmem_zalloc(sizeof (*pdptr), KM_SLEEP);
    607 
    608 	/*
    609 	 * Handle the 'reg' property.
    610 	 */
    611 	if ((get_prop_int_array(child, "reg", &reg_prop, &reg_len) ==
    612 	    DDI_PROP_SUCCESS) && (reg_len != 0)) {
    613 		pdptr->par_nreg = reg_len / (int)sizeof (struct regspec);
    614 		pdptr->par_reg = (struct regspec *)reg_prop;
    615 	}
    616 
    617 	/*
    618 	 * See if I have a range (adding one where needed - this
    619 	 * means to add one for sbus node in sun4c, when romvec > 0,
    620 	 * if no range is already defined in the PROM node.
    621 	 * (Currently no sun4c PROMS define range properties,
    622 	 * but they should and may in the future.)  For the SBus
    623 	 * node, the range is defined by the SBus reg property.
    624 	 */
    625 	if (get_prop_int_array(child, "ranges", &rng_prop, &rng_len)
    626 	    == DDI_PROP_SUCCESS) {
    627 		pdptr->par_nrng = rng_len / (int)(sizeof (struct rangespec));
    628 		pdptr->par_rng = (struct rangespec *)rng_prop;
    629 	}
    630 
    631 	/*
    632 	 * Handle the 'intr' and 'interrupts' properties
    633 	 */
    634 
    635 	/*
    636 	 * For backwards compatibility
    637 	 * we first look for the 'intr' property for the device.
    638 	 */
    639 	if (get_prop_int_array(child, "intr", &intr_prop, &intr_len)
    640 	    != DDI_PROP_SUCCESS) {
    641 		intr_len = 0;
    642 	}
    643 
    644 	/*
    645 	 * If we're to support bus adapters and future platforms cleanly,
    646 	 * we need to support the generalized 'interrupts' property.
    647 	 */
    648 	if (get_prop_int_array(child, "interrupts", &irupts_prop,
    649 	    &irupts_len) != DDI_PROP_SUCCESS) {
    650 		irupts_len = 0;
    651 	} else if (intr_len != 0) {
    652 		/*
    653 		 * If both 'intr' and 'interrupts' are defined,
    654 		 * then 'interrupts' wins and we toss the 'intr' away.
    655 		 */
    656 		ddi_prop_free((void *)intr_prop);
    657 		intr_len = 0;
    658 	}
    659 
    660 	if (intr_len != 0) {
    661 
    662 		/*
    663 		 * Translate the 'intr' property into an array
    664 		 * an array of struct intrspec's.  There's not really
    665 		 * very much to do here except copy what's out there.
    666 		 */
    667 
    668 		struct intrspec *new;
    669 		struct prop_ispec *l;
    670 
    671 		n = pdptr->par_nintr = intr_len / sizeof (struct prop_ispec);
    672 		l = (struct prop_ispec *)intr_prop;
    673 		pdptr->par_intr =
    674 		    new = kmem_zalloc(n * sizeof (struct intrspec), KM_SLEEP);
    675 		while (n--) {
    676 			new->intrspec_pri = l->pri;
    677 			new->intrspec_vec = l->vec;
    678 			new++;
    679 			l++;
    680 		}
    681 		ddi_prop_free((void *)intr_prop);
    682 
    683 	} else if ((n = irupts_len) != 0) {
    684 		size_t size;
    685 		int *out;
    686 
    687 		/*
    688 		 * Translate the 'interrupts' property into an array
    689 		 * of intrspecs for the rest of the DDI framework to
    690 		 * toy with.  Only our ancestors really know how to
    691 		 * do this, so ask 'em.  We massage the 'interrupts'
    692 		 * property so that it is pre-pended by a count of
    693 		 * the number of integers in the argument.
    694 		 */
    695 		size = sizeof (int) + n;
    696 		out = kmem_alloc(size, KM_SLEEP);
    697 		*out = n / sizeof (int);
    698 		bcopy(irupts_prop, out + 1, (size_t)n);
    699 		ddi_prop_free((void *)irupts_prop);
    700 		if (impl_xlate_intrs(child, out, pdptr) != DDI_SUCCESS) {
    701 			cmn_err(CE_CONT,
    702 			    "Unable to translate 'interrupts' for %s%d\n",
    703 			    DEVI(child)->devi_binding_name,
    704 			    DEVI(child)->devi_instance);
    705 		}
    706 		kmem_free(out, size);
    707 	}
    708 }
    709 
    710 /*
    711  * Name a child
    712  */
    713 static int
    714 impl_sunbus_name_child(dev_info_t *child, char *name, int namelen)
    715 {
    716 	/*
    717 	 * Fill in parent-private data and this function returns to us
    718 	 * an indication if it used "registers" to fill in the data.
    719 	 */
    720 	if (ddi_get_parent_data(child) == NULL) {
    721 		struct ddi_parent_private_data *pdptr;
    722 		make_ddi_ppd(child, &pdptr);
    723 		ddi_set_parent_data(child, pdptr);
    724 	}
    725 
    726 	name[0] = '\0';
    727 	if (sparc_pd_getnreg(child) > 0) {
    728 		(void) snprintf(name, namelen, "%x,%x",
    729 		    (uint_t)sparc_pd_getreg(child, 0)->regspec_bustype,
    730 		    (uint_t)sparc_pd_getreg(child, 0)->regspec_addr);
    731 	}
    732 
    733 	return (DDI_SUCCESS);
    734 }
    735 
    736 /*
    737  * Called from the bus_ctl op of sunbus (sbus, obio, etc) nexus drivers
    738  * to implement the DDI_CTLOPS_INITCHILD operation.  That is, it names
    739  * the children of sun busses based on the reg spec.
    740  *
    741  * Handles the following properties (in make_ddi_ppd):
    742  *	Property		value
    743  *	  Name			type
    744  *	reg		register spec
    745  *	intr		old-form interrupt spec
    746  *	interrupts	new (bus-oriented) interrupt spec
    747  *	ranges		range spec
    748  */
    749 int
    750 impl_ddi_sunbus_initchild(dev_info_t *child)
    751 {
    752 	char name[MAXNAMELEN];
    753 	void impl_ddi_sunbus_removechild(dev_info_t *);
    754 
    755 	/*
    756 	 * Name the child, also makes parent private data
    757 	 */
    758 	(void) impl_sunbus_name_child(child, name, MAXNAMELEN);
    759 	ddi_set_name_addr(child, name);
    760 
    761 	/*
    762 	 * Attempt to merge a .conf node; if successful, remove the
    763 	 * .conf node.
    764 	 */
    765 	if ((ndi_dev_is_persistent_node(child) == 0) &&
    766 	    (ndi_merge_node(child, impl_sunbus_name_child) == DDI_SUCCESS)) {
    767 		/*
    768 		 * Return failure to remove node
    769 		 */
    770 		impl_ddi_sunbus_removechild(child);
    771 		return (DDI_FAILURE);
    772 	}
    773 	return (DDI_SUCCESS);
    774 }
    775 
    776 void
    777 impl_free_ddi_ppd(dev_info_t *dip)
    778 {
    779 	struct ddi_parent_private_data *pdptr;
    780 	size_t n;
    781 
    782 	if ((pdptr = ddi_get_parent_data(dip)) == NULL)
    783 		return;
    784 
    785 	if ((n = (size_t)pdptr->par_nintr) != 0)
    786 		/*
    787 		 * Note that kmem_free is used here (instead of
    788 		 * ddi_prop_free) because the contents of the
    789 		 * property were placed into a separate buffer and
    790 		 * mucked with a bit before being stored in par_intr.
    791 		 * The actual return value from the prop lookup
    792 		 * was freed with ddi_prop_free previously.
    793 		 */
    794 		kmem_free(pdptr->par_intr, n * sizeof (struct intrspec));
    795 
    796 	if ((n = (size_t)pdptr->par_nrng) != 0)
    797 		ddi_prop_free((void *)pdptr->par_rng);
    798 
    799 	if ((n = pdptr->par_nreg) != 0)
    800 		ddi_prop_free((void *)pdptr->par_reg);
    801 
    802 	kmem_free(pdptr, sizeof (*pdptr));
    803 	ddi_set_parent_data(dip, NULL);
    804 }
    805 
    806 void
    807 impl_ddi_sunbus_removechild(dev_info_t *dip)
    808 {
    809 	impl_free_ddi_ppd(dip);
    810 	ddi_set_name_addr(dip, NULL);
    811 	/*
    812 	 * Strip the node to properly convert it back to prototype form
    813 	 */
    814 	impl_rem_dev_props(dip);
    815 }
    816 
    817 /*
    818  * DDI Interrupt
    819  */
    820 
    821 /*
    822  * turn this on to force isa, eisa, and mca device to ignore the new
    823  * hardware nodes in the device tree (normally turned on only for
    824  * drivers that need it by setting the property "ignore-hardware-nodes"
    825  * in their driver.conf file).
    826  *
    827  * 7/31/96 -- Turned off globally.  Leaving variable in for the moment
    828  *		as safety valve.
    829  */
    830 int ignore_hardware_nodes = 0;
    831 
    832 /*
    833  * Local data
    834  */
    835 static struct impl_bus_promops *impl_busp;
    836 
    837 
    838 /*
    839  * New DDI interrupt framework
    840  */
    841 
    842 /*
    843  * i_ddi_intr_ops:
    844  *
    845  * This is the interrupt operator function wrapper for the bus function
    846  * bus_intr_op.
    847  */
    848 int
    849 i_ddi_intr_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t op,
    850     ddi_intr_handle_impl_t *hdlp, void * result)
    851 {
    852 	dev_info_t	*pdip = (dev_info_t *)DEVI(dip)->devi_parent;
    853 	int		ret = DDI_FAILURE;
    854 
    855 	/* request parent to process this interrupt op */
    856 	if (NEXUS_HAS_INTR_OP(pdip))
    857 		ret = (*(DEVI(pdip)->devi_ops->devo_bus_ops->bus_intr_op))(
    858 		    pdip, rdip, op, hdlp, result);
    859 	else
    860 		cmn_err(CE_WARN, "Failed to process interrupt "
    861 		    "for %s%d due to down-rev nexus driver %s%d",
    862 		    ddi_get_name(rdip), ddi_get_instance(rdip),
    863 		    ddi_get_name(pdip), ddi_get_instance(pdip));
    864 	return (ret);
    865 }
    866 
    867 /*
    868  * i_ddi_add_softint - allocate and add a soft interrupt to the system
    869  */
    870 int
    871 i_ddi_add_softint(ddi_softint_hdl_impl_t *hdlp)
    872 {
    873 	int ret;
    874 
    875 	/* add soft interrupt handler */
    876 	ret = add_avsoftintr((void *)hdlp, hdlp->ih_pri, hdlp->ih_cb_func,
    877 	    DEVI(hdlp->ih_dip)->devi_name, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2);
    878 	return (ret ? DDI_SUCCESS : DDI_FAILURE);
    879 }
    880 
    881 
    882 void
    883 i_ddi_remove_softint(ddi_softint_hdl_impl_t *hdlp)
    884 {
    885 	(void) rem_avsoftintr((void *)hdlp, hdlp->ih_pri, hdlp->ih_cb_func);
    886 }
    887 
    888 
    889 extern void (*setsoftint)(int, struct av_softinfo *);
    890 extern boolean_t av_check_softint_pending(struct av_softinfo *, boolean_t);
    891 
    892 int
    893 i_ddi_trigger_softint(ddi_softint_hdl_impl_t *hdlp, void *arg2)
    894 {
    895 	if (av_check_softint_pending(hdlp->ih_pending, B_FALSE))
    896 		return (DDI_EPENDING);
    897 
    898 	update_avsoftintr_args((void *)hdlp, hdlp->ih_pri, arg2);
    899 
    900 	(*setsoftint)(hdlp->ih_pri, hdlp->ih_pending);
    901 	return (DDI_SUCCESS);
    902 }
    903 
    904 /*
    905  * i_ddi_set_softint_pri:
    906  *
    907  * The way this works is that it first tries to add a softint vector
    908  * at the new priority in hdlp. If that succeeds; then it removes the
    909  * existing softint vector at the old priority.
    910  */
    911 int
    912 i_ddi_set_softint_pri(ddi_softint_hdl_impl_t *hdlp, uint_t old_pri)
    913 {
    914 	int ret;
    915 
    916 	/*
    917 	 * If a softint is pending at the old priority then fail the request.
    918 	 */
    919 	if (av_check_softint_pending(hdlp->ih_pending, B_TRUE))
    920 		return (DDI_FAILURE);
    921 
    922 	ret = av_softint_movepri((void *)hdlp, old_pri);
    923 	return (ret ? DDI_SUCCESS : DDI_FAILURE);
    924 }
    925 
    926 void
    927 i_ddi_alloc_intr_phdl(ddi_intr_handle_impl_t *hdlp)
    928 {
    929 	hdlp->ih_private = (void *)kmem_zalloc(sizeof (ihdl_plat_t), KM_SLEEP);
    930 }
    931 
    932 void
    933 i_ddi_free_intr_phdl(ddi_intr_handle_impl_t *hdlp)
    934 {
    935 	kmem_free(hdlp->ih_private, sizeof (ihdl_plat_t));
    936 	hdlp->ih_private = NULL;
    937 }
    938 
    939 int
    940 i_ddi_get_intx_nintrs(dev_info_t *dip)
    941 {
    942 	struct ddi_parent_private_data *pdp;
    943 
    944 	if ((pdp = ddi_get_parent_data(dip)) == NULL)
    945 		return (0);
    946 
    947 	return (pdp->par_nintr);
    948 }
    949 
    950 /*
    951  * DDI Memory/DMA
    952  */
    953 
    954 /*
    955  * Support for allocating DMAable memory to implement
    956  * ddi_dma_mem_alloc(9F) interface.
    957  */
    958 
    959 #define	KA_ALIGN_SHIFT	7
    960 #define	KA_ALIGN	(1 << KA_ALIGN_SHIFT)
    961 #define	KA_NCACHE	(PAGESHIFT + 1 - KA_ALIGN_SHIFT)
    962 
    963 /*
    964  * Dummy DMA attribute template for kmem_io[].kmem_io_attr.  We only
    965  * care about addr_lo, addr_hi, and align.  addr_hi will be dynamically set.
    966  */
    967 
static ddi_dma_attr_t kmem_io_attr = {
	DMA_ATTR_V0,			/* dma_attr_version */
	0x0000000000000000ULL,		/* dma_attr_addr_lo */
	0x0000000000000000ULL,		/* dma_attr_addr_hi */
	0x00ffffff,			/* dma_attr_count_max */
	0x1000,				/* dma_attr_align */
	/*
	 * Remaining members, in ddi_dma_attr(9S) declaration order:
	 * burstsizes, minxfer, maxxfer, seg, sgllen, granular, flags.
	 */
	1, 1, 0xffffffffULL, 0xffffffffULL, 0x1, 1, 0
};
    976 
/*
 * kmem io memory ranges and indices.
 *
 * Each enumerator indexes both kmem_io[] and io_arena_params[]; the
 * entries are ordered from the largest address limit (IO_4P) down to the
 * smallest (IO_16M).  MAX_MEM_RANGES is the number of ranges.
 */
enum {
	IO_4P, IO_64G, IO_4G, IO_2G, IO_1G, IO_512M,
	IO_256M, IO_128M, IO_64M, IO_32M, IO_16M, MAX_MEM_RANGES
};
    982 
/*
 * Per-range allocator state, indexed by the IO_* enumerators.
 * kmem_io_arena and kmem_io_cache[] are filled in by kmem_io_init();
 * kmem_io_attr is filled in by ka_init().  A NULL kmem_io_arena means
 * the range has not been initialized yet.
 */
static struct {
	vmem_t		*kmem_io_arena;		/* vmem arena for the range */
	kmem_cache_t	*kmem_io_cache[KA_NCACHE]; /* KA_ALIGN << c sized */
	ddi_dma_attr_t	kmem_io_attr;		/* attrs for page creation */
} kmem_io[MAX_MEM_RANGES];

static int kmem_io_idx;		/* index of first populated kmem_io[] */
    990 
    991 static page_t *
    992 page_create_io_wrapper(void *addr, size_t len, int vmflag, void *arg)
    993 {
    994 	extern page_t *page_create_io(vnode_t *, u_offset_t, uint_t,
    995 	    uint_t, struct as *, caddr_t, ddi_dma_attr_t *);
    996 
    997 	return (page_create_io(&kvp, (u_offset_t)(uintptr_t)addr, len,
    998 	    PG_EXCL | ((vmflag & VM_NOSLEEP) ? 0 : PG_WAIT), &kas, addr, arg));
    999 }
   1000 
#ifdef __xpv
/*
 * Free routine used for the kmem_io arenas when built for __xpv:
 * hands the range to segkmem_xfree() with page_destroy_io as the
 * per-page callback.
 */
static void
segkmem_free_io(vmem_t *vmp, void * ptr, size_t size)
{
	extern void page_destroy_io(page_t *);
	segkmem_xfree(vmp, ptr, size, page_destroy_io);
}
#endif
   1009 
   1010 static void *
   1011 segkmem_alloc_io_4P(vmem_t *vmp, size_t size, int vmflag)
   1012 {
   1013 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
   1014 	    page_create_io_wrapper, &kmem_io[IO_4P].kmem_io_attr));
   1015 }
   1016 
   1017 static void *
   1018 segkmem_alloc_io_64G(vmem_t *vmp, size_t size, int vmflag)
   1019 {
   1020 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
   1021 	    page_create_io_wrapper, &kmem_io[IO_64G].kmem_io_attr));
   1022 }
   1023 
   1024 static void *
   1025 segkmem_alloc_io_4G(vmem_t *vmp, size_t size, int vmflag)
   1026 {
   1027 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
   1028 	    page_create_io_wrapper, &kmem_io[IO_4G].kmem_io_attr));
   1029 }
   1030 
   1031 static void *
   1032 segkmem_alloc_io_2G(vmem_t *vmp, size_t size, int vmflag)
   1033 {
   1034 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
   1035 	    page_create_io_wrapper, &kmem_io[IO_2G].kmem_io_attr));
   1036 }
   1037 
   1038 static void *
   1039 segkmem_alloc_io_1G(vmem_t *vmp, size_t size, int vmflag)
   1040 {
   1041 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
   1042 	    page_create_io_wrapper, &kmem_io[IO_1G].kmem_io_attr));
   1043 }
   1044 
   1045 static void *
   1046 segkmem_alloc_io_512M(vmem_t *vmp, size_t size, int vmflag)
   1047 {
   1048 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
   1049 	    page_create_io_wrapper, &kmem_io[IO_512M].kmem_io_attr));
   1050 }
   1051 
   1052 static void *
   1053 segkmem_alloc_io_256M(vmem_t *vmp, size_t size, int vmflag)
   1054 {
   1055 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
   1056 	    page_create_io_wrapper, &kmem_io[IO_256M].kmem_io_attr));
   1057 }
   1058 
   1059 static void *
   1060 segkmem_alloc_io_128M(vmem_t *vmp, size_t size, int vmflag)
   1061 {
   1062 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
   1063 	    page_create_io_wrapper, &kmem_io[IO_128M].kmem_io_attr));
   1064 }
   1065 
   1066 static void *
   1067 segkmem_alloc_io_64M(vmem_t *vmp, size_t size, int vmflag)
   1068 {
   1069 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
   1070 	    page_create_io_wrapper, &kmem_io[IO_64M].kmem_io_attr));
   1071 }
   1072 
   1073 static void *
   1074 segkmem_alloc_io_32M(vmem_t *vmp, size_t size, int vmflag)
   1075 {
   1076 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
   1077 	    page_create_io_wrapper, &kmem_io[IO_32M].kmem_io_attr));
   1078 }
   1079 
   1080 static void *
   1081 segkmem_alloc_io_16M(vmem_t *vmp, size_t size, int vmflag)
   1082 {
   1083 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
   1084 	    page_create_io_wrapper, &kmem_io[IO_16M].kmem_io_attr));
   1085 }
   1086 
/*
 * Static description of each kmem_io memory range, indexed by the IO_*
 * enumerators (largest limit first).
 *
 *	io_limit	highest address of the range; ka_init() copies it
 *			into kmem_io[].kmem_io_attr.dma_attr_addr_hi and may
 *			lower the first populated entry to the machine's
 *			maximum physical address.
 *	io_name		arena name, also the prefix of the cache names.
 *	io_alloc	import function passed to vmem_create().
 *	io_initial	non-zero if ka_init() should create the arena up
 *			front; other ranges are created on first use by
 *			kmem_io_index().
 */
struct {
	uint64_t	io_limit;
	char		*io_name;
	void		*(*io_alloc)(vmem_t *, size_t, int);
	int		io_initial;	/* kmem_io_init during startup */
} io_arena_params[MAX_MEM_RANGES] = {
	{0x000fffffffffffffULL,	"kmem_io_4P",	segkmem_alloc_io_4P,	1},
	{0x0000000fffffffffULL,	"kmem_io_64G",	segkmem_alloc_io_64G,	0},
	{0x00000000ffffffffULL,	"kmem_io_4G",	segkmem_alloc_io_4G,	1},
	{0x000000007fffffffULL,	"kmem_io_2G",	segkmem_alloc_io_2G,	1},
	{0x000000003fffffffULL,	"kmem_io_1G",	segkmem_alloc_io_1G,	0},
	{0x000000001fffffffULL,	"kmem_io_512M",	segkmem_alloc_io_512M,	0},
	{0x000000000fffffffULL,	"kmem_io_256M",	segkmem_alloc_io_256M,	0},
	{0x0000000007ffffffULL,	"kmem_io_128M",	segkmem_alloc_io_128M,	0},
	{0x0000000003ffffffULL,	"kmem_io_64M",	segkmem_alloc_io_64M,	0},
	{0x0000000001ffffffULL,	"kmem_io_32M",	segkmem_alloc_io_32M,	0},
	{0x0000000000ffffffULL,	"kmem_io_16M",	segkmem_alloc_io_16M,	1}
};
   1105 
   1106 void
   1107 kmem_io_init(int a)
   1108 {
   1109 	int	c;
   1110 	char name[40];
   1111 
   1112 	kmem_io[a].kmem_io_arena = vmem_create(io_arena_params[a].io_name,
   1113 	    NULL, 0, PAGESIZE, io_arena_params[a].io_alloc,
   1114 #ifdef __xpv
   1115 	    segkmem_free_io,
   1116 #else
   1117 	    segkmem_free,
   1118 #endif
   1119 	    heap_arena, 0, VM_SLEEP);
   1120 
   1121 	for (c = 0; c < KA_NCACHE; c++) {
   1122 		size_t size = KA_ALIGN << c;
   1123 		(void) sprintf(name, "%s_%lu",
   1124 		    io_arena_params[a].io_name, size);
   1125 		kmem_io[a].kmem_io_cache[c] = kmem_cache_create(name,
   1126 		    size, size, NULL, NULL, NULL, NULL,
   1127 		    kmem_io[a].kmem_io_arena, 0);
   1128 	}
   1129 }
   1130 
   1131 /*
   1132  * Return the index of the highest memory range for addr.
   1133  */
   1134 static int
   1135 kmem_io_index(uint64_t addr)
   1136 {
   1137 	int n;
   1138 
   1139 	for (n = kmem_io_idx; n < MAX_MEM_RANGES; n++) {
   1140 		if (kmem_io[n].kmem_io_attr.dma_attr_addr_hi <= addr) {
   1141 			if (kmem_io[n].kmem_io_arena == NULL)
   1142 				kmem_io_init(n);
   1143 			return (n);
   1144 		}
   1145 	}
   1146 	panic("kmem_io_index: invalid addr - must be at least 16m");
   1147 
   1148 	/*NOTREACHED*/
   1149 }
   1150 
   1151 /*
   1152  * Return the index of the next kmem_io populated memory range
   1153  * after curindex.
   1154  */
   1155 static int
   1156 kmem_io_index_next(int curindex)
   1157 {
   1158 	int n;
   1159 
   1160 	for (n = curindex + 1; n < MAX_MEM_RANGES; n++) {
   1161 		if (kmem_io[n].kmem_io_arena)
   1162 			return (n);
   1163 	}
   1164 	return (-1);
   1165 }
   1166 
   1167 /*
   1168  * allow kmem to be mapped in with different PTE cache attribute settings.
   1169  * Used by i_ddi_mem_alloc()
   1170  */
   1171 int
   1172 kmem_override_cache_attrs(caddr_t kva, size_t size, uint_t order)
   1173 {
   1174 	uint_t hat_flags;
   1175 	caddr_t kva_end;
   1176 	uint_t hat_attr;
   1177 	pfn_t pfn;
   1178 
   1179 	if (hat_getattr(kas.a_hat, kva, &hat_attr) == -1) {
   1180 		return (-1);
   1181 	}
   1182 
   1183 	hat_attr &= ~HAT_ORDER_MASK;
   1184 	hat_attr |= order | HAT_NOSYNC;
   1185 	hat_flags = HAT_LOAD_LOCK;
   1186 
   1187 	kva_end = (caddr_t)(((uintptr_t)kva + size + PAGEOFFSET) &
   1188 	    (uintptr_t)PAGEMASK);
   1189 	kva = (caddr_t)((uintptr_t)kva & (uintptr_t)PAGEMASK);
   1190 
   1191 	while (kva < kva_end) {
   1192 		pfn = hat_getpfnum(kas.a_hat, kva);
   1193 		hat_unload(kas.a_hat, kva, PAGESIZE, HAT_UNLOAD_UNLOCK);
   1194 		hat_devload(kas.a_hat, kva, PAGESIZE, pfn, hat_attr, hat_flags);
   1195 		kva += MMU_PAGESIZE;
   1196 	}
   1197 
   1198 	return (0);
   1199 }
   1200 
   1201 void
   1202 ka_init(void)
   1203 {
   1204 	int a;
   1205 	paddr_t maxphysaddr;
   1206 #if !defined(__xpv)
   1207 	extern pfn_t physmax;
   1208 
   1209 	maxphysaddr = mmu_ptob((paddr_t)physmax) + MMU_PAGEOFFSET;
   1210 #else
   1211 	maxphysaddr = mmu_ptob((paddr_t)HYPERVISOR_memory_op(
   1212 	    XENMEM_maximum_ram_page, NULL)) + MMU_PAGEOFFSET;
   1213 #endif
   1214 
   1215 	ASSERT(maxphysaddr <= io_arena_params[0].io_limit);
   1216 
   1217 	for (a = 0; a < MAX_MEM_RANGES; a++) {
   1218 		if (maxphysaddr >= io_arena_params[a + 1].io_limit) {
   1219 			if (maxphysaddr > io_arena_params[a + 1].io_limit)
   1220 				io_arena_params[a].io_limit = maxphysaddr;
   1221 			else
   1222 				a++;
   1223 			break;
   1224 		}
   1225 	}
   1226 	kmem_io_idx = a;
   1227 
   1228 	for (; a < MAX_MEM_RANGES; a++) {
   1229 		kmem_io[a].kmem_io_attr = kmem_io_attr;
   1230 		kmem_io[a].kmem_io_attr.dma_attr_addr_hi =
   1231 		    io_arena_params[a].io_limit;
   1232 		/*
   1233 		 * initialize kmem_io[] arena/cache corresponding to
   1234 		 * maxphysaddr and to the "common" io memory ranges that
   1235 		 * have io_initial set to a non-zero value.
   1236 		 */
   1237 		if (io_arena_params[a].io_initial || a == kmem_io_idx)
   1238 			kmem_io_init(a);
   1239 	}
   1240 }
   1241 
   1242 /*
   1243  * put contig address/size
   1244  */
   1245 static void *
   1246 putctgas(void *addr, size_t size)
   1247 {
   1248 	struct ctgas	*ctgp = &ctglist;
   1249 	int		i;
   1250 
   1251 	CTGLOCK();
   1252 	do {
   1253 		if ((i = ctgp->ctg_index) < CTGENTRIES) {
   1254 			ctgp->ctg_addr[i] = addr;
   1255 			ctgp->ctg_size[i] = size;
   1256 			ctgp->ctg_index++;
   1257 			break;
   1258 		}
   1259 		if (!ctgp->ctg_next)
   1260 			ctgp->ctg_next = kmem_zalloc(sizeof (struct ctgas),
   1261 			    KM_NOSLEEP);
   1262 		ctgp = ctgp->ctg_next;
   1263 	} while (ctgp);
   1264 
   1265 	CTGUNLOCK();
   1266 	return (ctgp);
   1267 }
   1268 
   1269 /*
   1270  * get contig size by addr
   1271  */
   1272 static size_t
   1273 getctgsz(void *addr)
   1274 {
   1275 	struct ctgas	*ctgp = &ctglist;
   1276 	int		i, j;
   1277 	size_t		sz;
   1278 
   1279 	ASSERT(addr);
   1280 	CTGLOCK();
   1281 
   1282 	while (ctgp) {
   1283 		for (i = 0; i < ctgp->ctg_index; i++) {
   1284 			if (addr != ctgp->ctg_addr[i])
   1285 				continue;
   1286 
   1287 			sz = ctgp->ctg_size[i];
   1288 			j = --ctgp->ctg_index;
   1289 			if (i != j) {
   1290 				ctgp->ctg_size[i] = ctgp->ctg_size[j];
   1291 				ctgp->ctg_addr[i] = ctgp->ctg_addr[j];
   1292 			}
   1293 			CTGUNLOCK();
   1294 			return (sz);
   1295 		}
   1296 		ctgp = ctgp->ctg_next;
   1297 	}
   1298 
   1299 	CTGUNLOCK();
   1300 	return (0);
   1301 }
   1302 
   1303 /*
   1304  * contig_alloc:
   1305  *
   1306  *	allocates contiguous memory to satisfy the 'size' and dma attributes
   1307  *	specified in 'attr'.
   1308  *
   1309  *	Not all of memory need to be physically contiguous if the
   1310  *	scatter-gather list length is greater than 1.
   1311  */
   1312 
   1313 /*ARGSUSED*/
   1314 void *
   1315 contig_alloc(size_t size, ddi_dma_attr_t *attr, uintptr_t align, int cansleep)
   1316 {
   1317 	pgcnt_t		pgcnt = btopr(size);
   1318 	size_t		asize = pgcnt * PAGESIZE;
   1319 	page_t		*ppl;
   1320 	int		pflag;
   1321 	void		*addr;
   1322 
   1323 	extern page_t *page_create_io(vnode_t *, u_offset_t, uint_t,
   1324 	    uint_t, struct as *, caddr_t, ddi_dma_attr_t *);
   1325 
   1326 	/* segkmem_xalloc */
   1327 
   1328 	if (align <= PAGESIZE)
   1329 		addr = vmem_alloc(heap_arena, asize,
   1330 		    (cansleep) ? VM_SLEEP : VM_NOSLEEP);
   1331 	else
   1332 		addr = vmem_xalloc(heap_arena, asize, align, 0, 0, NULL, NULL,
   1333 		    (cansleep) ? VM_SLEEP : VM_NOSLEEP);
   1334 	if (addr) {
   1335 		ASSERT(!((uintptr_t)addr & (align - 1)));
   1336 
   1337 		if (page_resv(pgcnt, (cansleep) ? KM_SLEEP : KM_NOSLEEP) == 0) {
   1338 			vmem_free(heap_arena, addr, asize);
   1339 			return (NULL);
   1340 		}
   1341 		pflag = PG_EXCL;
   1342 
   1343 		if (cansleep)
   1344 			pflag |= PG_WAIT;
   1345 
   1346 		/* 4k req gets from freelists rather than pfn search */
   1347 		if (pgcnt > 1 || align > PAGESIZE)
   1348 			pflag |= PG_PHYSCONTIG;
   1349 
   1350 		ppl = page_create_io(&kvp, (u_offset_t)(uintptr_t)addr,
   1351 		    asize, pflag, &kas, (caddr_t)addr, attr);
   1352 
   1353 		if (!ppl) {
   1354 			vmem_free(heap_arena, addr, asize);
   1355 			page_unresv(pgcnt);
   1356 			return (NULL);
   1357 		}
   1358 
   1359 		while (ppl != NULL) {
   1360 			page_t	*pp = ppl;
   1361 			page_sub(&ppl, pp);
   1362 			ASSERT(page_iolock_assert(pp));
   1363 			page_io_unlock(pp);
   1364 			page_downgrade(pp);
   1365 			hat_memload(kas.a_hat, (caddr_t)(uintptr_t)pp->p_offset,
   1366 			    pp, (PROT_ALL & ~PROT_USER) |
   1367 			    HAT_NOSYNC, HAT_LOAD_LOCK);
   1368 		}
   1369 	}
   1370 	return (addr);
   1371 }
   1372 
   1373 void
   1374 contig_free(void *addr, size_t size)
   1375 {
   1376 	pgcnt_t	pgcnt = btopr(size);
   1377 	size_t	asize = pgcnt * PAGESIZE;
   1378 	caddr_t	a, ea;
   1379 	page_t	*pp;
   1380 
   1381 	hat_unload(kas.a_hat, addr, asize, HAT_UNLOAD_UNLOCK);
   1382 
   1383 	for (a = addr, ea = a + asize; a < ea; a += PAGESIZE) {
   1384 		pp = page_find(&kvp, (u_offset_t)(uintptr_t)a);
   1385 		if (!pp)
   1386 			panic("contig_free: contig pp not found");
   1387 
   1388 		if (!page_tryupgrade(pp)) {
   1389 			page_unlock(pp);
   1390 			pp = page_lookup(&kvp,
   1391 			    (u_offset_t)(uintptr_t)a, SE_EXCL);
   1392 			if (pp == NULL)
   1393 				panic("contig_free: page freed");
   1394 		}
   1395 		page_destroy(pp, 0);
   1396 	}
   1397 
   1398 	page_unresv(pgcnt);
   1399 	vmem_free(heap_arena, addr, asize);
   1400 }
   1401 
   1402 /*
   1403  * Allocate from the system, aligned on a specific boundary.
   1404  * The alignment, if non-zero, must be a power of 2.
   1405  */
   1406 static void *
   1407 kalloca(size_t size, size_t align, int cansleep, int physcontig,
   1408 	ddi_dma_attr_t *attr)
   1409 {
   1410 	size_t *addr, *raddr, rsize;
   1411 	size_t hdrsize = 4 * sizeof (size_t);	/* must be power of 2 */
   1412 	int a, i, c;
   1413 	vmem_t *vmp;
   1414 	kmem_cache_t *cp = NULL;
   1415 
   1416 	if (attr->dma_attr_addr_lo > mmu_ptob((uint64_t)ddiphysmin))
   1417 		return (NULL);
   1418 
   1419 	align = MAX(align, hdrsize);
   1420 	ASSERT((align & (align - 1)) == 0);
   1421 
   1422 	/*
   1423 	 * All of our allocators guarantee 16-byte alignment, so we don't
   1424 	 * need to reserve additional space for the header.
   1425 	 * To simplify picking the correct kmem_io_cache, we round up to
   1426 	 * a multiple of KA_ALIGN.
   1427 	 */
   1428 	rsize = P2ROUNDUP_TYPED(size + align, KA_ALIGN, size_t);
   1429 
   1430 	if (physcontig && rsize > PAGESIZE) {
   1431 		if (addr = contig_alloc(size, attr, align, cansleep)) {
   1432 			if (!putctgas(addr, size))
   1433 				contig_free(addr, size);
   1434 			else
   1435 				return (addr);
   1436 		}
   1437 		return (NULL);
   1438 	}
   1439 
   1440 	a = kmem_io_index(attr->dma_attr_addr_hi);
   1441 
   1442 	if (rsize > PAGESIZE) {
   1443 		vmp = kmem_io[a].kmem_io_arena;
   1444 		raddr = vmem_alloc(vmp, rsize,
   1445 		    (cansleep) ? VM_SLEEP : VM_NOSLEEP);
   1446 	} else {
   1447 		c = highbit((rsize >> KA_ALIGN_SHIFT) - 1);
   1448 		cp = kmem_io[a].kmem_io_cache[c];
   1449 		raddr = kmem_cache_alloc(cp, (cansleep) ? KM_SLEEP :
   1450 		    KM_NOSLEEP);
   1451 	}
   1452 
   1453 	if (raddr == NULL) {
   1454 		int	na;
   1455 
   1456 		ASSERT(cansleep == 0);
   1457 		if (rsize > PAGESIZE)
   1458 			return (NULL);
   1459 		/*
   1460 		 * System does not have memory in the requested range.
   1461 		 * Try smaller kmem io ranges and larger cache sizes
   1462 		 * to see if there might be memory available in
   1463 		 * these other caches.
   1464 		 */
   1465 
   1466 		for (na = kmem_io_index_next(a); na >= 0;
   1467 		    na = kmem_io_index_next(na)) {
   1468 			ASSERT(kmem_io[na].kmem_io_arena);
   1469 			cp = kmem_io[na].kmem_io_cache[c];
   1470 			raddr = kmem_cache_alloc(cp, KM_NOSLEEP);
   1471 			if (raddr)
   1472 				goto kallocdone;
   1473 		}
   1474 		/* now try the larger kmem io cache sizes */
   1475 		for (na = a; na >= 0; na = kmem_io_index_next(na)) {
   1476 			for (i = c + 1; i < KA_NCACHE; i++) {
   1477 				cp = kmem_io[na].kmem_io_cache[i];
   1478 				raddr = kmem_cache_alloc(cp, KM_NOSLEEP);
   1479 				if (raddr)
   1480 					goto kallocdone;
   1481 			}
   1482 		}
   1483 		return (NULL);
   1484 	}
   1485 
   1486 kallocdone:
   1487 	ASSERT(!P2BOUNDARY((uintptr_t)raddr, rsize, PAGESIZE) ||
   1488 	    rsize > PAGESIZE);
   1489 
   1490 	addr = (size_t *)P2ROUNDUP((uintptr_t)raddr + hdrsize, align);
   1491 	ASSERT((uintptr_t)addr + size - (uintptr_t)raddr <= rsize);
   1492 
   1493 	addr[-4] = (size_t)cp;
   1494 	addr[-3] = (size_t)vmp;
   1495 	addr[-2] = (size_t)raddr;
   1496 	addr[-1] = rsize;
   1497 
   1498 	return (addr);
   1499 }
   1500 
   1501 static void
   1502 kfreea(void *addr)
   1503 {
   1504 	size_t		size;
   1505 
   1506 	if (!((uintptr_t)addr & PAGEOFFSET) && (size = getctgsz(addr))) {
   1507 		contig_free(addr, size);
   1508 	} else {
   1509 		size_t	*saddr = addr;
   1510 		if (saddr[-4] == 0)
   1511 			vmem_free((vmem_t *)saddr[-3], (void *)saddr[-2],
   1512 			    saddr[-1]);
   1513 		else
   1514 			kmem_cache_free((kmem_cache_t *)saddr[-4],
   1515 			    (void *)saddr[-2]);
   1516 	}
   1517 }
   1518 
/*
 * No-op in this implementation: neither argument is examined and
 * *hataccp is left unchanged.
 */
/*ARGSUSED*/
void
i_ddi_devacc_to_hatacc(ddi_device_acc_attr_t *devaccp, uint_t *hataccp)
{
}
   1524 
   1525 /*
   1526  * Check if the specified cache attribute is supported on the platform.
   1527  * This function must be called before i_ddi_cacheattr_to_hatacc().
   1528  */
   1529 boolean_t
   1530 i_ddi_check_cache_attr(uint_t flags)
   1531 {
   1532 	/*
   1533 	 * The cache attributes are mutually exclusive. Any combination of
   1534 	 * the attributes leads to a failure.
   1535 	 */
   1536 	uint_t cache_attr = IOMEM_CACHE_ATTR(flags);
   1537 	if ((cache_attr != 0) && ((cache_attr & (cache_attr - 1)) != 0))
   1538 		return (B_FALSE);
   1539 
   1540 	/* All cache attributes are supported on X86/X64 */
   1541 	if (cache_attr & (IOMEM_DATA_UNCACHED | IOMEM_DATA_CACHED |
   1542 	    IOMEM_DATA_UC_WR_COMBINE))
   1543 		return (B_TRUE);
   1544 
   1545 	/* undefined attributes */
   1546 	return (B_FALSE);
   1547 }
   1548 
   1549 /* set HAT cache attributes from the cache attributes */
   1550 void
   1551 i_ddi_cacheattr_to_hatacc(uint_t flags, uint_t *hataccp)
   1552 {
   1553 	uint_t cache_attr = IOMEM_CACHE_ATTR(flags);
   1554 	static char *fname = "i_ddi_cacheattr_to_hatacc";
   1555 
   1556 	/*
   1557 	 * If write-combining is not supported, then it falls back
   1558 	 * to uncacheable.
   1559 	 */
   1560 	if (cache_attr == IOMEM_DATA_UC_WR_COMBINE && !(x86_feature & X86_PAT))
   1561 		cache_attr = IOMEM_DATA_UNCACHED;
   1562 
   1563 	/*
   1564 	 * set HAT attrs according to the cache attrs.
   1565 	 */
   1566 	switch (cache_attr) {
   1567 	case IOMEM_DATA_UNCACHED:
   1568 		*hataccp &= ~HAT_ORDER_MASK;
   1569 		*hataccp |= (HAT_STRICTORDER | HAT_PLAT_NOCACHE);
   1570 		break;
   1571 	case IOMEM_DATA_UC_WR_COMBINE:
   1572 		*hataccp &= ~HAT_ORDER_MASK;
   1573 		*hataccp |= (HAT_MERGING_OK | HAT_PLAT_NOCACHE);
   1574 		break;
   1575 	case IOMEM_DATA_CACHED:
   1576 		*hataccp &= ~HAT_ORDER_MASK;
   1577 		*hataccp |= HAT_UNORDERED_OK;
   1578 		break;
   1579 	/*
   1580 	 * This case must not occur because the cache attribute is scrutinized
   1581 	 * before this function is called.
   1582 	 */
   1583 	default:
   1584 		/*
   1585 		 * set cacheable to hat attrs.
   1586 		 */
   1587 		*hataccp &= ~HAT_ORDER_MASK;
   1588 		*hataccp |= HAT_UNORDERED_OK;
   1589 		cmn_err(CE_WARN, "%s: cache_attr=0x%x is ignored.",
   1590 		    fname, cache_attr);
   1591 	}
   1592 }
   1593 
   1594 /*
   1595  * This should actually be called i_ddi_dma_mem_alloc. There should
   1596  * also be an i_ddi_pio_mem_alloc. i_ddi_dma_mem_alloc should call
   1597  * through the device tree with the DDI_CTLOPS_DMA_ALIGN ctl ops to
   1598  * get alignment requirements for DMA memory. i_ddi_pio_mem_alloc
   1599  * should use DDI_CTLOPS_PIO_ALIGN. Since we only have i_ddi_mem_alloc
   1600  * so far which is used for both, DMA and PIO, we have to use the DMA
   1601  * ctl ops to make everybody happy.
   1602  */
   1603 /*ARGSUSED*/
   1604 int
   1605 i_ddi_mem_alloc(dev_info_t *dip, ddi_dma_attr_t *attr,
   1606 	size_t length, int cansleep, int flags,
   1607 	ddi_device_acc_attr_t *accattrp, caddr_t *kaddrp,
   1608 	size_t *real_length, ddi_acc_hdl_t *ap)
   1609 {
   1610 	caddr_t a;
   1611 	int iomin;
   1612 	ddi_acc_impl_t *iap;
   1613 	int physcontig = 0;
   1614 	pgcnt_t npages;
   1615 	pgcnt_t minctg;
   1616 	uint_t order;
   1617 	int e;
   1618 
   1619 	/*
   1620 	 * Check legality of arguments
   1621 	 */
   1622 	if (length == 0 || kaddrp == NULL || attr == NULL) {
   1623 		return (DDI_FAILURE);
   1624 	}
   1625 
   1626 	if (attr->dma_attr_minxfer == 0 || attr->dma_attr_align == 0 ||
   1627 	    (attr->dma_attr_align & (attr->dma_attr_align - 1)) ||
   1628 	    (attr->dma_attr_minxfer & (attr->dma_attr_minxfer - 1))) {
   1629 			return (DDI_FAILURE);
   1630 	}
   1631 
   1632 	/*
   1633 	 * figure out most restrictive alignment requirement
   1634 	 */
   1635 	iomin = attr->dma_attr_minxfer;
   1636 	iomin = maxbit(iomin, attr->dma_attr_align);
   1637 	if (iomin == 0)
   1638 		return (DDI_FAILURE);
   1639 
   1640 	ASSERT((iomin & (iomin - 1)) == 0);
   1641 
   1642 	/*
   1643 	 * if we allocate memory with IOMEM_DATA_UNCACHED or
   1644 	 * IOMEM_DATA_UC_WR_COMBINE, make sure we allocate a page aligned
   1645 	 * memory that ends on a page boundry.
   1646 	 * Don't want to have to different cache mappings to the same
   1647 	 * physical page.
   1648 	 */
   1649 	if (OVERRIDE_CACHE_ATTR(flags)) {
   1650 		iomin = (iomin + MMU_PAGEOFFSET) & MMU_PAGEMASK;
   1651 		length = (length + MMU_PAGEOFFSET) & (size_t)MMU_PAGEMASK;
   1652 	}
   1653 
   1654 	/*
   1655 	 * Determine if we need to satisfy the request for physically
   1656 	 * contiguous memory or alignments larger than pagesize.
   1657 	 */
   1658 	npages = btopr(length + attr->dma_attr_align);
   1659 	minctg = howmany(npages, attr->dma_attr_sgllen);
   1660 
   1661 	if (minctg > 1) {
   1662 		uint64_t pfnseg = attr->dma_attr_seg >> PAGESHIFT;
   1663 		/*
   1664 		 * verify that the minimum contig requirement for the
   1665 		 * actual length does not cross segment boundary.
   1666 		 */
   1667 		length = P2ROUNDUP_TYPED(length, attr->dma_attr_minxfer,
   1668 		    size_t);
   1669 		npages = btopr(length);
   1670 		minctg = howmany(npages, attr->dma_attr_sgllen);
   1671 		if (minctg > pfnseg + 1)
   1672 			return (DDI_FAILURE);
   1673 		physcontig = 1;
   1674 	} else {
   1675 		length = P2ROUNDUP_TYPED(length, iomin, size_t);
   1676 	}
   1677 
   1678 	/*
   1679 	 * Allocate the requested amount from the system.
   1680 	 */
   1681 	a = kalloca(length, iomin, cansleep, physcontig, attr);
   1682 
   1683 	if ((*kaddrp = a) == NULL)
   1684 		return (DDI_FAILURE);
   1685 
   1686 	/*
   1687 	 * if we to modify the cache attributes, go back and muck with the
   1688 	 * mappings.
   1689 	 */
   1690 	if (OVERRIDE_CACHE_ATTR(flags)) {
   1691 		order = 0;
   1692 		i_ddi_cacheattr_to_hatacc(flags, &order);
   1693 		e = kmem_override_cache_attrs(a, length, order);
   1694 		if (e != 0) {
   1695 			kfreea(a);
   1696 			return (DDI_FAILURE);
   1697 		}
   1698 	}
   1699 
   1700 	if (real_length) {
   1701 		*real_length = length;
   1702 	}
   1703 	if (ap) {
   1704 		/*
   1705 		 * initialize access handle
   1706 		 */
   1707 		iap = (ddi_acc_impl_t *)ap->ah_platform_private;
   1708 		iap->ahi_acc_attr |= DDI_ACCATTR_CPU_VADDR;
   1709 		impl_acc_hdl_init(ap);
   1710 	}
   1711 
   1712 	return (DDI_SUCCESS);
   1713 }
   1714 
   1715 /*
   1716  * covert old DMA limits structure to DMA attribute structure
   1717  * and continue
   1718  */
   1719 int
   1720 i_ddi_mem_alloc_lim(dev_info_t *dip, ddi_dma_lim_t *limits,
   1721 	size_t length, int cansleep, int streaming,
   1722 	ddi_device_acc_attr_t *accattrp, caddr_t *kaddrp,
   1723 	uint_t *real_length, ddi_acc_hdl_t *ap)
   1724 {
   1725 	ddi_dma_attr_t dma_attr, *attrp;
   1726 	size_t rlen;
   1727 	int ret;
   1728 
   1729 	if (limits == NULL) {
   1730 		return (DDI_FAILURE);
   1731 	}
   1732 
   1733 	/*
   1734 	 * set up DMA attribute structure to pass to i_ddi_mem_alloc()
   1735 	 */
   1736 	attrp = &dma_attr;
   1737 	attrp->dma_attr_version = DMA_ATTR_V0;
   1738 	attrp->dma_attr_addr_lo = (uint64_t)limits->dlim_addr_lo;
   1739 	attrp->dma_attr_addr_hi = (uint64_t)limits->dlim_addr_hi;
   1740 	attrp->dma_attr_count_max = (uint64_t)limits->dlim_ctreg_max;
   1741 	attrp->dma_attr_align = 1;
   1742 	attrp->dma_attr_burstsizes = (uint_t)limits->dlim_burstsizes;
   1743 	attrp->dma_attr_minxfer = (uint32_t)limits->dlim_minxfer;
   1744 	attrp->dma_attr_maxxfer = (uint64_t)limits->dlim_reqsize;
   1745 	attrp->dma_attr_seg = (uint64_t)limits->dlim_adreg_max;
   1746 	attrp->dma_attr_sgllen = limits->dlim_sgllen;
   1747 	attrp->dma_attr_granular = (uint32_t)limits->dlim_granular;
   1748 	attrp->dma_attr_flags = 0;
   1749 
   1750 	ret = i_ddi_mem_alloc(dip, attrp, length, cansleep, streaming,
   1751 	    accattrp, kaddrp, &rlen, ap);
   1752 	if (ret == DDI_SUCCESS) {
   1753 		if (real_length)
   1754 			*real_length = (uint_t)rlen;
   1755 	}
   1756 	return (ret);
   1757 }
   1758 
   1759 /* ARGSUSED */
   1760 void
   1761 i_ddi_mem_free(caddr_t kaddr, ddi_acc_hdl_t *ap)
   1762 {
   1763 	if (ap != NULL) {
   1764 		/*
   1765 		 * if we modified the cache attributes on alloc, go back and
   1766 		 * fix them since this memory could be returned to the
   1767 		 * general pool.
   1768 		 */
   1769 		if (OVERRIDE_CACHE_ATTR(ap->ah_xfermodes)) {
   1770 			uint_t order = 0;
   1771 			int e;
   1772 			i_ddi_cacheattr_to_hatacc(IOMEM_DATA_CACHED, &order);
   1773 			e = kmem_override_cache_attrs(kaddr, ap->ah_len, order);
   1774 			if (e != 0) {
   1775 				cmn_err(CE_WARN, "i_ddi_mem_free() failed to "
   1776 				    "override cache attrs, memory leaked\n");
   1777 				return;
   1778 			}
   1779 		}
   1780 	}
   1781 	kfreea(kaddr);
   1782 }
   1783 
   1784 /*
   1785  * Access Barriers
   1786  *
   1787  */
/*
 * The handle is ignored; this implementation always returns DDI_FAILURE.
 */
/*ARGSUSED*/
int
i_ddi_ontrap(ddi_acc_handle_t hp)
{
	return (DDI_FAILURE);
}
   1794 
/*
 * No-op counterpart of i_ddi_ontrap(); the handle is ignored.
 */
/*ARGSUSED*/
void
i_ddi_notrap(ddi_acc_handle_t hp)
{
}
   1800 
   1801 
   1802 /*
   1803  * Misc Functions
   1804  */
   1805 
   1806 /*
   1807  * Implementation instance override functions
   1808  *
   1809  * No override on i86pc
   1810  */
/*
 * Always returns (uint_t)-1 regardless of dip; per the section comment
 * there is no instance override on i86pc.
 */
/*ARGSUSED*/
uint_t
impl_assign_instance(dev_info_t *dip)
{
	return ((uint_t)-1);
}
   1817 
   1818 /*ARGSUSED*/
   1819 int
   1820 impl_keep_instance(dev_info_t *dip)
   1821 {
   1822 
   1823 #if defined(__xpv)
   1824 	/*
   1825 	 * Do not persist instance numbers assigned to devices in dom0
   1826 	 */
   1827 	dev_info_t *pdip;
   1828 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
   1829 		if (((pdip = ddi_get_parent(dip)) != NULL) &&
   1830 		    (strcmp(ddi_get_name(pdip), "xpvd") == 0))
   1831 			return (DDI_SUCCESS);
   1832 	}
   1833 #endif
   1834 	return (DDI_FAILURE);
   1835 }
   1836 
/*
 * Always returns DDI_FAILURE regardless of dip; per the section comment
 * there is no instance override on i86pc.
 */
/*ARGSUSED*/
int
impl_free_instance(dev_info_t *dip)
{
	return (DDI_FAILURE);
}
   1843 
/*
 * Always returns DDI_SUCCESS; devi is not examined.
 */
/*ARGSUSED*/
int
impl_check_cpu(dev_info_t *devi)
{
	return (DDI_SUCCESS);
}
   1850 
/*
 * Referenced in common/cpr_driver.c: Power off machine.
 * Don't know how to power off i86pc, so this is an empty stub.
 */
void
arch_power_down()
{}
   1858 
   1859 /*
   1860  * Copy name to property_name, since name
   1861  * is in the low address range below kernelbase.
   1862  */
   1863 static void
   1864 copy_boot_str(const char *boot_str, char *kern_str, int len)
   1865 {
   1866 	int i = 0;
   1867 
   1868 	while (i < len - 1 && boot_str[i] != '\0') {
   1869 		kern_str[i] = boot_str[i];
   1870 		i++;
   1871 	}
   1872 
   1873 	kern_str[i] = 0;	/* null terminate */
   1874 	if (boot_str[i] != '\0')
   1875 		cmn_err(CE_WARN,
   1876 		    "boot property string is truncated to %s", kern_str);
   1877 }
   1878 
   1879 static void
   1880 get_boot_properties(void)
   1881 {
   1882 	extern char hw_provider[];
   1883 	dev_info_t *devi;
   1884 	char *name;
   1885 	int length;
   1886 	char property_name[50], property_val[50];
   1887 	void *bop_staging_area;
   1888 
   1889 	bop_staging_area = kmem_zalloc(MMU_PAGESIZE, KM_NOSLEEP);
   1890 
   1891 	/*
   1892 	 * Import "root" properties from the boot.
   1893 	 *
   1894 	 * We do this by invoking BOP_NEXTPROP until the list
   1895 	 * is completely copied in.
   1896 	 */
   1897 
   1898 	devi = ddi_root_node();
   1899 	for (name = BOP_NEXTPROP(bootops, "");		/* get first */
   1900 	    name;					/* NULL => DONE */
   1901 	    name = BOP_NEXTPROP(bootops, name)) {	/* get next */
   1902 
   1903 		/* copy string to memory above kernelbase */
   1904 		copy_boot_str(name, property_name, 50);
   1905 
   1906 		/*
   1907 		 * Skip vga properties. They will be picked up later
   1908 		 * by get_vga_properties.
   1909 		 */
   1910 		if (strcmp(property_name, "display-edif-block") == 0 ||
   1911 		    strcmp(property_name, "display-edif-id") == 0) {
   1912 			continue;
   1913 		}
   1914 
   1915 		length = BOP_GETPROPLEN(bootops, property_name);
   1916 		if (length == 0)
   1917 			continue;
   1918 		if (length > MMU_PAGESIZE) {
   1919 			cmn_err(CE_NOTE,
   1920 			    "boot property %s longer than 0x%x, ignored\n",
   1921 			    property_name, MMU_PAGESIZE);
   1922 			continue;
   1923 		}
   1924 		BOP_GETPROP(bootops, property_name, bop_staging_area);
   1925 
   1926 		/*
   1927 		 * special properties:
   1928 		 * si-machine, si-hw-provider
   1929 		 *	goes to kernel data structures.
   1930 		 * bios-boot-device and stdout
   1931 		 *	goes to hardware property list so it may show up
   1932 		 *	in the prtconf -vp output. This is needed by
   1933 		 *	Install/Upgrade. Once we fix install upgrade,
   1934 		 *	this can be taken out.
   1935 		 */
   1936 		if (strcmp(name, "si-machine") == 0) {
   1937 			(void) strncpy(utsname.machine, bop_staging_area,
   1938 			    SYS_NMLN);
   1939 			utsname.machine[SYS_NMLN - 1] = (char)NULL;
   1940 		} else if (strcmp(name, "si-hw-provider") == 0) {
   1941 			(void) strncpy(hw_provider, bop_staging_area, SYS_NMLN);
   1942 			hw_provider[SYS_NMLN - 1] = (char)NULL;
   1943 		} else if (strcmp(name, "bios-boot-device") == 0) {
   1944 			copy_boot_str(bop_staging_area, property_val, 50);
   1945 			(void) ndi_prop_update_string(DDI_DEV_T_NONE, devi,
   1946 			    property_name, property_val);
   1947 		} else if (strcmp(name, "stdout") == 0) {
   1948 			(void) ndi_prop_update_int(DDI_DEV_T_NONE, devi,
   1949 			    property_name, *((int *)bop_staging_area));
   1950 		} else {
   1951 			/* Property type unknown, use old prop interface */
   1952 			(void) e_ddi_prop_create(DDI_DEV_T_NONE, devi,
   1953 			    DDI_PROP_CANSLEEP, property_name, bop_staging_area,
   1954 			    length);
   1955 		}
   1956 	}
   1957 
   1958 	kmem_free(bop_staging_area, MMU_PAGESIZE);
   1959 }
   1960 
   1961 static void
   1962 get_vga_properties(void)
   1963 {
   1964 	dev_info_t *devi;
   1965 	major_t major;
   1966 	char *name;
   1967 	int length;
   1968 	char property_val[50];
   1969 	void *bop_staging_area;
   1970 
   1971 	/*
   1972 	 * XXXX Hack Allert!
   1973 	 * There really needs to be a better way for identifying various
   1974 	 * console framebuffers and their related issues.  Till then,
   1975 	 * check for this one as a replacement to vgatext.
   1976 	 */
   1977 	major = ddi_name_to_major("ragexl");
   1978 	if (major == (major_t)-1) {
   1979 		major = ddi_name_to_major("vgatext");
   1980 		if (major == (major_t)-1)
   1981 			return;
   1982 	}
   1983 	devi = devnamesp[major].dn_head;
   1984 	if (devi == NULL)
   1985 		return;
   1986 
   1987 	bop_staging_area = kmem_zalloc(MMU_PAGESIZE, KM_SLEEP);
   1988 
   1989 	/*
   1990 	 * Import "vga" properties from the boot.
   1991 	 */
   1992 	name = "display-edif-block";
   1993 	length = BOP_GETPROPLEN(bootops, name);
   1994 	if (length > 0 && length < MMU_PAGESIZE) {
   1995 		BOP_GETPROP(bootops, name, bop_staging_area);
   1996 		(void) ndi_prop_update_byte_array(DDI_DEV_T_NONE,
   1997 		    devi, name, bop_staging_area, length);
   1998 	}
   1999 
   2000 	/*
   2001 	 * kdmconfig is also looking for display-type and
   2002 	 * video-adapter-type. We default to color and svga.
   2003 	 *
   2004 	 * Could it be "monochrome", "vga"?
   2005 	 * Nah, you've got to come to the 21st century...
   2006 	 * And you can set monitor type manually in kdmconfig
   2007 	 * if you are really an old junky.
   2008 	 */
   2009 	(void) ndi_prop_update_string(DDI_DEV_T_NONE,
   2010 	    devi, "display-type", "color");
   2011 	(void) ndi_prop_update_string(DDI_DEV_T_NONE,
   2012 	    devi, "video-adapter-type", "svga");
   2013 
   2014 	name = "display-edif-id";
   2015 	length = BOP_GETPROPLEN(bootops, name);
   2016 	if (length > 0 && length < MMU_PAGESIZE) {
   2017 		BOP_GETPROP(bootops, name, bop_staging_area);
   2018 		copy_boot_str(bop_staging_area, property_val, length);
   2019 		(void) ndi_prop_update_string(DDI_DEV_T_NONE,
   2020 		    devi, name, property_val);
   2021 	}
   2022 
   2023 	kmem_free(bop_staging_area, MMU_PAGESIZE);
   2024 }
   2025 
   2026 
   2027 /*
   2028  * This is temporary, but absolutely necessary.  If we are being
   2029  * booted with a device tree created by the DevConf project's bootconf
   2030  * program, then we have device information nodes that reflect
   2031  * reality.  At this point in time in the Solaris release schedule, the
   2032  * kernel drivers aren't prepared for reality.  They still depend on their
   2033  * own ad-hoc interpretations of the properties created when their .conf
   2034  * files were interpreted. These drivers use an "ignore-hardware-nodes"
   2035  * property to prevent them from using the nodes passed up from the bootconf
   2036  * device tree.
   2037  *
   2038  * Trying to assemble root file system drivers as we are booting from
   2039  * devconf will fail if the kernel driver is basing its name_addr's on the
 * pseudo-node device info while the bootpath passed up from bootconf is using
   2041  * reality-based name_addrs.  We help the boot along in this case by
   2042  * looking at the pre-bootconf bootpath and determining if we would have
   2043  * successfully matched if that had been the bootpath we had chosen.
   2044  *
   2045  * Note that we only even perform this extra check if we've booted
   2046  * using bootconf's 1275 compliant bootpath, this is the boot device, and
   2047  * we're trying to match the name_addr specified in the 1275 bootpath.
   2048  */
   2049 
   2050 #define	MAXCOMPONENTLEN	32
   2051 
   2052 int
   2053 x86_old_bootpath_name_addr_match(dev_info_t *cdip, char *caddr, char *naddr)
   2054 {
   2055 	/*
   2056 	 *  There are multiple criteria to be met before we can even
   2057 	 *  consider allowing a name_addr match here.
   2058 	 *
   2059 	 *  1) We must have been booted such that the bootconf program
   2060 	 *	created device tree nodes and properties.  This can be
   2061 	 *	determined by examining the 'bootpath' property.  This
   2062 	 *	property will be a non-null string iff bootconf was
   2063 	 *	involved in the boot.
   2064 	 *
   2065 	 *  2) The module that we want to match must be the boot device.
   2066 	 *
   2067 	 *  3) The instance of the module we are thinking of letting be
   2068 	 *	our match must be ignoring hardware nodes.
   2069 	 *
   2070 	 *  4) The name_addr we want to match must be the name_addr
   2071 	 *	specified in the 1275 bootpath.
   2072 	 */
   2073 	static char bootdev_module[MAXCOMPONENTLEN];
   2074 	static char bootdev_oldmod[MAXCOMPONENTLEN];
   2075 	static char bootdev_newaddr[MAXCOMPONENTLEN];
   2076 	static char bootdev_oldaddr[MAXCOMPONENTLEN];
   2077 	static int  quickexit;
   2078 
   2079 	char *daddr;
   2080 	int dlen;
   2081 
   2082 	char	*lkupname;
   2083 	int	rv = DDI_FAILURE;
   2084 
   2085 	if ((ddi_getlongprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
   2086 	    "devconf-addr", (caddr_t)&daddr, &dlen) == DDI_PROP_SUCCESS) &&
   2087 	    (ddi_getprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
   2088 	    "ignore-hardware-nodes", -1) != -1)) {
   2089 		if (strcmp(daddr, caddr) == 0) {
   2090 			return (DDI_SUCCESS);
   2091 		}
   2092 	}
   2093 
   2094 	if (quickexit)
   2095 		return (rv);
   2096 
   2097 	if (bootdev_module[0] == '\0') {
   2098 		char *addrp, *eoaddrp;
   2099 		char *busp, *modp, *atp;
   2100 		char *bp1275, *bp;
   2101 		int  bp1275len, bplen;
   2102 
   2103 		bp1275 = bp = addrp = eoaddrp = busp = modp = atp = NULL;
   2104 
   2105 		if (ddi_getlongprop(DDI_DEV_T_ANY,
   2106 		    ddi_root_node(), 0, "bootpath",
   2107 		    (caddr_t)&bp1275, &bp1275len) != DDI_PROP_SUCCESS ||
   2108 		    bp1275len <= 1) {
   2109 			/*
   2110 			 * We didn't boot from bootconf so we never need to
   2111 			 * do any special matches.
   2112 			 */
   2113 			quickexit = 1;
   2114 			if (bp1275)
   2115 				kmem_free(bp1275, bp1275len);
   2116 			return (rv);
   2117 		}
   2118 
   2119 		if (ddi_getlongprop(DDI_DEV_T_ANY,
   2120 		    ddi_root_node(), 0, "boot-path",
   2121 		    (caddr_t)&bp, &bplen) != DDI_PROP_SUCCESS || bplen <= 1) {
   2122 			/*
   2123 			 * No fallback position for matching. This is
   2124 			 * certainly unexpected, but we'll handle it
   2125 			 * just in case.
   2126 			 */
   2127 			quickexit = 1;
   2128 			kmem_free(bp1275, bp1275len);
   2129 			if (bp)
   2130 				kmem_free(bp, bplen);
   2131 			return (rv);
   2132 		}
   2133 
   2134 		/*
   2135 		 *  Determine boot device module and 1275 name_addr
   2136 		 *
   2137 		 *  bootpath assumed to be of the form /bus/module@name_addr
   2138 		 */
   2139 		if (busp = strchr(bp1275, '/')) {
   2140 			if (modp = strchr(busp + 1, '/')) {
   2141 				if (atp = strchr(modp + 1, '@')) {
   2142 					*atp = '\0';
   2143 					addrp = atp + 1;
   2144 					if (eoaddrp = strchr(addrp, '/'))
   2145 						*eoaddrp = '\0';
   2146 				}
   2147 			}
   2148 		}
   2149 
   2150 		if (modp && addrp) {
   2151 			(void) strncpy(bootdev_module, modp + 1,
   2152 			    MAXCOMPONENTLEN);
   2153 			bootdev_module[MAXCOMPONENTLEN - 1] = '\0';
   2154 
   2155 			(void) strncpy(bootdev_newaddr, addrp, MAXCOMPONENTLEN);
   2156 			bootdev_newaddr[MAXCOMPONENTLEN - 1] = '\0';
   2157 		} else {
   2158 			quickexit = 1;
   2159 			kmem_free(bp1275, bp1275len);
   2160 			kmem_free(bp, bplen);
   2161 			return (rv);
   2162 		}
   2163 
   2164 		/*
   2165 		 *  Determine fallback name_addr
   2166 		 *
   2167 		 *  10/3/96 - Also save fallback module name because it
   2168 		 *  might actually be different than the current module
   2169 		 *  name.  E.G., ISA pnp drivers have new names.
   2170 		 *
   2171 		 *  bootpath assumed to be of the form /bus/module@name_addr
   2172 		 */
   2173 		addrp = NULL;
   2174 		if (busp = strchr(bp, '/')) {
   2175 			if (modp = strchr(busp + 1, '/')) {
   2176 				if (atp = strchr(modp + 1, '@')) {
   2177 					*atp = '\0';
   2178 					addrp = atp + 1;
   2179 					if (eoaddrp = strchr(addrp, '/'))
   2180 						*eoaddrp = '\0';
   2181 				}
   2182 			}
   2183 		}
   2184 
   2185 		if (modp && addrp) {
   2186 			(void) strncpy(bootdev_oldmod, modp + 1,
   2187 			    MAXCOMPONENTLEN);
   2188 			bootdev_module[MAXCOMPONENTLEN - 1] = '\0';
   2189 
   2190 			(void) strncpy(bootdev_oldaddr, addrp, MAXCOMPONENTLEN);
   2191 			bootdev_oldaddr[MAXCOMPONENTLEN - 1] = '\0';
   2192 		}
   2193 
   2194 		/* Free up the bootpath storage now that we're done with it. */
   2195 		kmem_free(bp1275, bp1275len);
   2196 		kmem_free(bp, bplen);
   2197 
   2198 		if (bootdev_oldaddr[0] == '\0') {
   2199 			quickexit = 1;
   2200 			return (rv);
   2201 		}
   2202 	}
   2203 
   2204 	if (((lkupname = ddi_get_name(cdip)) != NULL) &&
   2205 	    (strcmp(bootdev_module, lkupname) == 0 ||
   2206 	    strcmp(bootdev_oldmod, lkupname) == 0) &&
   2207 	    ((ddi_getprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
   2208 	    "ignore-hardware-nodes", -1) != -1) ||
   2209 	    ignore_hardware_nodes) &&
   2210 	    strcmp(bootdev_newaddr, caddr) == 0 &&
   2211 	    strcmp(bootdev_oldaddr, naddr) == 0) {
   2212 		rv = DDI_SUCCESS;
   2213 	}
   2214 
   2215 	return (rv);
   2216 }
   2217 
/*
 * Perform a copy from a memory mapped device (whose devinfo pointer is devi)
 * separately mapped at devaddr in the kernel to a kernel buffer at kaddr.
 *
 * len bytes are moved with a plain bcopy().  devi and off are not consulted
 * (hence the ARGSUSED lint directive) and the function always returns 0.
 */
/*ARGSUSED*/
int
e_ddi_copyfromdev(dev_info_t *devi,
    off_t off, const void *devaddr, void *kaddr, size_t len)
{
	bcopy(devaddr, kaddr, len);
	return (0);
}
   2230 
/*
 * Perform a copy to a memory mapped device (whose devinfo pointer is devi)
 * separately mapped at devaddr in the kernel from a kernel buffer at kaddr.
 *
 * len bytes are moved with a plain bcopy().  devi and off are not consulted
 * (hence the ARGSUSED lint directive) and the function always returns 0.
 */
/*ARGSUSED*/
int
e_ddi_copytodev(dev_info_t *devi,
    off_t off, const void *kaddr, void *devaddr, size_t len)
{
	bcopy(kaddr, devaddr, len);
	return (0);
}
   2243 
   2244 
   2245 static int
   2246 poke_mem(peekpoke_ctlops_t *in_args)
   2247 {
   2248 	int err = DDI_SUCCESS;
   2249 	on_trap_data_t otd;
   2250 
   2251 	/* Set up protected environment. */
   2252 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
   2253 		switch (in_args->size) {
   2254 		case sizeof (uint8_t):
   2255 			*(uint8_t *)(in_args->dev_addr) =
   2256 			    *(uint8_t *)in_args->host_addr;
   2257 			break;
   2258 
   2259 		case sizeof (uint16_t):
   2260 			*(uint16_t *)(in_args->dev_addr) =
   2261 			    *(uint16_t *)in_args->host_addr;
   2262 			break;
   2263 
   2264 		case sizeof (uint32_t):
   2265 			*(uint32_t *)(in_args->dev_addr) =
   2266 			    *(uint32_t *)in_args->host_addr;
   2267 			break;
   2268 
   2269 		case sizeof (uint64_t):
   2270 			*(uint64_t *)(in_args->dev_addr) =
   2271 			    *(uint64_t *)in_args->host_addr;
   2272 			break;
   2273 
   2274 		default:
   2275 			err = DDI_FAILURE;
   2276 			break;
   2277 		}
   2278 	} else
   2279 		err = DDI_FAILURE;
   2280 
   2281 	/* Take down protected environment. */
   2282 	no_trap();
   2283 
   2284 	return (err);
   2285 }
   2286 
   2287 
   2288 static int
   2289 peek_mem(peekpoke_ctlops_t *in_args)
   2290 {
   2291 	int err = DDI_SUCCESS;
   2292 	on_trap_data_t otd;
   2293 
   2294 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
   2295 		switch (in_args->size) {
   2296 		case sizeof (uint8_t):
   2297 			*(uint8_t *)in_args->host_addr =
   2298 			    *(uint8_t *)in_args->dev_addr;
   2299 			break;
   2300 
   2301 		case sizeof (uint16_t):
   2302 			*(uint16_t *)in_args->host_addr =
   2303 			    *(uint16_t *)in_args->dev_addr;
   2304 			break;
   2305 
   2306 		case sizeof (uint32_t):
   2307 			*(uint32_t *)in_args->host_addr =
   2308 			    *(uint32_t *)in_args->dev_addr;
   2309 			break;
   2310 
   2311 		case sizeof (uint64_t):
   2312 			*(uint64_t *)in_args->host_addr =
   2313 			    *(uint64_t *)in_args->dev_addr;
   2314 			break;
   2315 
   2316 		default:
   2317 			err = DDI_FAILURE;
   2318 			break;
   2319 		}
   2320 	} else
   2321 		err = DDI_FAILURE;
   2322 
   2323 	no_trap();
   2324 	return (err);
   2325 }
   2326 
   2327 
   2328 /*
   2329  * This is called only to process peek/poke when the DIP is NULL.
   2330  * Assume that this is for memory, as nexi take care of device safe accesses.
   2331  */
   2332 int
   2333 peekpoke_mem(ddi_ctl_enum_t cmd, peekpoke_ctlops_t *in_args)
   2334 {
   2335 	return (cmd == DDI_CTLOPS_PEEK ? peek_mem(in_args) : poke_mem(in_args));
   2336 }
   2337 
   2338 /*
   2339  * we've just done a cautious put/get. Check if it was successful by
   2340  * calling pci_ereport_post() on all puts and for any gets that return -1
   2341  */
   2342 static int
   2343 pci_peekpoke_check_fma(dev_info_t *dip, void *arg, ddi_ctl_enum_t ctlop,
   2344     void (*scan)(dev_info_t *, ddi_fm_error_t *))
   2345 {
   2346 	int	rval = DDI_SUCCESS;
   2347 	peekpoke_ctlops_t *in_args = (peekpoke_ctlops_t *)arg;
   2348 	ddi_fm_error_t de;
   2349 	ddi_acc_impl_t *hp = (ddi_acc_impl_t *)in_args->handle;
   2350 	ddi_acc_hdl_t *hdlp = (ddi_acc_hdl_t *)in_args->handle;
   2351 	int check_err = 0;
   2352 	int repcount = in_args->repcount;
   2353 
   2354 	if (ctlop == DDI_CTLOPS_POKE &&
   2355 	    hdlp->ah_acc.devacc_attr_access != DDI_CAUTIOUS_ACC)
   2356 		return (DDI_SUCCESS);
   2357 
   2358 	if (ctlop == DDI_CTLOPS_PEEK &&
   2359 	    hdlp->ah_acc.devacc_attr_access != DDI_CAUTIOUS_ACC) {
   2360 		for (; repcount; repcount--) {
   2361 			switch (in_args->size) {
   2362 			case sizeof (uint8_t):
   2363 				if (*(uint8_t *)in_args->host_addr == 0xff)
   2364 					check_err = 1;
   2365 				break;
   2366 			case sizeof (uint16_t):
   2367 				if (*(uint16_t *)in_args->host_addr == 0xffff)
   2368 					check_err = 1;
   2369 				break;
   2370 			case sizeof (uint32_t):
   2371 				if (*(uint32_t *)in_args->host_addr ==
   2372 				    0xffffffff)
   2373 					check_err = 1;
   2374 				break;
   2375 			case sizeof (uint64_t):
   2376 				if (*(uint64_t *)in_args->host_addr ==
   2377 				    0xffffffffffffffff)
   2378 					check_err = 1;
   2379 				break;
   2380 			}
   2381 		}
   2382 		if (check_err == 0)
   2383 			return (DDI_SUCCESS);
   2384 	}
   2385 	/*
   2386 	 * for a cautious put or get or a non-cautious get that returned -1 call
   2387 	 * io framework to see if there really was an error
   2388 	 */
   2389 	bzero(&de, sizeof (ddi_fm_error_t));
   2390 	de.fme_version = DDI_FME_VERSION;
   2391 	de.fme_ena = fm_ena_generate(0, FM_ENA_FMT1);
   2392 	if (hdlp->ah_acc.devacc_attr_access == DDI_CAUTIOUS_ACC) {
   2393 		de.fme_flag = DDI_FM_ERR_EXPECTED;
   2394 		de.fme_acc_handle = in_args->handle;
   2395 	} else if (hdlp->ah_acc.devacc_attr_access == DDI_DEFAULT_ACC) {
   2396 		/*
   2397 		 * We only get here with DDI_DEFAULT_ACC for config space gets.
   2398 		 * Non-hardened drivers may be probing the hardware and
   2399 		 * expecting -1 returned. So need to treat errors on
   2400 		 * DDI_DEFAULT_ACC as DDI_FM_ERR_EXPECTED.
   2401 		 */
   2402 		de.fme_flag = DDI_FM_ERR_EXPECTED;
   2403 		de.fme_acc_handle = in_args->handle;
   2404 	} else {
   2405 		/*
   2406 		 * Hardened driver doing protected accesses shouldn't
   2407 		 * get errors unless there's a hardware problem. Treat
   2408 		 * as nonfatal if there's an error, but set UNEXPECTED
   2409 		 * so we raise ereports on any errors and potentially
   2410 		 * fault the device
   2411 		 */
   2412 		de.fme_flag = DDI_FM_ERR_UNEXPECTED;
   2413 	}
   2414 	(void) scan(dip, &de);
   2415 	if (hdlp->ah_acc.devacc_attr_access != DDI_DEFAULT_ACC &&
   2416 	    de.fme_status != DDI_FM_OK) {
   2417 		ndi_err_t *errp = (ndi_err_t *)hp->ahi_err;
   2418 		rval = DDI_FAILURE;
   2419 		errp->err_ena = de.fme_ena;
   2420 		errp->err_expected = de.fme_flag;
   2421 		errp->err_status = DDI_FM_NONFATAL;
   2422 	}
   2423 	return (rval);
   2424 }
   2425 
   2426 /*
   2427  * pci_peekpoke_check_nofma() is for when an error occurs on a register access
   2428  * during pci_ereport_post(). We can't call pci_ereport_post() again or we'd
   2429  * recurse, so assume all puts are OK and gets have failed if they return -1
   2430  */
   2431 static int
   2432 pci_peekpoke_check_nofma(void *arg, ddi_ctl_enum_t ctlop)
   2433 {
   2434 	int rval = DDI_SUCCESS;
   2435 	peekpoke_ctlops_t *in_args = (peekpoke_ctlops_t *)arg;
   2436 	ddi_acc_impl_t *hp = (ddi_acc_impl_t *)in_args->handle;
   2437 	ddi_acc_hdl_t *hdlp = (ddi_acc_hdl_t *)in_args->handle;
   2438 	int repcount = in_args->repcount;
   2439 
   2440 	if (ctlop == DDI_CTLOPS_POKE)
   2441 		return (rval);
   2442 
   2443 	for (; repcount; repcount--) {
   2444 		switch (in_args->size) {
   2445 		case sizeof (uint8_t):
   2446 			if (*(uint8_t *)in_args->host_addr == 0xff)
   2447 				rval = DDI_FAILURE;
   2448 			break;
   2449 		case sizeof (uint16_t):
   2450 			if (*(uint16_t *)in_args->host_addr == 0xffff)
   2451 				rval = DDI_FAILURE;
   2452 			break;
   2453 		case sizeof (uint32_t):
   2454 			if (*(uint32_t *)in_args->host_addr == 0xffffffff)
   2455 				rval = DDI_FAILURE;
   2456 			break;
   2457 		case sizeof (uint64_t):
   2458 			if (*(uint64_t *)in_args->host_addr ==
   2459 			    0xffffffffffffffff)
   2460 				rval = DDI_FAILURE;
   2461 			break;
   2462 		}
   2463 	}
   2464 	if (hdlp->ah_acc.devacc_attr_access != DDI_DEFAULT_ACC &&
   2465 	    rval == DDI_FAILURE) {
   2466 		ndi_err_t *errp = (ndi_err_t *)hp->ahi_err;
   2467 		errp->err_ena = fm_ena_generate(0, FM_ENA_FMT1);
   2468 		errp->err_expected = DDI_FM_ERR_UNEXPECTED;
   2469 		errp->err_status = DDI_FM_NONFATAL;
   2470 	}
   2471 	return (rval);
   2472 }
   2473 
/*
 * Run a cautious access through `handler' and then verify its outcome.
 *
 *	dip, rdip, ctlop, arg, result
 *			passed straight through to handler(); arg must point
 *			at a peekpoke_ctlops_t
 *	handler		routine that performs the actual access
 *	err_mutexp	mutex held around the error check
 *	peek_poke_mutexp mutex held around the access itself
 *	scan		error scanning callback handed to
 *			pci_peekpoke_check_fma()
 *
 * Returns DDI_FAILURE when the request carries no access handle, otherwise
 * the handler's result, replaced by the result of the follow-up check when
 * the handler itself reported DDI_SUCCESS.
 */
int
pci_peekpoke_check(dev_info_t *dip, dev_info_t *rdip,
	ddi_ctl_enum_t ctlop, void *arg, void *result,
	int (*handler)(dev_info_t *, dev_info_t *, ddi_ctl_enum_t, void *,
	void *), kmutex_t *err_mutexp, kmutex_t *peek_poke_mutexp,
	void (*scan)(dev_info_t *, ddi_fm_error_t *))
{
	int rval;
	peekpoke_ctlops_t *in_args = (peekpoke_ctlops_t *)arg;
	ddi_acc_impl_t *hp = (ddi_acc_impl_t *)in_args->handle;

	/*
	 * this function only supports cautious accesses, not peeks/pokes
	 * which don't have a handle
	 */
	if (hp == NULL)
		return (DDI_FAILURE);

	if (hp->ahi_acc_attr & DDI_ACCATTR_CONFIG_SPACE) {
		/* Probe err_mutexp without blocking to detect recursion. */
		if (!mutex_tryenter(err_mutexp)) {
			/*
			 * As this may be a recursive call from within
			 * pci_ereport_post() we can't wait for the mutexes.
			 * Fortunately we know someone is already calling
			 * pci_ereport_post() which will handle the error bits
			 * for us, and as this is a config space access we can
			 * just do the access and check return value for -1
			 * using pci_peekpoke_check_nofma().
			 */
			rval = handler(dip, rdip, ctlop, arg, result);
			if (rval == DDI_SUCCESS)
				rval = pci_peekpoke_check_nofma(arg, ctlop);
			return (rval);
		}
		/*
		 * This can't be a recursive call. Drop the err_mutex and get
		 * both mutexes in the right order. If an error hasn't already
		 * been detected by the ontrap code, use pci_peekpoke_check_fma
		 * which will call pci_ereport_post() to check error status.
		 */
		mutex_exit(err_mutexp);
	}
	/* Lock order: peek_poke_mutexp first, err_mutexp nested inside it. */
	mutex_enter(peek_poke_mutexp);
	rval = handler(dip, rdip, ctlop, arg, result);
	if (rval == DDI_SUCCESS) {
		mutex_enter(err_mutexp);
		rval = pci_peekpoke_check_fma(dip, arg, ctlop, scan);
		mutex_exit(err_mutexp);
	}
	mutex_exit(peek_poke_mutexp);
	return (rval);
}
   2526 
/*
 * Platform DDI setup.  In order, this routine:
 *   - creates a "ramdisk" child of the root node, records the boot
 *     ramdisk extent on it as the RD_EXISTING_PROP_NAME property and binds
 *     a driver to it;
 *   - if pseudo_isa is set, creates and binds an "isa" child of the root;
 *   - imports the boot properties and the VGA properties;
 *   - checks for administratively disabled drivers;
 *   - on non-xpv kernels, calls startup_bios_disk() unless post_fastreboot
 *     is set;
 *   - runs the initial bus probes.
 */
void
impl_setup_ddi(void)
{
#if !defined(__xpv)
	extern void startup_bios_disk(void);
	extern int post_fastreboot;
#endif
	dev_info_t *xdip, *isa_dip;
	rd_existing_t rd_mem_prop;
	int err;

	ndi_devi_alloc_sleep(ddi_root_node(), "ramdisk",
	    (pnode_t)DEVI_SID_NODEID, &xdip);

	/* Fetch the ramdisk bounds from the boot properties. */
	(void) BOP_GETPROP(bootops,
	    "ramdisk_start", (void *)&ramdisk_start);
	(void) BOP_GETPROP(bootops,
	    "ramdisk_end", (void *)&ramdisk_end);

#ifdef __xpv
	/* Under xpv both bounds are shifted down by ONE_GIG. */
	ramdisk_start -= ONE_GIG;
	ramdisk_end -= ONE_GIG;
#endif
	rd_mem_prop.phys = ramdisk_start;
	/* ramdisk_end is treated as inclusive, hence the + 1. */
	rd_mem_prop.size = ramdisk_end - ramdisk_start + 1;

	(void) ndi_prop_update_byte_array(DDI_DEV_T_NONE, xdip,
	    RD_EXISTING_PROP_NAME, (uchar_t *)&rd_mem_prop,
	    sizeof (rd_mem_prop));
	/* The bind result is only examined by the ASSERT below. */
	err = ndi_devi_bind_driver(xdip, 0);
	ASSERT(err == 0);

	/* isa node */
	if (pseudo_isa) {
		ndi_devi_alloc_sleep(ddi_root_node(), "isa",
		    (pnode_t)DEVI_SID_NODEID, &isa_dip);
		(void) ndi_prop_update_string(DDI_DEV_T_NONE, isa_dip,
		    "device_type", "isa");
		(void) ndi_prop_update_string(DDI_DEV_T_NONE, isa_dip,
		    "bus-type", "isa");
		(void) ndi_devi_bind_driver(isa_dip, 0);
	}

	/*
	 * Read in the properties from the boot.
	 */
	get_boot_properties();

	/* not framebuffer should be enumerated, if present */
	get_vga_properties();

	/*
	 * Check for administratively disabled drivers.
	 */
	check_driver_disable();

#if !defined(__xpv)
	if (!post_fastreboot)
		startup_bios_disk();
#endif
	/* do bus dependent probes. */
	impl_bus_initialprobe();
}
   2590 
   2591 dev_t
   2592 getrootdev(void)
   2593 {
   2594 	/*
   2595 	 * Precedence given to rootdev if set in /etc/system
   2596 	 */
   2597 	if (root_is_svm == B_TRUE) {
   2598 		return (ddi_pathname_to_dev_t(svm_bootpath));
   2599 	}
   2600 
   2601 	/*
   2602 	 * Usually rootfs.bo_name is initialized by the
   2603 	 * the bootpath property from bootenv.rc, but
   2604 	 * defaults to "/ramdisk:a" otherwise.
   2605 	 */
   2606 	return (ddi_pathname_to_dev_t(rootfs.bo_name));
   2607 }
   2608 
/*
 * Singly linked list of registered bus probe callbacks, headed by
 * bus_probes.  Entries are appended by impl_bus_add_probe() and removed by
 * impl_bus_delete_probe().
 */
static struct bus_probe {
	struct bus_probe *next;		/* next entry, NULL at end of list */
	void (*probe)(int);		/* called with 0 (initial) or 1 (reprobe) */
} *bus_probes;
   2613 
   2614 void
   2615 impl_bus_add_probe(void (*func)(int))
   2616 {
   2617 	struct bus_probe *probe;
   2618 	struct bus_probe *lastprobe = NULL;
   2619 
   2620 	probe = kmem_alloc(sizeof (*probe), KM_SLEEP);
   2621 	probe->probe = func;
   2622 	probe->next = NULL;
   2623 
   2624 	if (!bus_probes) {
   2625 		bus_probes = probe;
   2626 		return;
   2627 	}
   2628 
   2629 	lastprobe = bus_probes;
   2630 	while (lastprobe->next)
   2631 		lastprobe = lastprobe->next;
   2632 	lastprobe->next = probe;
   2633 }
   2634 
   2635 /*ARGSUSED*/
   2636 void
   2637 impl_bus_delete_probe(void (*func)(int))
   2638 {
   2639 	struct bus_probe *prev = NULL;
   2640 	struct bus_probe *probe = bus_probes;
   2641 
   2642 	while (probe) {
   2643 		if (probe->probe == func)
   2644 			break;
   2645 		prev = probe;
   2646 		probe = probe->next;
   2647 	}
   2648 
   2649 	if (probe == NULL)
   2650 		return;
   2651 
   2652 	if (prev)
   2653 		prev->next = probe->next;
   2654 	else
   2655 		bus_probes = probe->next;
   2656 
   2657 	kmem_free(probe, sizeof (struct bus_probe));
   2658 }
   2659 
   2660 /*
   2661  * impl_bus_initialprobe
   2662  *	Modload the prom simulator, then let it probe to verify existence
   2663  *	and type of PCI support.
   2664  */
   2665 static void
   2666 impl_bus_initialprobe(void)
   2667 {
   2668 	struct bus_probe *probe;
   2669 
   2670 	/* load modules to install bus probes */
   2671 #if defined(__xpv)
   2672 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
   2673 		if (modload("misc", "pci_autoconfig") < 0) {
   2674 			panic("failed to load misc/pci_autoconfig");
   2675 		}
   2676 
   2677 		if (modload("drv", "isa") < 0)
   2678 			panic("failed to load drv/isa");
   2679 	}
   2680 
   2681 	(void) modload("misc", "xpv_autoconfig");
   2682 #else
   2683 	if (modload("misc", "pci_autoconfig") < 0) {
   2684 		panic("failed to load misc/pci_autoconfig");
   2685 	}
   2686 
   2687 	(void) modload("misc", "acpidev");
   2688 
   2689 	if (modload("drv", "isa") < 0)
   2690 		panic("failed to load drv/isa");
   2691 #endif
   2692 
   2693 	probe = bus_probes;
   2694 	while (probe) {
   2695 		/* run the probe functions */
   2696 		(*probe->probe)(0);
   2697 		probe = probe->next;
   2698 	}
   2699 }
   2700 
   2701 /*
   2702  * impl_bus_reprobe
   2703  *	Reprogram devices not set up by firmware.
   2704  */
   2705 static void
   2706 impl_bus_reprobe(void)
   2707 {
   2708 	struct bus_probe *probe;
   2709 
   2710 	probe = bus_probes;
   2711 	while (probe) {
   2712 		/* run the probe function */
   2713 		(*probe->probe)(1);
   2714 		probe = probe->next;
   2715 	}
   2716 }
   2717 
   2718 
   2719 /*
   2720  * The following functions ready a cautious request to go up to the nexus
   2721  * driver.  It is up to the nexus driver to decide how to process the request.
   2722  * It may choose to call i_ddi_do_caut_get/put in this file, or do it
   2723  * differently.
   2724  */
   2725 
   2726 static void
   2727 i_ddi_caut_getput_ctlops(ddi_acc_impl_t *hp, uint64_t host_addr,
   2728     uint64_t dev_addr, size_t size, size_t repcount, uint_t flags,
   2729     ddi_ctl_enum_t cmd)
   2730 {
   2731 	peekpoke_ctlops_t	cautacc_ctlops_arg;
   2732 
   2733 	cautacc_ctlops_arg.size = size;
   2734 	cautacc_ctlops_arg.dev_addr = dev_addr;
   2735 	cautacc_ctlops_arg.host_addr = host_addr;
   2736 	cautacc_ctlops_arg.handle = (ddi_acc_handle_t)hp;
   2737 	cautacc_ctlops_arg.repcount = repcount;
   2738 	cautacc_ctlops_arg.flags = flags;
   2739 
   2740 	(void) ddi_ctlops(hp->ahi_common.ah_dip, hp->ahi_common.ah_dip, cmd,
   2741 	    &cautacc_ctlops_arg, NULL);
   2742 }
   2743 
/*
 * Cautious 8-bit get.  Issues a single-element (repcount 1, flags 0)
 * DDI_CTLOPS_PEEK request for addr with the local `value' as host buffer
 * and returns that local.  i_ddi_caut_getput_ctlops() reports no status,
 * so a failed access cannot be detected here.
 */
uint8_t
i_ddi_caut_get8(ddi_acc_impl_t *hp, uint8_t *addr)
{
	uint8_t value;
	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
	    sizeof (uint8_t), 1, 0, DDI_CTLOPS_PEEK);

	return (value);
}
   2753 
/*
 * Cautious 16-bit get.  Issues a single-element (repcount 1, flags 0)
 * DDI_CTLOPS_PEEK request for addr with the local `value' as host buffer
 * and returns that local.  i_ddi_caut_getput_ctlops() reports no status,
 * so a failed access cannot be detected here.
 */
uint16_t
i_ddi_caut_get16(ddi_acc_impl_t *hp, uint16_t *addr)
{
	uint16_t value;
	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
	    sizeof (uint16_t), 1, 0, DDI_CTLOPS_PEEK);

	return (value);
}
   2763 
/*
 * Cautious 32-bit get.  Issues a single-element (repcount 1, flags 0)
 * DDI_CTLOPS_PEEK request for addr with the local `value' as host buffer
 * and returns that local.  i_ddi_caut_getput_ctlops() reports no status,
 * so a failed access cannot be detected here.
 */
uint32_t
i_ddi_caut_get32(ddi_acc_impl_t *hp, uint32_t *addr)
{
	uint32_t value;
	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
	    sizeof (uint32_t), 1, 0, DDI_CTLOPS_PEEK);

	return (value);
}
   2773 
/*
 * Cautious 64-bit get.  Issues a single-element (repcount 1, flags 0)
 * DDI_CTLOPS_PEEK request for addr with the local `value' as host buffer
 * and returns that local.  i_ddi_caut_getput_ctlops() reports no status,
 * so a failed access cannot be detected here.
 */
uint64_t
i_ddi_caut_get64(ddi_acc_impl_t *hp, uint64_t *addr)
{
	uint64_t value;
	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
	    sizeof (uint64_t), 1, 0, DDI_CTLOPS_PEEK);

	return (value);
}
   2783 
/*
 * Cautious 8-bit put.  Issues a single-element (repcount 1, flags 0)
 * DDI_CTLOPS_POKE request that writes the by-value argument `value'
 * (passed by its address as the host buffer) to addr.
 */
void
i_ddi_caut_put8(ddi_acc_impl_t *hp, uint8_t *addr, uint8_t value)
{
	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
	    sizeof (uint8_t), 1, 0, DDI_CTLOPS_POKE);
}
   2790 
/*
 * Cautious 16-bit put.  Issues a single-element (repcount 1, flags 0)
 * DDI_CTLOPS_POKE request that writes the by-value argument `value'
 * (passed by its address as the host buffer) to addr.
 */
void
i_ddi_caut_put16(ddi_acc_impl_t *hp, uint16_t *addr, uint16_t value)
{
	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
	    sizeof (uint16_t), 1, 0, DDI_CTLOPS_POKE);
}
   2797 
/*
 * Cautious 32-bit put.  Issues a single-element (repcount 1, flags 0)
 * DDI_CTLOPS_POKE request that writes the by-value argument `value'
 * (passed by its address as the host buffer) to addr.
 */
void
i_ddi_caut_put32(ddi_acc_impl_t *hp, uint32_t *addr, uint32_t value)
{
	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
	    sizeof (uint32_t), 1, 0, DDI_CTLOPS_POKE);
}
   2804 
/*
 * Cautious 64-bit put.  Issues a single-element (repcount 1, flags 0)
 * DDI_CTLOPS_POKE request that writes the by-value argument `value'
 * (passed by its address as the host buffer) to addr.
 */
void
i_ddi_caut_put64(ddi_acc_impl_t *hp, uint64_t *addr, uint64_t value)
{
	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
	    sizeof (uint64_t), 1, 0, DDI_CTLOPS_POKE);
}
   2811 
/*
 * Cautious repeated 8-bit get.  Forwards host_addr, dev_addr, repcount and
 * flags unchanged in a DDI_CTLOPS_PEEK request with an element size of
 * sizeof (uint8_t).
 */
void
i_ddi_caut_rep_get8(ddi_acc_impl_t *hp, uint8_t *host_addr, uint8_t *dev_addr,
	size_t repcount, uint_t flags)
{
	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
	    sizeof (uint8_t), repcount, flags, DDI_CTLOPS_PEEK);
}
   2819 
/*
 * Cautious repeated 16-bit get.  Forwards host_addr, dev_addr, repcount and
 * flags unchanged in a DDI_CTLOPS_PEEK request with an element size of
 * sizeof (uint16_t).
 */
void
i_ddi_caut_rep_get16(ddi_acc_impl_t *hp, uint16_t *host_addr,
    uint16_t *dev_addr, size_t repcount, uint_t flags)
{
	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
	    sizeof (uint16_t), repcount, flags, DDI_CTLOPS_PEEK);
}
   2827 
/*
 * Cautious repeated 32-bit get.  Forwards host_addr, dev_addr, repcount and
 * flags unchanged in a DDI_CTLOPS_PEEK request with an element size of
 * sizeof (uint32_t).
 */
void
i_ddi_caut_rep_get32(ddi_acc_impl_t *hp, uint32_t *host_addr,
    uint32_t *dev_addr, size_t repcount, uint_t flags)
{
	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
	    sizeof (uint32_t), repcount, flags, DDI_CTLOPS_PEEK);
}
   2835 
/*
 * Cautious repeated 64-bit get.  Forwards host_addr, dev_addr, repcount and
 * flags unchanged in a DDI_CTLOPS_PEEK request with an element size of
 * sizeof (uint64_t).
 */
void
i_ddi_caut_rep_get64(ddi_acc_impl_t *hp, uint64_t *host_addr,
    uint64_t *dev_addr, size_t repcount, uint_t flags)
{
	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
	    sizeof (uint64_t), repcount, flags, DDI_CTLOPS_PEEK);
}
   2843 
/*
 * Cautious repeated 8-bit put.  Forwards host_addr, dev_addr, repcount and
 * flags unchanged in a DDI_CTLOPS_POKE request with an element size of
 * sizeof (uint8_t).
 */
void
i_ddi_caut_rep_put8(ddi_acc_impl_t *hp, uint8_t *host_addr, uint8_t *dev_addr,
	size_t repcount, uint_t flags)
{
	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
	    sizeof (uint8_t), repcount, flags, DDI_CTLOPS_POKE);
}
   2851 
/*
 * Cautious repeated 16-bit put.  Forwards host_addr, dev_addr, repcount and
 * flags unchanged in a DDI_CTLOPS_POKE request with an element size of
 * sizeof (uint16_t).
 */
void
i_ddi_caut_rep_put16(ddi_acc_impl_t *hp, uint16_t *host_addr,
    uint16_t *dev_addr, size_t repcount, uint_t flags)
{
	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
	    sizeof (uint16_t), repcount, flags, DDI_CTLOPS_POKE);
}
   2859 
/*
 * Cautious repeated 32-bit put.  Forwards host_addr, dev_addr, repcount and
 * flags unchanged in a DDI_CTLOPS_POKE request with an element size of
 * sizeof (uint32_t).
 */
void
i_ddi_caut_rep_put32(ddi_acc_impl_t *hp, uint32_t *host_addr,
    uint32_t *dev_addr, size_t repcount, uint_t flags)
{
	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
	    sizeof (uint32_t), repcount, flags, DDI_CTLOPS_POKE);
}
   2867 
/*
 * Cautious repeated 64-bit put.  Forwards host_addr, dev_addr, repcount and
 * flags unchanged in a DDI_CTLOPS_POKE request with an element size of
 * sizeof (uint64_t).
 */
void
i_ddi_caut_rep_put64(ddi_acc_impl_t *hp, uint64_t *host_addr,
    uint64_t *dev_addr, size_t repcount, uint_t flags)
{
	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
	    sizeof (uint64_t), repcount, flags, DDI_CTLOPS_POKE);
}
   2875 
   2876 boolean_t
   2877 i_ddi_copybuf_required(ddi_dma_attr_t *attrp)
   2878 {
   2879 	uint64_t hi_pa;
   2880 
   2881 	hi_pa = ((uint64_t)physmax + 1ull) << PAGESHIFT;
   2882 	if (attrp->dma_attr_addr_hi < hi_pa) {
   2883 		return (B_TRUE);
   2884 	}
   2885 
   2886 	return (B_FALSE);
   2887 }
   2888 
   2889 size_t
   2890 i_ddi_copybuf_size()
   2891 {
   2892 	return (dma_max_copybuf_size);
   2893 }
   2894 
   2895 /*
   2896  * i_ddi_dma_max()
   2897  *    returns the maximum DMA size which can be performed in a single DMA
   2898  *    window taking into account the devices DMA contraints (attrp), the
   2899  *    maximum copy buffer size (if applicable), and the worse case buffer
   2900  *    fragmentation.
   2901  */
   2902 /*ARGSUSED*/
   2903 uint32_t
   2904 i_ddi_dma_max(dev_info_t *dip, ddi_dma_attr_t *attrp)
   2905 {
   2906 	uint64_t maxxfer;
   2907 
   2908 
   2909 	/*
   2910 	 * take the min of maxxfer and the the worse case fragementation
   2911 	 * (e.g. every cookie <= 1 page)
   2912 	 */
   2913 	maxxfer = MIN(attrp->dma_attr_maxxfer,
   2914 	    ((uint64_t)(attrp->dma_attr_sgllen - 1) << PAGESHIFT));
   2915 
   2916 	/*
   2917 	 * If the DMA engine can't reach all off memory, we also need to take
   2918 	 * the max size of the copybuf into consideration.
   2919 	 */
   2920 	if (i_ddi_copybuf_required(attrp)) {
   2921 		maxxfer = MIN(i_ddi_copybuf_size(), maxxfer);
   2922 	}
   2923 
   2924 	/*
   2925 	 * we only return a 32-bit value. Make sure it's not -1. Round to a
   2926 	 * page so it won't be mistaken for an error value during debug.
   2927 	 */
   2928 	if (maxxfer >= 0xFFFFFFFF) {
   2929 		maxxfer = 0xFFFFF000;
   2930 	}
   2931 
   2932 	/*
   2933 	 * make sure the value we return is a whole multiple of the
   2934 	 * granlarity.
   2935 	 */
   2936 	if (attrp->dma_attr_granular > 1) {
   2937 		maxxfer = maxxfer - (maxxfer % attrp->dma_attr_granular);
   2938 	}
   2939 
   2940 	return ((uint32_t)maxxfer);
   2941 }
   2942 
/*
 * Intentionally empty on this platform: dip is accepted and ignored.
 */
/*ARGSUSED*/
void
translate_devid(dev_info_t *dip)
{
}
   2948 
   2949 pfn_t
   2950 i_ddi_paddr_to_pfn(paddr_t paddr)
   2951 {
   2952 	pfn_t pfn;
   2953 
   2954 #ifdef __xpv
   2955 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
   2956 		pfn = xen_assign_pfn(mmu_btop(paddr));
   2957 	} else {
   2958 		pfn = mmu_btop(paddr);
   2959 	}
   2960 #else
   2961 	pfn = mmu_btop(paddr);
   2962 #endif
   2963 
   2964 	return (pfn);
   2965 }
   2966