Home | History | Annotate | Download | only in os
      1      0    stevel /*
      2      0    stevel  * CDDL HEADER START
      3      0    stevel  *
      4      0    stevel  * The contents of this file are subject to the terms of the
      5   1253  lq150181  * Common Development and Distribution License (the "License").
      6   1253  lq150181  * You may not use this file except in compliance with the License.
      7      0    stevel  *
      8      0    stevel  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9      0    stevel  * or http://www.opensolaris.org/os/licensing.
     10      0    stevel  * See the License for the specific language governing permissions
     11      0    stevel  * and limitations under the License.
     12      0    stevel  *
     13      0    stevel  * When distributing Covered Code, include this CDDL HEADER in each
     14      0    stevel  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15      0    stevel  * If applicable, add the following below this CDDL HEADER, with the
     16      0    stevel  * fields enclosed by brackets "[]" replaced with your own identifying
     17      0    stevel  * information: Portions Copyright [yyyy] [name of copyright owner]
     18      0    stevel  *
     19      0    stevel  * CDDL HEADER END
     20      0    stevel  */
     21   1253  lq150181 
     22      0    stevel /*
     23   9160    Sherry  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24      0    stevel  * Use is subject to license terms.
     25      0    stevel  */
     26      0    stevel 
     27      0    stevel #include <sys/types.h>
     28      0    stevel #include <sys/t_lock.h>
     29      0    stevel #include <sys/param.h>
     30   3939     sethg #include <sys/segments.h>
     31      0    stevel #include <sys/sysmacros.h>
     32      0    stevel #include <sys/signal.h>
     33      0    stevel #include <sys/systm.h>
     34      0    stevel #include <sys/user.h>
     35      0    stevel #include <sys/mman.h>
     36      0    stevel #include <sys/vm.h>
     37      0    stevel 
     38      0    stevel #include <sys/disp.h>
     39      0    stevel #include <sys/class.h>
     40      0    stevel 
     41      0    stevel #include <sys/proc.h>
     42      0    stevel #include <sys/buf.h>
     43      0    stevel #include <sys/kmem.h>
     44      0    stevel 
     45      0    stevel #include <sys/reboot.h>
     46      0    stevel #include <sys/uadmin.h>
     47      0    stevel #include <sys/callb.h>
     48      0    stevel 
     49      0    stevel #include <sys/cred.h>
     50      0    stevel #include <sys/vnode.h>
     51      0    stevel #include <sys/file.h>
     52      0    stevel 
     53      0    stevel #include <sys/procfs.h>
     54      0    stevel #include <sys/acct.h>
     55      0    stevel 
     56      0    stevel #include <sys/vfs.h>
     57      0    stevel #include <sys/dnlc.h>
     58      0    stevel #include <sys/var.h>
     59      0    stevel #include <sys/cmn_err.h>
     60      0    stevel #include <sys/utsname.h>
     61      0    stevel #include <sys/debug.h>
     62      0    stevel 
     63      0    stevel #include <sys/dumphdr.h>
     64      0    stevel #include <sys/bootconf.h>
     65      0    stevel #include <sys/varargs.h>
     66      0    stevel #include <sys/promif.h>
     67      0    stevel #include <sys/modctl.h>
     68      0    stevel 
     69      0    stevel #include <sys/consdev.h>
     70      0    stevel #include <sys/frame.h>
     71      0    stevel 
     72      0    stevel #include <sys/sunddi.h>
     73      0    stevel #include <sys/ddidmareq.h>
     74      0    stevel #include <sys/psw.h>
     75      0    stevel #include <sys/regset.h>
     76      0    stevel #include <sys/privregs.h>
     77      0    stevel #include <sys/clock.h>
     78      0    stevel #include <sys/tss.h>
     79      0    stevel #include <sys/cpu.h>
     80      0    stevel #include <sys/stack.h>
     81      0    stevel #include <sys/trap.h>
     82      0    stevel #include <sys/pic.h>
     83      0    stevel #include <vm/hat.h>
     84      0    stevel #include <vm/anon.h>
     85      0    stevel #include <vm/as.h>
     86      0    stevel #include <vm/page.h>
     87      0    stevel #include <vm/seg.h>
     88      0    stevel #include <vm/seg_kmem.h>
     89      0    stevel #include <vm/seg_map.h>
     90      0    stevel #include <vm/seg_vn.h>
     91      0    stevel #include <vm/seg_kp.h>
     92      0    stevel #include <vm/hat_i86.h>
     93      0    stevel #include <sys/swap.h>
     94      0    stevel #include <sys/thread.h>
     95      0    stevel #include <sys/sysconf.h>
     96      0    stevel #include <sys/vm_machparam.h>
     97      0    stevel #include <sys/archsystm.h>
     98      0    stevel #include <sys/machsystm.h>
     99      0    stevel #include <sys/machlock.h>
    100      0    stevel #include <sys/x_call.h>
    101      0    stevel #include <sys/instance.h>
    102      0    stevel 
    103      0    stevel #include <sys/time.h>
    104      0    stevel #include <sys/smp_impldefs.h>
    105      0    stevel #include <sys/psm_types.h>
    106      0    stevel #include <sys/atomic.h>
    107      0    stevel #include <sys/panic.h>
    108      0    stevel #include <sys/cpuvar.h>
    109      0    stevel #include <sys/dtrace.h>
    110      0    stevel #include <sys/bl.h>
    111      0    stevel #include <sys/nvpair.h>
    112      0    stevel #include <sys/x86_archext.h>
    113      0    stevel #include <sys/pool_pset.h>
    114      0    stevel #include <sys/autoconf.h>
    115   3446       mrj #include <sys/mem.h>
    116   3446       mrj #include <sys/dumphdr.h>
    117   3446       mrj #include <sys/compress.h>
    118   7532      Sean #include <sys/cpu_module.h>
    119   5084   johnlev #if defined(__xpv)
    120   5084   johnlev #include <sys/hypervisor.h>
    121   5084   johnlev #include <sys/xpv_panic.h>
    122   5084   johnlev #endif
    123      0    stevel 
    124   7656    Sherry #include <sys/fastboot.h>
    125   7656    Sherry #include <sys/machelf.h>
    126   7656    Sherry #include <sys/kobj.h>
    127   7656    Sherry #include <sys/multiboot.h>
    128   7656    Sherry 
    129      0    stevel #ifdef	TRAPTRACE
    130      0    stevel #include <sys/traptrace.h>
    131      0    stevel #endif	/* TRAPTRACE */
    132  11066    rafael 
    133  11066    rafael #include <sys/clock_impl.h>
    134      0    stevel 
    135      0    stevel extern void audit_enterprom(int);
    136      0    stevel extern void audit_exitprom(int);
    137      0    stevel 
    138      0    stevel /*
    139   6681   johnlev  * Occassionally the kernel knows better whether to power-off or reboot.
    140   6681   johnlev  */
    141   6681   johnlev int force_shutdown_method = AD_UNKNOWN;
    142   6681   johnlev 
    143   6681   johnlev /*
    144      0    stevel  * The panicbuf array is used to record messages and state:
    145      0    stevel  */
    146      0    stevel char panicbuf[PANICBUFSIZE];
    147      0    stevel 
    148      0    stevel /*
    149      0    stevel  * maxphys - used during physio
    150      0    stevel  * klustsize - used for klustering by swapfs and specfs
    151      0    stevel  */
    152      0    stevel int maxphys = 56 * 1024;    /* XXX See vm_subr.c - max b_count in physio */
    153      0    stevel int klustsize = 56 * 1024;
    154      0    stevel 
    155      0    stevel caddr_t	p0_va;		/* Virtual address for accessing physical page 0 */
    156      0    stevel 
    157      0    stevel /*
    158      0    stevel  * defined here, though unused on x86,
    159      0    stevel  * to make kstat_fr.c happy.
    160      0    stevel  */
    161      0    stevel int vac;
    162      0    stevel 
    163      0    stevel void debug_enter(char *);
    164      0    stevel 
    165      0    stevel extern void pm_cfb_check_and_powerup(void);
    166      0    stevel extern void pm_cfb_rele(void);
    167      0    stevel 
    168   7656    Sherry extern fastboot_info_t newkernel;
    169   7656    Sherry 
    170      0    stevel /*
    171      0    stevel  * Machine dependent code to reboot.
    172      0    stevel  * "mdep" is interpreted as a character pointer; if non-null, it is a pointer
    173      0    stevel  * to a string to be used as the argument string when rebooting.
    174    136  achartre  *
    175    136  achartre  * "invoke_cb" is a boolean. It is set to true when mdboot() can safely
    176    136  achartre  * invoke CB_CL_MDBOOT callbacks before shutting the system down, i.e. when
    177    136  achartre  * we are in a normal shutdown sequence (interrupts are not blocked, the
    178    136  achartre  * system is not panic'ing or being suspended).
    179      0    stevel  */
    180      0    stevel /*ARGSUSED*/
    181      0    stevel void
    182    136  achartre mdboot(int cmd, int fcn, char *mdep, boolean_t invoke_cb)
    183      0    stevel {
    184   7656    Sherry 	processorid_t bootcpuid = 0;
    185   7863    Sherry 	static int is_first_quiesce = 1;
    186   7863    Sherry 	static int is_first_reset = 1;
    187   7863    Sherry 	int reset_status = 0;
    188   9160    Sherry 	static char fallback_str[] = "Falling back to regular reboot.\n";
    189   7656    Sherry 
    190   7656    Sherry 	if (fcn == AD_FASTREBOOT && !newkernel.fi_valid)
    191   7656    Sherry 		fcn = AD_BOOT;
    192   7656    Sherry 
    193      0    stevel 	if (!panicstr) {
    194      0    stevel 		kpreempt_disable();
    195   7656    Sherry 		if (fcn == AD_FASTREBOOT) {
    196   7656    Sherry 			mutex_enter(&cpu_lock);
    197   7656    Sherry 			if (CPU_ACTIVE(cpu_get(bootcpuid))) {
    198   7656    Sherry 				affinity_set(bootcpuid);
    199   7656    Sherry 			}
    200   7656    Sherry 			mutex_exit(&cpu_lock);
    201   7656    Sherry 		} else {
    202   7656    Sherry 			affinity_set(CPU_CURRENT);
    203   7656    Sherry 		}
    204      0    stevel 	}
    205   6681   johnlev 
    206   6681   johnlev 	if (force_shutdown_method != AD_UNKNOWN)
    207   6681   johnlev 		fcn = force_shutdown_method;
    208   5630     jbeck 
    209   5630     jbeck 	/*
    210   5630     jbeck 	 * XXX - rconsvp is set to NULL to ensure that output messages
    211   5630     jbeck 	 * are sent to the underlying "hardware" device using the
    212   5630     jbeck 	 * monitor's printf routine since we are in the process of
    213   5630     jbeck 	 * either rebooting or halting the machine.
    214   5630     jbeck 	 */
    215   5630     jbeck 	rconsvp = NULL;
    216      0    stevel 
    217      0    stevel 	/*
    218      0    stevel 	 * Print the reboot message now, before pausing other cpus.
    219      0    stevel 	 * There is a race condition in the printing support that
    220      0    stevel 	 * can deadlock multiprocessor machines.
    221      0    stevel 	 */
    222      0    stevel 	if (!(fcn == AD_HALT || fcn == AD_POWEROFF))
    223      0    stevel 		prom_printf("rebooting...\n");
    224      0    stevel 
    225   5084   johnlev 	if (IN_XPV_PANIC())
    226   5084   johnlev 		reset();
    227   5084   johnlev 
    228      0    stevel 	/*
    229      0    stevel 	 * We can't bring up the console from above lock level, so do it now
    230      0    stevel 	 */
    231      0    stevel 	pm_cfb_check_and_powerup();
    232      0    stevel 
    233      0    stevel 	/* make sure there are no more changes to the device tree */
    234      0    stevel 	devtree_freeze();
    235    136  achartre 
    236    136  achartre 	if (invoke_cb)
    237    136  achartre 		(void) callb_execute_class(CB_CL_MDBOOT, NULL);
    238    917     elowe 
    239   3253       mec 	/*
    240   3253       mec 	 * Clear any unresolved UEs from memory.
    241   3253       mec 	 */
    242   3253       mec 	page_retire_mdboot();
    243   5084   johnlev 
    244   5084   johnlev #if defined(__xpv)
    245   5084   johnlev 	/*
    246   5084   johnlev 	 * XXPV	Should probably think some more about how we deal
    247   5084   johnlev 	 *	with panicing before it's really safe to panic.
    248   5084   johnlev 	 *	On hypervisors, we reboot very quickly..  Perhaps panic
    249   5084   johnlev 	 *	should only attempt to recover by rebooting if,
    250   5084   johnlev 	 *	say, we were able to mount the root filesystem,
    251   5084   johnlev 	 *	or if we successfully launched init(1m).
    252   5084   johnlev 	 */
    253   5084   johnlev 	if (panicstr && proc_init == NULL)
    254   5084   johnlev 		(void) HYPERVISOR_shutdown(SHUTDOWN_poweroff);
    255   5084   johnlev #endif
    256      0    stevel 	/*
    257      0    stevel 	 * stop other cpus and raise our priority.  since there is only
    258      0    stevel 	 * one active cpu after this, and our priority will be too high
    259      0    stevel 	 * for us to be preempted, we're essentially single threaded
    260      0    stevel 	 * from here on out.
    261      0    stevel 	 */
    262      0    stevel 	(void) spl6();
    263      0    stevel 	if (!panicstr) {
    264      0    stevel 		mutex_enter(&cpu_lock);
    265      0    stevel 		pause_cpus(NULL);
    266      0    stevel 		mutex_exit(&cpu_lock);
    267      0    stevel 	}
    268      0    stevel 
    269      0    stevel 	/*
    270  10916    Sherry 	 * If the system is panicking, the preloaded kernel is valid, and
    271  10916    Sherry 	 * fastreboot_onpanic has been set, and the system has been up for
    272  10916    Sherry 	 * longer than fastreboot_onpanic_uptime (default to 10 minutes),
    273  10916    Sherry 	 * choose Fast Reboot.
    274   9160    Sherry 	 */
    275   9160    Sherry 	if (fcn == AD_BOOT && panicstr && newkernel.fi_valid &&
    276  10916    Sherry 	    fastreboot_onpanic &&
    277  10916    Sherry 	    (panic_lbolt - lbolt_at_boot) > fastreboot_onpanic_uptime) {
    278   9160    Sherry 		fcn = AD_FASTREBOOT;
    279  10916    Sherry 	}
    280   9160    Sherry 
    281   9160    Sherry 	/*
    282   7656    Sherry 	 * Try to quiesce devices.
    283      0    stevel 	 */
    284   7863    Sherry 	if (is_first_quiesce) {
    285   7863    Sherry 		/*
    286   7863    Sherry 		 * Clear is_first_quiesce before calling quiesce_devices()
    287   7863    Sherry 		 * so that if quiesce_devices() causes panics, it will not
    288   7863    Sherry 		 * be invoked again.
    289   7863    Sherry 		 */
    290   7863    Sherry 		is_first_quiesce = 0;
    291   7656    Sherry 
    292   7656    Sherry 		quiesce_active = 1;
    293   7656    Sherry 		quiesce_devices(ddi_root_node(), &reset_status);
    294   7783    Sherry 		if (reset_status == -1) {
    295   7783    Sherry 			if (fcn == AD_FASTREBOOT && !force_fastreboot) {
    296   7783    Sherry 				prom_printf("Driver(s) not capable of fast "
    297   9160    Sherry 				    "reboot.\n");
    298   9160    Sherry 				prom_printf(fallback_str);
    299   7783    Sherry 				fastreboot_capable = 0;
    300   9160    Sherry 				fcn = AD_BOOT;
    301   7783    Sherry 			} else if (fcn != AD_FASTREBOOT)
    302   7783    Sherry 				fastreboot_capable = 0;
    303   7656    Sherry 		}
    304   7656    Sherry 		quiesce_active = 0;
    305   7656    Sherry 	}
    306   7656    Sherry 
    307   7656    Sherry 	/*
    308   7863    Sherry 	 * Try to reset devices. reset_leaves() should only be called
    309   7863    Sherry 	 * a) when there are no other threads that could be accessing devices,
    310   7863    Sherry 	 *    and
    311   7863    Sherry 	 * b) on a system that's not capable of fast reboot (fastreboot_capable
    312   7863    Sherry 	 *    being 0), or on a system where quiesce_devices() failed to
    313   7863    Sherry 	 *    complete (quiesce_active being 1).
    314   7656    Sherry 	 */
    315   7863    Sherry 	if (is_first_reset && (!fastreboot_capable || quiesce_active)) {
    316   7863    Sherry 		/*
    317   7863    Sherry 		 * Clear is_first_reset before calling reset_devices()
    318   7863    Sherry 		 * so that if reset_devices() causes panics, it will not
    319   7863    Sherry 		 * be invoked again.
    320   7863    Sherry 		 */
    321   7863    Sherry 		is_first_reset = 0;
    322   7656    Sherry 		reset_leaves();
    323   7863    Sherry 	}
    324      0    stevel 
    325   9160    Sherry 	/* Verify newkernel checksum */
    326   9160    Sherry 	if (fastreboot_capable && fcn == AD_FASTREBOOT &&
    327   9160    Sherry 	    fastboot_cksum_verify(&newkernel) != 0) {
    328   9160    Sherry 		fastreboot_capable = 0;
    329   9160    Sherry 		prom_printf("Fast reboot: checksum failed for the new "
    330   9160    Sherry 		    "kernel.\n");
    331   9160    Sherry 		prom_printf(fallback_str);
    332   9160    Sherry 	}
    333   9160    Sherry 
    334      0    stevel 	(void) spl8();
    335      0    stevel 
    336   9160    Sherry 	if (fastreboot_capable && fcn == AD_FASTREBOOT) {
    337   9160    Sherry 		/*
    338   9160    Sherry 		 * psm_shutdown is called within fast_reboot()
    339   9160    Sherry 		 */
    340   7656    Sherry 		fast_reboot();
    341   9160    Sherry 	} else {
    342   9160    Sherry 		(*psm_shutdownf)(cmd, fcn);
    343   9160    Sherry 
    344   9160    Sherry 		if (fcn == AD_HALT || fcn == AD_POWEROFF)
    345   9160    Sherry 			halt((char *)NULL);
    346   9160    Sherry 		else
    347   9160    Sherry 			prom_reboot("");
    348   9160    Sherry 	}
    349      0    stevel 	/*NOTREACHED*/
    350      0    stevel }
    351      0    stevel 
    352      0    stevel /* mdpreboot - may be called prior to mdboot while root fs still mounted */
    353      0    stevel /*ARGSUSED*/
    354      0    stevel void
    355      0    stevel mdpreboot(int cmd, int fcn, char *mdep)
    356      0    stevel {
    357   7656    Sherry 	if (fcn == AD_FASTREBOOT && !fastreboot_capable) {
    358   7656    Sherry 		fcn = AD_BOOT;
    359   7656    Sherry #ifdef	__xpv
    360   9160    Sherry 		cmn_err(CE_WARN, "Fast reboot is not supported on xVM");
    361   7656    Sherry #else
    362   9160    Sherry 		cmn_err(CE_WARN,
    363   9160    Sherry 		    "Fast reboot is not supported on this platform");
    364   7656    Sherry #endif
    365   7656    Sherry 	}
    366   7656    Sherry 
    367   7656    Sherry 	if (fcn == AD_FASTREBOOT) {
    368   9160    Sherry 		fastboot_load_kernel(mdep);
    369   7656    Sherry 		if (!newkernel.fi_valid)
    370   7656    Sherry 			fcn = AD_BOOT;
    371   7656    Sherry 	}
    372   7656    Sherry 
    373      0    stevel 	(*psm_preshutdownf)(cmd, fcn);
    374      0    stevel }
    375      0    stevel 
    376   9489       Joe static void
    377   9489       Joe stop_other_cpus(void)
    378      0    stevel {
    379   9489       Joe 	ulong_t s = clear_int_flag(); /* fast way to keep CPU from changing */
    380      0    stevel 	cpuset_t xcset;
    381      0    stevel 
    382   9489       Joe 	CPUSET_ALL_BUT(xcset, CPU->cpu_id);
    383   9489       Joe 	xc_priority(0, 0, 0, CPUSET2BV(xcset), (xc_func_t)mach_cpu_halt);
    384   9489       Joe 	restore_int_flag(s);
    385      0    stevel }
    386      0    stevel 
    387      0    stevel /*
    388      0    stevel  *	Machine dependent abort sequence handling
    389      0    stevel  */
    390      0    stevel void
    391      0    stevel abort_sequence_enter(char *msg)
    392      0    stevel {
    393      0    stevel 	if (abort_enable == 0) {
    394      0    stevel 		if (audit_active)
    395      0    stevel 			audit_enterprom(0);
    396      0    stevel 		return;
    397      0    stevel 	}
    398      0    stevel 	if (audit_active)
    399      0    stevel 		audit_enterprom(1);
    400      0    stevel 	debug_enter(msg);
    401      0    stevel 	if (audit_active)
    402      0    stevel 		audit_exitprom(1);
    403      0    stevel }
    404      0    stevel 
    405      0    stevel /*
    406      0    stevel  * Enter debugger.  Called when the user types ctrl-alt-d or whenever
    407      0    stevel  * code wants to enter the debugger and possibly resume later.
    408      0    stevel  */
    409      0    stevel void
    410      0    stevel debug_enter(
    411      0    stevel 	char	*msg)		/* message to print, possibly NULL */
    412      0    stevel {
    413      0    stevel 	if (dtrace_debugger_init != NULL)
    414      0    stevel 		(*dtrace_debugger_init)();
    415      0    stevel 
    416      0    stevel 	if (msg)
    417      0    stevel 		prom_printf("%s\n", msg);
    418      0    stevel 
    419      0    stevel 	if (boothowto & RB_DEBUG)
    420   3446       mrj 		kmdb_enter();
    421      0    stevel 
    422      0    stevel 	if (dtrace_debugger_fini != NULL)
    423      0    stevel 		(*dtrace_debugger_fini)();
    424      0    stevel }
    425      0    stevel 
    426      0    stevel void
    427      0    stevel reset(void)
    428      0    stevel {
    429  10457   Saurabh 	extern	void acpi_reset_system();
    430   5084   johnlev #if !defined(__xpv)
    431      0    stevel 	ushort_t *bios_memchk;
    432      0    stevel 
    433      0    stevel 	/*
    434  10457   Saurabh 	 * Can't use psm_map_phys or acpi_reset_system before the hat is
    435  10457   Saurabh 	 * initialized.
    436      0    stevel 	 */
    437      0    stevel 	if (khat_running) {
    438      0    stevel 		bios_memchk = (ushort_t *)psm_map_phys(0x472,
    439      0    stevel 		    sizeof (ushort_t), PROT_READ | PROT_WRITE);
    440      0    stevel 		if (bios_memchk)
    441      0    stevel 			*bios_memchk = 0x1234;	/* bios memory check disable */
    442  10457   Saurabh 
    443  10457   Saurabh 		if (options_dip != NULL &&
    444  10457   Saurabh 		    ddi_prop_exists(DDI_DEV_T_ANY, ddi_root_node(), 0,
    445  10457   Saurabh 		    "efi-systab")) {
    446  10457   Saurabh 			efi_reset();
    447  10457   Saurabh 		}
    448  10457   Saurabh 
    449  10457   Saurabh 		/*
    450  10457   Saurabh 		 * The problem with using stubs is that we can call
    451  10457   Saurabh 		 * acpi_reset_system only after the kernel is up and running.
    452  10457   Saurabh 		 *
    453  10457   Saurabh 		 * We should create a global state to keep track of how far
    454  10457   Saurabh 		 * up the kernel is but for the time being we will depend on
    455  10457   Saurabh 		 * bootops. bootops cleared in startup_end().
    456  10457   Saurabh 		 */
    457  10457   Saurabh 		if (bootops == NULL)
    458  10457   Saurabh 			acpi_reset_system();
    459      0    stevel 	}
    460      0    stevel 
    461      0    stevel 	pc_reset();
    462   5084   johnlev #else
    463  10457   Saurabh 	if (IN_XPV_PANIC()) {
    464  10457   Saurabh 		if (khat_running && bootops == NULL) {
    465  10457   Saurabh 			acpi_reset_system();
    466  10457   Saurabh 		}
    467  10457   Saurabh 
    468   5084   johnlev 		pc_reset();
    469  10457   Saurabh 	}
    470  10457   Saurabh 
    471   5084   johnlev 	(void) HYPERVISOR_shutdown(SHUTDOWN_reboot);
    472   5084   johnlev 	panic("HYPERVISOR_shutdown() failed");
    473   5084   johnlev #endif
    474      0    stevel 	/*NOTREACHED*/
    475      0    stevel }
    476      0    stevel 
    477      0    stevel /*
    478      0    stevel  * Halt the machine and return to the monitor
    479      0    stevel  */
    480      0    stevel void
    481      0    stevel halt(char *s)
    482      0    stevel {
    483      0    stevel 	stop_other_cpus();	/* send stop signal to other CPUs */
    484      0    stevel 	if (s)
    485      0    stevel 		prom_printf("(%s) \n", s);
    486      0    stevel 	prom_exit_to_mon();
    487      0    stevel 	/*NOTREACHED*/
    488      0    stevel }
    489      0    stevel 
    /*
     * Initiate interrupt redistribution.
     *
     * The body is empty here: the call is accepted and nothing is done.
     * Declared with a (void) prototype so that calls with arguments are
     * diagnosed by the compiler.
     */
    void
    i_ddi_intr_redist_all_cpus(void)
    {
    }
    497      0    stevel 
    498      0    stevel /*
    499      0    stevel  * XXX These probably ought to live somewhere else
    500      0    stevel  * XXX They are called from mem.c
    501      0    stevel  */
    502      0    stevel 
    503      0    stevel /*
    504      0    stevel  * Convert page frame number to an OBMEM page frame number
    505      0    stevel  * (i.e. put in the type bits -- zero for this implementation)
    506      0    stevel  */
    507      0    stevel pfn_t
    508      0    stevel impl_obmem_pfnum(pfn_t pf)
    509      0    stevel {
    510      0    stevel 	return (pf);
    511      0    stevel }
    512      0    stevel 
    #ifdef	NM_DEBUG
    int nmi_test = 0;	/* checked in intentry.s during clock int */
    int nmtest = -1;

    /*
     * Debug-only NMI test hook (built only with NM_DEBUG).
     *
     * Prints its arguments, advances nmtest by 50, and returns 1 (after also
     * printing the saved pc) when arg equals the new nmtest value; returns 0
     * otherwise.
     *
     * The pointer is printed with %p and r_pc is widened to ulong_t for %lx
     * so that each format specifier matches its argument.
     */
    int
    nmfunc1(int arg, struct regs *rp)
    {
    	printf("nmi called with arg = %x, regs = %p\n", arg, (void *)rp);
    	nmtest += 50;
    	if (arg == nmtest) {
    		printf("ip = %lx\n", (ulong_t)rp->r_pc);
    		return (1);
    	}
    	return (0);
    }

    #endif
    530      0    stevel 
    531      0    stevel #include <sys/bootsvcs.h>
    532      0    stevel 
    533      0    stevel /* Hacked up initialization for initial kernel check out is HERE. */
    534      0    stevel /* The basic steps are: */
    535      0    stevel /*	kernel bootfuncs definition/initialization for KADB */
    536      0    stevel /*	kadb bootfuncs pointer initialization */
    537      0    stevel /*	putchar/getchar (interrupts disabled) */
    538      0    stevel 
    539      0    stevel /* kadb bootfuncs pointer initialization */
    540      0    stevel 
    541      0    stevel int
    542      0    stevel sysp_getchar()
    543      0    stevel {
    544      0    stevel 	int i;
    545   3446       mrj 	ulong_t s;
    546      0    stevel 
    547      0    stevel 	if (cons_polledio == NULL) {
    548      0    stevel 		/* Uh oh */
    549      0    stevel 		prom_printf("getchar called with no console\n");
    550      0    stevel 		for (;;)
    551      0    stevel 			/* LOOP FOREVER */;
    552      0    stevel 	}
    553      0    stevel 
    554      0    stevel 	s = clear_int_flag();
    555      0    stevel 	i = cons_polledio->cons_polledio_getchar(
    556   5084   johnlev 	    cons_polledio->cons_polledio_argument);
    557      0    stevel 	restore_int_flag(s);
    558      0    stevel 	return (i);
    559      0    stevel }
    560      0    stevel 
    561      0    stevel void
    562      0    stevel sysp_putchar(int c)
    563      0    stevel {
    564   3446       mrj 	ulong_t s;
    565      0    stevel 
    566      0    stevel 	/*
    567      0    stevel 	 * We have no alternative but to drop the output on the floor.
    568      0    stevel 	 */
    569   1253  lq150181 	if (cons_polledio == NULL ||
    570   1253  lq150181 	    cons_polledio->cons_polledio_putchar == NULL)
    571      0    stevel 		return;
    572      0    stevel 
    573      0    stevel 	s = clear_int_flag();
    574      0    stevel 	cons_polledio->cons_polledio_putchar(
    575   5084   johnlev 	    cons_polledio->cons_polledio_argument, c);
    576      0    stevel 	restore_int_flag(s);
    577      0    stevel }
    578      0    stevel 
    579      0    stevel int
    580      0    stevel sysp_ischar()
    581      0    stevel {
    582      0    stevel 	int i;
    583   3446       mrj 	ulong_t s;
    584      0    stevel 
    585   1253  lq150181 	if (cons_polledio == NULL ||
    586   1253  lq150181 	    cons_polledio->cons_polledio_ischar == NULL)
    587      0    stevel 		return (0);
    588      0    stevel 
    589      0    stevel 	s = clear_int_flag();
    590      0    stevel 	i = cons_polledio->cons_polledio_ischar(
    591   5084   johnlev 	    cons_polledio->cons_polledio_argument);
    592      0    stevel 	restore_int_flag(s);
    593      0    stevel 	return (i);
    594      0    stevel }
    595      0    stevel 
    /*
     * Prompt on the console, wait for a single keystroke (which is
     * discarded), then finish the line.  Always returns 1.
     */
    int
    goany(void)
    {
    	prom_printf("Type any key to continue ");

    	/* Block until a key arrives; its value is not used. */
    	(void) prom_getchar();

    	prom_printf("\n");
    	return (1);
    }
    604      0    stevel 
    605      0    stevel static struct boot_syscalls kern_sysp = {
    606      0    stevel 	sysp_getchar,	/*	unchar	(*getchar)();	7  */
    607      0    stevel 	sysp_putchar,	/*	int	(*putchar)();	8  */
    608      0    stevel 	sysp_ischar,	/*	int	(*ischar)();	9  */
    609      0    stevel };
    610      0    stevel 
    611   5084   johnlev #if defined(__xpv)
    612   5084   johnlev int using_kern_polledio;
    613   5084   johnlev #endif
    614   5084   johnlev 
    615      0    stevel void
    616      0    stevel kadb_uses_kernel()
    617      0    stevel {
    618      0    stevel 	/*
    619      0    stevel 	 * This routine is now totally misnamed, since it does not in fact
    620      0    stevel 	 * control kadb's I/O; it only controls the kernel's prom_* I/O.
    621      0    stevel 	 */
    622      0    stevel 	sysp = &kern_sysp;
    623   5084   johnlev #if defined(__xpv)
    624   5084   johnlev 	using_kern_polledio = 1;
    625   5084   johnlev #endif
    626      0    stevel }
    627      0    stevel 
    628      0    stevel /*
    629      0    stevel  *	the interface to the outside world
    630      0    stevel  */
    631      0    stevel 
    632      0    stevel /*
    633      0    stevel  * poll_port -- wait for a register to achieve a
    634      0    stevel  *		specific state.  Arguments are a mask of bits we care about,
    635      0    stevel  *		and two sub-masks.  To return normally, all the bits in the
    636      0    stevel  *		first sub-mask must be ON, all the bits in the second sub-
    637      0    stevel  *		mask must be OFF.  If about seconds pass without the register
    638      0    stevel  *		achieving the desired bit configuration, we return 1, else
    639      0    stevel  *		0.
    640      0    stevel  */
    641      0    stevel int
    642      0    stevel poll_port(ushort_t port, ushort_t mask, ushort_t onbits, ushort_t offbits)
    643      0    stevel {
    644      0    stevel 	int i;
    645      0    stevel 	ushort_t maskval;
    646      0    stevel 
    647      0    stevel 	for (i = 500000; i; i--) {
    648      0    stevel 		maskval = inb(port) & mask;
    649      0    stevel 		if (((maskval & onbits) == onbits) &&
    650   5084   johnlev 		    ((maskval & offbits) == 0))
    651      0    stevel 			return (0);
    652      0    stevel 		drv_usecwait(10);
    653      0    stevel 	}
    654      0    stevel 	return (1);
    655      0    stevel }
    656      0    stevel 
    657      0    stevel /*
    658      0    stevel  * set_idle_cpu is called from idle() when a CPU becomes idle.
    659      0    stevel  */
    660      0    stevel /*LINTED: static unused */
    661      0    stevel static uint_t last_idle_cpu;
    662      0    stevel 
    663      0    stevel /*ARGSUSED*/
    664      0    stevel void
    665      0    stevel set_idle_cpu(int cpun)
    666      0    stevel {
    667      0    stevel 	last_idle_cpu = cpun;
    668      0    stevel 	(*psm_set_idle_cpuf)(cpun);
    669      0    stevel }
    670      0    stevel 
    /*
     * unset_idle_cpu is called from idle() when a CPU is no longer idle.
     * The CPU number is simply forwarded to the PSM hook.
     */
    /*ARGSUSED*/
    void
    unset_idle_cpu(int cpun)
    {
    	psm_unset_idle_cpuf(cpun);
    }
    680      0    stevel 
/*
 * pc_gethrestime -- store the current high-resolution time of day in *tp.
 *
 * The result is the global hrestime, plus the nanoseconds that gethrtime()
 * reports as elapsed since hres_last_tick, plus a bounded share of any
 * pending hrestime_adj.  No lock is taken here: hres_lock is sampled before
 * the computation and compared again afterwards, and the whole calculation
 * is redone when the two samples disagree.
 *
 * NOTE(review): the historical remarks below say that an "hres_last_tick"
 * is still missing; the code as it stands already subtracts hres_last_tick,
 * so those remarks appear to predate it -- confirm and retire if so.
 *
 * This routine is almost correct now, but not quite.  It still needs the
 * equivalent concept of "hres_last_tick", just like on the sparc side.
 * The idea is to take a snapshot of the hi-res timer while doing the
 * hrestime_adj updates under hres_lock in locore, so that the small
 * interval between interrupt assertion and interrupt processing is
 * accounted for correctly.  Once we have this, the code below should
 * be modified to subtract off hres_last_tick rather than hrtime_base.
 *
 * I'd have done this myself, but I don't have source to all of the
 * vendor-specific hi-res timer routines (grrr...).  The generic hook I
 * need is something like "gethrtime_unlocked()", which would be just like
 * gethrtime() but would assume that you're already holding CLOCK_LOCK().
 * This is what the GET_HRTIME() macro is for on sparc (although it also
 * serves the function of making time available without a function call
 * so you don't take a register window overflow while traps are disabled).
 */
void
pc_gethrestime(timestruc_t *tp)
{
	int lock_prev;
	timestruc_t now;
	int nslt;		/* nsec since last tick */
	int adj;		/* amount of adjustment to apply */

loop:
	/* Snapshot the lock word first; it is re-checked before returning. */
	lock_prev = hres_lock;
	now = hrestime;
	nslt = (int)(gethrtime() - hres_last_tick);
	if (nslt < 0) {
		/*
		 * nslt < 0 means a tick came between sampling
		 * gethrtime() and hres_last_tick; restart the loop
		 */

		goto loop;
	}
	now.tv_nsec += nslt;
	if (hrestime_adj != 0) {
		/*
		 * Apply at most (nslt >> ADJ_SHIFT) nanoseconds of the
		 * pending adjustment, in the direction of hrestime_adj,
		 * and never more than hrestime_adj itself.
		 */
		if (hrestime_adj > 0) {
			adj = (nslt >> ADJ_SHIFT);
			if (adj > hrestime_adj)
				adj = (int)hrestime_adj;
		} else {
			adj = -(nslt >> ADJ_SHIFT);
			if (adj < hrestime_adj)
				adj = (int)hrestime_adj;
		}
		now.tv_nsec += adj;
	}
	/* Carry whole seconds out of tv_nsec into tv_sec. */
	while ((unsigned long)now.tv_nsec >= NANOSEC) {

		/*
		 * We might have a large adjustment or have been in the
		 * debugger for a long time; take care of (at most) four
		 * of those missed seconds (tv_nsec is 32 bits, so
		 * anything >4s will be wrapping around).  However,
		 * anything more than 2 seconds out of sync will trigger
		 * timedelta from clock() to go correct the time anyway,
		 * so do what we can, and let the big crowbar do the
		 * rest.  A similar correction while loop exists inside
		 * hres_tick(); in all cases we'd like tv_nsec to
		 * satisfy 0 <= tv_nsec < NANOSEC to avoid confusing
		 * user processes, but if tv_sec's a little behind for a
		 * little while, that's OK; time still monotonically
		 * increases.
		 */

		now.tv_nsec -= NANOSEC;
		now.tv_sec++;
	}
	/*
	 * Retry if hres_lock no longer matches the snapshot.  Bit 0 is
	 * masked off the fresh value only, so a snapshot taken with bit 0
	 * set can never match and also forces a retry (presumably bit 0
	 * marks an update in progress -- confirm against the writers of
	 * hres_lock).
	 */
	if ((hres_lock & ~1) != lock_prev)
		goto loop;

	*tp = now;
}
    757      0    stevel 
    758      0    stevel void
    759      0    stevel gethrestime_lasttick(timespec_t *tp)
    760      0    stevel {
    761      0    stevel 	int s;
    762      0    stevel 
    763      0    stevel 	s = hr_clock_lock();
    764      0    stevel 	*tp = hrestime;
    765      0    stevel 	hr_clock_unlock(s);
    766      0    stevel }
    767      0    stevel 
    768      0    stevel time_t
    769      0    stevel gethrestime_sec(void)
    770      0    stevel {
    771      0    stevel 	timestruc_t now;
    772      0    stevel 
    773      0    stevel 	gethrestime(&now);
    774      0    stevel 	return (now.tv_sec);
    775      0    stevel }
    776      0    stevel 
    777      0    stevel /*
    778      0    stevel  * Initialize a kernel thread's stack
    779      0    stevel  */
    780      0    stevel 
    781      0    stevel caddr_t
    782      0    stevel thread_stk_init(caddr_t stk)
    783      0    stevel {
    784      0    stevel 	ASSERT(((uintptr_t)stk & (STACK_ALIGN - 1)) == 0);
    785      0    stevel 	return (stk - SA(MINFRAME));
    786      0    stevel }
    787      0    stevel 
    788      0    stevel /*
    789      0    stevel  * Initialize lwp's kernel stack.
    790      0    stevel  */
    791      0    stevel 
    792      0    stevel #ifdef TRAPTRACE
    793      0    stevel /*
    794      0    stevel  * There's a tricky interdependency here between use of sysenter and
    795      0    stevel  * TRAPTRACE which needs recording to avoid future confusion (this is
    796      0    stevel  * about the third time I've re-figured this out ..)
    797      0    stevel  *
    798      0    stevel  * Here's how debugging lcall works with TRAPTRACE.
    799      0    stevel  *
    800      0    stevel  * 1 We're in userland with a breakpoint on the lcall instruction.
    801      0    stevel  * 2 We execute the instruction - the instruction pushes the userland
    802      0    stevel  *   %ss, %esp, %efl, %cs, %eip on the stack and zips into the kernel
    803      0    stevel  *   via the call gate.
    804      0    stevel  * 3 The hardware raises a debug trap in kernel mode, the hardware
    805      0    stevel  *   pushes %efl, %cs, %eip and gets to dbgtrap via the idt.
    806      0    stevel  * 4 dbgtrap pushes the error code and trapno and calls cmntrap
    807      0    stevel  * 5 cmntrap finishes building a trap frame
    808      0    stevel  * 6 The TRACE_REGS macros in cmntrap copy a REGSIZE worth chunk
    809      0    stevel  *   off the stack into the traptrace buffer.
    810      0    stevel  *
    811      0    stevel  * This means that the traptrace buffer contains the wrong values in
    812      0    stevel  * %esp and %ss, but everything else in there is correct.
    813      0    stevel  *
    814      0    stevel  * Here's how debugging sysenter works with TRAPTRACE.
    815      0    stevel  *
    816      0    stevel  * a We're in userland with a breakpoint on the sysenter instruction.
    817      0    stevel  * b We execute the instruction - the instruction pushes -nothing-
    818      0    stevel  *   on the stack, but sets %cs, %eip, %ss, %esp to prearranged
    819      0    stevel  *   values to take us to sys_sysenter, at the top of the lwp's
    820      0    stevel  *   stack.
    821      0    stevel  * c goto 3
    822      0    stevel  *
    823      0    stevel  * At this point, because we got into the kernel without the requisite
    824      0    stevel  * five pushes on the stack, if we didn't make extra room, we'd
    825      0    stevel  * end up with the TRACE_REGS macro fetching the saved %ss and %esp
    826      0    stevel  * values from negative (unmapped) stack addresses -- which really bites.
    827      0    stevel  * That's why we do the '-= 8' below.
    828      0    stevel  *
    829      0    stevel  * XXX	Note that reading "up" lwp0's stack works because t0 is declared
    830      0    stevel  *	right next to t0stack in locore.s
    831      0    stevel  */
    832      0    stevel #endif
    833      0    stevel 
    834      0    stevel caddr_t
    835      0    stevel lwp_stk_init(klwp_t *lwp, caddr_t stk)
    836      0    stevel {
    837      0    stevel 	caddr_t oldstk;
    838      0    stevel 	struct pcb *pcb = &lwp->lwp_pcb;
    839      0    stevel 
    840      0    stevel 	oldstk = stk;
    841      0    stevel 	stk -= SA(sizeof (struct regs) + SA(MINFRAME));
    842      0    stevel #ifdef TRAPTRACE
    843      0    stevel 	stk -= 2 * sizeof (greg_t); /* space for phony %ss:%sp (see above) */
    844      0    stevel #endif
    845      0    stevel 	stk = (caddr_t)((uintptr_t)stk & ~(STACK_ALIGN - 1ul));
    846      0    stevel 	bzero(stk, oldstk - stk);
    847      0    stevel 	lwp->lwp_regs = (void *)(stk + SA(MINFRAME));
    848      0    stevel 
    849      0    stevel 	/*
    850      0    stevel 	 * Arrange that the virtualized %fs and %gs GDT descriptors
    851      0    stevel 	 * have a well-defined initial state (present, ring 3
    852      0    stevel 	 * and of type data).
    853      0    stevel 	 */
    854      0    stevel #if defined(__amd64)
    855      0    stevel 	if (lwp_getdatamodel(lwp) == DATAMODEL_NATIVE)
    856      0    stevel 		pcb->pcb_fsdesc = pcb->pcb_gsdesc = zero_udesc;
    857      0    stevel 	else
    858      0    stevel 		pcb->pcb_fsdesc = pcb->pcb_gsdesc = zero_u32desc;
    859      0    stevel #elif defined(__i386)
    860      0    stevel 	pcb->pcb_fsdesc = pcb->pcb_gsdesc = zero_udesc;
    861      0    stevel #endif	/* __i386 */
    862      0    stevel 	lwp_installctx(lwp);
    863      0    stevel 	return (stk);
    864      0    stevel }
    865      0    stevel 
    866      0    stevel /*ARGSUSED*/
    867      0    stevel void
    868      0    stevel lwp_stk_fini(klwp_t *lwp)
    869      0    stevel {}
    870      0    stevel 
    871      0    stevel /*
    872   1389     dmick  * If we're not the panic CPU, we wait in panic_idle for reboot.
    873      0    stevel  */
    874   9489       Joe void
    875      0    stevel panic_idle(void)
    876      0    stevel {
    877      0    stevel 	splx(ipltospl(CLOCK_LEVEL));
    878      0    stevel 	(void) setjmp(&curthread->t_pcb);
    879      0    stevel 
    880  10843      Dave 	dumpsys_helper();
    881  10843      Dave 
    882   9489       Joe #ifndef __xpv
    883   9489       Joe 	for (;;)
    884   9489       Joe 		i86_halt();
    885   9489       Joe #else
    886   3446       mrj 	for (;;)
    887   3446       mrj 		;
    888   9489       Joe #endif
    889      0    stevel }
    890      0    stevel 
    891      0    stevel /*
    892      0    stevel  * Stop the other CPUs by cross-calling them and forcing them to enter
    893      0    stevel  * the panic_idle() loop above.
    894      0    stevel  */
    895      0    stevel /*ARGSUSED*/
    896      0    stevel void
    897      0    stevel panic_stopcpus(cpu_t *cp, kthread_t *t, int spl)
    898      0    stevel {
    899      0    stevel 	processorid_t i;
    900      0    stevel 	cpuset_t xcset;
    901      0    stevel 
    902   5084   johnlev 	/*
    903   5084   johnlev 	 * In the case of a Xen panic, the hypervisor has already stopped
    904   5084   johnlev 	 * all of the CPUs.
    905   5084   johnlev 	 */
    906   5084   johnlev 	if (!IN_XPV_PANIC()) {
    907   5084   johnlev 		(void) splzs();
    908      0    stevel 
    909   5084   johnlev 		CPUSET_ALL_BUT(xcset, cp->cpu_id);
    910   9489       Joe 		xc_priority(0, 0, 0, CPUSET2BV(xcset), (xc_func_t)panic_idle);
    911   5084   johnlev 	}
    912      0    stevel 
    913      0    stevel 	for (i = 0; i < NCPU; i++) {
    914      0    stevel 		if (i != cp->cpu_id && cpu[i] != NULL &&
    915      0    stevel 		    (cpu[i]->cpu_flags & CPU_EXISTS))
    916      0    stevel 			cpu[i]->cpu_flags |= CPU_QUIESCED;
    917      0    stevel 	}
    918      0    stevel }
    919      0    stevel 
    920      0    stevel /*
    921      0    stevel  * Platform callback following each entry to panicsys().
    922      0    stevel  */
    923      0    stevel /*ARGSUSED*/
    924      0    stevel void
    925      0    stevel panic_enter_hw(int spl)
    926      0    stevel {
    927      0    stevel 	/* Nothing to do here */
    928      0    stevel }
    929      0    stevel 
    930      0    stevel /*
    931      0    stevel  * Platform-specific code to execute after panicstr is set: we invoke
    932      0    stevel  * the PSM entry point to indicate that a panic has occurred.
    933      0    stevel  */
    934      0    stevel /*ARGSUSED*/
    935      0    stevel void
    936      0    stevel panic_quiesce_hw(panic_data_t *pdp)
    937      0    stevel {
    938      0    stevel 	psm_notifyf(PSM_PANIC_ENTER);
    939      0    stevel 
    940   7532      Sean 	cmi_panic_callback();
    941   7532      Sean 
    942      0    stevel #ifdef	TRAPTRACE
    943      0    stevel 	/*
    944      0    stevel 	 * Turn off TRAPTRACE
    945      0    stevel 	 */
    946      0    stevel 	TRAPTRACE_FREEZE;
    947      0    stevel #endif	/* TRAPTRACE */
    948      0    stevel }
    949      0    stevel 
    950      0    stevel /*
    951      0    stevel  * Platform callback prior to writing crash dump.
    952      0    stevel  */
    953      0    stevel /*ARGSUSED*/
    954      0    stevel void
    955      0    stevel panic_dump_hw(int spl)
    956      0    stevel {
    957      0    stevel 	/* Nothing to do here */
    958   5084   johnlev }
    959   5084   johnlev 
    960   5084   johnlev void *
    961   5084   johnlev plat_traceback(void *fpreg)
    962   5084   johnlev {
    963   5084   johnlev #ifdef __xpv
    964   5084   johnlev 	if (IN_XPV_PANIC())
    965   5084   johnlev 		return (xpv_traceback(fpreg));
    966   5084   johnlev #endif
    967   5084   johnlev 	return (fpreg);
    968      0    stevel }
    969      0    stevel 
    970      0    stevel /*ARGSUSED*/
    971      0    stevel void
    972      0    stevel plat_tod_fault(enum tod_fault_type tod_bad)
    973   3446       mrj {}
    974      0    stevel 
    975      0    stevel /*ARGSUSED*/
    976      0    stevel int
    977      0    stevel blacklist(int cmd, const char *scheme, nvlist_t *fmri, const char *class)
    978      0    stevel {
    979      0    stevel 	return (ENOTSUP);
    980      0    stevel }
    981      0    stevel 
    982      0    stevel /*
    983      0    stevel  * The underlying console output routines are protected by raising IPL in case
    984      0    stevel  * we are still calling into the early boot services.  Once we start calling
    985      0    stevel  * the kernel console emulator, it will disable interrupts completely during
    986      0    stevel  * character rendering (see sysp_putchar, for example).  Refer to the comments
    987      0    stevel  * and code in common/os/console.c for more information on these callbacks.
    988      0    stevel  */
    989      0    stevel /*ARGSUSED*/
    990      0    stevel int
    991      0    stevel console_enter(int busy)
    992      0    stevel {
    993      0    stevel 	return (splzs());
    994      0    stevel }
    995      0    stevel 
    996      0    stevel /*ARGSUSED*/
    997      0    stevel void
    998      0    stevel console_exit(int busy, int spl)
    999      0    stevel {
   1000      0    stevel 	splx(spl);
   1001      0    stevel }
   1002      0    stevel 
   1003      0    stevel /*
   1004      0    stevel  * Allocate a region of virtual address space, unmapped.
   1005      0    stevel  * Stubbed out except on sparc, at least for now.
   1006      0    stevel  */
   1007      0    stevel /*ARGSUSED*/
   1008      0    stevel void *
   1009      0    stevel boot_virt_alloc(void *addr, size_t size)
   1010      0    stevel {
   1011      0    stevel 	return (addr);
   1012      0    stevel }
   1013      0    stevel 
   1014      0    stevel volatile unsigned long	tenmicrodata;
   1015      0    stevel 
   1016      0    stevel void
   1017      0    stevel tenmicrosec(void)
   1018      0    stevel {
   1019   5084   johnlev 	extern int gethrtime_hires;
   1020      0    stevel 
   1021   5084   johnlev 	if (gethrtime_hires) {
   1022      0    stevel 		hrtime_t start, end;
   1023      0    stevel 		start = end =  gethrtime();
   1024      0    stevel 		while ((end - start) < (10 * (NANOSEC / MICROSEC))) {
   1025      0    stevel 			SMT_PAUSE();
   1026      0    stevel 			end = gethrtime();
   1027      0    stevel 		}
   1028      0    stevel 	} else {
   1029   5084   johnlev #if defined(__xpv)
   1030   5084   johnlev 		hrtime_t newtime;
   1031   5084   johnlev 
   1032   5084   johnlev 		newtime = xpv_gethrtime() + 10000; /* now + 10 us */
   1033   5084   johnlev 		while (xpv_gethrtime() < newtime)
   1034   5084   johnlev 			SMT_PAUSE();
   1035   5084   johnlev #else	/* __xpv */
   1036   3446       mrj 		int i;
   1037   3446       mrj 
   1038      0    stevel 		/*
   1039      0    stevel 		 * Artificial loop to induce delay.
   1040      0    stevel 		 */
   1041      0    stevel 		for (i = 0; i < microdata; i++)
   1042      0    stevel 			tenmicrodata = microdata;
   1043   5084   johnlev #endif	/* __xpv */
   1044      0    stevel 	}
   1045      0    stevel }
   1046    590    esolom 
   1047    590    esolom /*
   1048    590    esolom  * get_cpu_mstate() is passed an array of timestamps, NCMSTATES
   1049    590    esolom  * long, and it fills in the array with the time spent on cpu in
   1050    590    esolom  * each of the mstates, where time is returned in nsec.
   1051    590    esolom  *
   1052    590    esolom  * No guarantee is made that the returned values in times[] will
   1053    590    esolom  * monotonically increase on sequential calls, although this will
   1054    590    esolom  * be true in the long run. Any such guarantee must be handled by
   1055    590    esolom  * the caller, if needed. This can happen if we fail to account
   1056    590    esolom  * for elapsed time due to a generation counter conflict, yet we
   1057    590    esolom  * did account for it on a prior call (see below).
   1058    590    esolom  *
   1059    590    esolom  * The complication is that the cpu in question may be updating
   1060    590    esolom  * its microstate at the same time that we are reading it.
   1061    590    esolom  * Because the microstate is only updated when the CPU's state
   1062    590    esolom  * changes, the values in cpu_intracct[] can be indefinitely out
   1063    590    esolom  * of date. To determine true current values, it is necessary to
   1064    590    esolom  * compare the current time with cpu_mstate_start, and add the
   1065    590    esolom  * difference to times[cpu_mstate].
   1066    590    esolom  *
   1067    590    esolom  * This can be a problem if those values are changing out from
   1068    590    esolom  * under us. Because the code path in new_cpu_mstate() is
   1069    590    esolom  * performance critical, we have not added a lock to it. Instead,
   1070    590    esolom  * we have added a generation counter. Before beginning
   1071    590    esolom  * modifications, the counter is set to 0. After modifications,
   1072    590    esolom  * it is set to the old value plus one.
   1073    590    esolom  *
   1074    590    esolom  * get_cpu_mstate() will not consider the values of cpu_mstate
   1075    590    esolom  * and cpu_mstate_start to be usable unless the value of
   1076    590    esolom  * cpu_mstate_gen is both non-zero and unchanged, both before and
   1077    590    esolom  * after reading the mstate information. Note that we must
   1078    590    esolom  * protect against out-of-order loads around accesses to the
   1079    590    esolom  * generation counter. Also, this is a best effort approach in
   1080    590    esolom  * that we do not retry should the counter be found to have
   1081    590    esolom  * changed.
   1082    590    esolom  *
   1083    590    esolom  * cpu_intracct[] is used to identify time spent in each CPU
   1084    590    esolom  * mstate while handling interrupts. Such time should be reported
   1085    590    esolom  * against system time, and so is subtracted out from its
   1086    590    esolom  * corresponding cpu_acct[] time and added to
   1087    590    esolom  * cpu_acct[CMS_SYSTEM].
   1088    590    esolom  */
   1089    590    esolom 
void
get_cpu_mstate(cpu_t *cpu, hrtime_t *times)
{
	int i;
	hrtime_t now, start;
	uint16_t gen;
	uint16_t state;
	hrtime_t intracct[NCMSTATES];

	/*
	 * Load all volatile state under the protection of membar.
	 * cpu_acct[cpu_mstate] must be loaded to avoid double counting
	 * of (now - cpu_mstate_start) by a change in CPU mstate that
	 * arrives after we make our last check of cpu_mstate_gen.
	 */

	now = gethrtime_unscaled();
	/* First read of the generation counter; compared again below. */
	gen = cpu->cpu_mstate_gen;

	membar_consumer();	/* guarantee load ordering */
	start = cpu->cpu_mstate_start;
	state = cpu->cpu_mstate;
	/* Take private copies of the per-state accounting arrays. */
	for (i = 0; i < NCMSTATES; i++) {
		intracct[i] = cpu->cpu_intracct[i];
		times[i] = cpu->cpu_acct[i];
	}
	membar_consumer();	/* guarantee load ordering */

	/*
	 * Credit the time spent so far in the current state, but only
	 * when the generation counter was non-zero and is unchanged, and
	 * the interval is positive.  No retry is attempted otherwise.
	 */
	if (gen != 0 && gen == cpu->cpu_mstate_gen && now > start)
		times[state] += now - start;

	/*
	 * Move the interrupt time recorded against each non-system state
	 * over to CMS_SYSTEM.  If the subtraction would drive a state
	 * negative, the state is clamped to 0 and only the amount that
	 * was actually available is transferred.
	 */
	for (i = 0; i < NCMSTATES; i++) {
		if (i == CMS_SYSTEM)
			continue;
		times[i] -= intracct[i];
		if (times[i] < 0) {
			intracct[i] += times[i];
			times[i] = 0;
		}
		times[CMS_SYSTEM] += intracct[i];
		/* presumably converts the unscaled value to nsec -- confirm */
		scalehrtime(&times[i]);
	}
	/* CMS_SYSTEM is scaled last, after all transfers have been added. */
	scalehrtime(&times[CMS_SYSTEM]);
}
   1134   3446       mrj 
   1135   3446       mrj /*
   1136   3446       mrj  * This is a version of the rdmsr instruction that allows
   1137   3446       mrj  * an error code to be returned in the case of failure.
   1138   3446       mrj  */
   1139   3446       mrj int
   1140   3446       mrj checked_rdmsr(uint_t msr, uint64_t *value)
   1141   3446       mrj {
   1142   3446       mrj 	if ((x86_feature & X86_MSR) == 0)
   1143   3446       mrj 		return (ENOTSUP);
   1144   3446       mrj 	*value = rdmsr(msr);
   1145   3446       mrj 	return (0);
   1146   3446       mrj }
   1147   3446       mrj 
   1148   3446       mrj /*
   1149   3446       mrj  * This is a version of the wrmsr instruction that allows
   1150   3446       mrj  * an error code to be returned in the case of failure.
   1151   3446       mrj  */
   1152   3446       mrj int
   1153   3446       mrj checked_wrmsr(uint_t msr, uint64_t value)
   1154   3446       mrj {
   1155   3446       mrj 	if ((x86_feature & X86_MSR) == 0)
   1156   3446       mrj 		return (ENOTSUP);
   1157   3446       mrj 	wrmsr(msr, value);
   1158   3446       mrj 	return (0);
   1159   3446       mrj }
   1160   3446       mrj 
   1161   3446       mrj /*
   1162   5084   johnlev  * The mem driver's usual method of using hat_devload() to establish a
   1163   5084   johnlev  * temporary mapping will not work for foreign pages mapped into this
   1164   5084   johnlev  * domain or for the special hypervisor-provided pages.  For the foreign
   1165   5084   johnlev  * pages, we often don't know which domain owns them, so we can't ask the
   1166   5084   johnlev  * hypervisor to set up a new mapping.  For the other pages, we don't have
   1167   5084   johnlev  * a pfn, so we can't create a new PTE.  For these special cases, we do a
   1168   5084   johnlev  * direct uiomove() from the existing kernel virtual address.
   1169   3446       mrj  */
   1170   3446       mrj /*ARGSUSED*/
   1171   3446       mrj int
   1172   5084   johnlev plat_mem_do_mmio(struct uio *uio, enum uio_rw rw)
   1173   3446       mrj {
   1174   5084   johnlev #if defined(__xpv)
   1175   5084   johnlev 	void *va = (void *)(uintptr_t)uio->uio_loffset;
   1176   5084   johnlev 	off_t pageoff = uio->uio_loffset & PAGEOFFSET;
   1177   5084   johnlev 	size_t nbytes = MIN((size_t)(PAGESIZE - pageoff),
   1178   5084   johnlev 	    (size_t)uio->uio_iov->iov_len);
   1179   5084   johnlev 
   1180   5084   johnlev 	if ((rw == UIO_READ &&
   1181   5084   johnlev 	    (va == HYPERVISOR_shared_info || va == xen_info)) ||
   1182   5084   johnlev 	    (pfn_is_foreign(hat_getpfnum(kas.a_hat, va))))
   1183   5084   johnlev 		return (uiomove(va, nbytes, rw, uio));
   1184   5084   johnlev #endif
   1185   5084   johnlev 	return (ENOTSUP);
   1186   5084   johnlev }
   1187   5084   johnlev 
   1188   5084   johnlev pgcnt_t
   1189   5084   johnlev num_phys_pages()
   1190   5084   johnlev {
   1191   5084   johnlev 	pgcnt_t npages = 0;
   1192   5084   johnlev 	struct memlist *mp;
   1193   5084   johnlev 
   1194   5084   johnlev #if defined(__xpv)
   1195  10175    Stuart 	if (DOMAIN_IS_INITDOMAIN(xen_info))
   1196  10175    Stuart 		return (xpv_nr_phys_pages());
   1197   5084   johnlev #endif /* __xpv */
   1198   5084   johnlev 
   1199   5084   johnlev 	for (mp = phys_install; mp != NULL; mp = mp->next)
   1200   5084   johnlev 		npages += mp->size >> PAGESHIFT;
   1201   5084   johnlev 
   1202   5084   johnlev 	return (npages);
   1203   3446       mrj }
   1204   3446       mrj 
   1205  10843      Dave /* cpu threshold for compressed dumps */
   1206  10843      Dave #ifdef _LP64
   1207  10843      Dave uint_t dump_plat_mincpu = DUMP_PLAT_X86_64_MINCPU;
   1208  10843      Dave #else
   1209  10843      Dave uint_t dump_plat_mincpu = DUMP_PLAT_X86_32_MINCPU;
   1210  10843      Dave #endif
   1211  10843      Dave 
   1212   3446       mrj int
   1213   3446       mrj dump_plat_addr()
   1214   3446       mrj {
   1215   5084   johnlev #ifdef __xpv
   1216   5084   johnlev 	pfn_t pfn = mmu_btop(xen_info->shared_info) | PFN_IS_FOREIGN_MFN;
   1217   5084   johnlev 	mem_vtop_t mem_vtop;
   1218   5084   johnlev 	int cnt;
   1219   5084   johnlev 
   1220   5084   johnlev 	/*
   1221   5084   johnlev 	 * On the hypervisor, we want to dump the page with shared_info on it.
   1222   5084   johnlev 	 */
   1223   5084   johnlev 	if (!IN_XPV_PANIC()) {
   1224   5084   johnlev 		mem_vtop.m_as = &kas;
   1225   5084   johnlev 		mem_vtop.m_va = HYPERVISOR_shared_info;
   1226   5084   johnlev 		mem_vtop.m_pfn = pfn;
   1227   5084   johnlev 		dumpvp_write(&mem_vtop, sizeof (mem_vtop_t));
   1228   5084   johnlev 		cnt = 1;
   1229   5084   johnlev 	} else {
   1230   5084   johnlev 		cnt = dump_xpv_addr();
   1231   5084   johnlev 	}
   1232   5084   johnlev 	return (cnt);
   1233   5084   johnlev #else
   1234   3446       mrj 	return (0);
   1235   5084   johnlev #endif
   1236   3446       mrj }
   1237   3446       mrj 
   1238   3446       mrj void
   1239   3446       mrj dump_plat_pfn()
   1240   3446       mrj {
   1241   5084   johnlev #ifdef __xpv
   1242   5084   johnlev 	pfn_t pfn = mmu_btop(xen_info->shared_info) | PFN_IS_FOREIGN_MFN;
   1243   5084   johnlev 
   1244   5084   johnlev 	if (!IN_XPV_PANIC())
   1245   5084   johnlev 		dumpvp_write(&pfn, sizeof (pfn));
   1246   5084   johnlev 	else
   1247   5084   johnlev 		dump_xpv_pfn();
   1248   5084   johnlev #endif
   1249   3446       mrj }
   1250   3446       mrj 
   1251   3446       mrj /*ARGSUSED*/
   1252   3446       mrj int
   1253   3446       mrj dump_plat_data(void *dump_cbuf)
   1254   3446       mrj {
   1255   5084   johnlev #ifdef __xpv
   1256   5084   johnlev 	uint32_t csize;
   1257   5084   johnlev 	int cnt;
   1258   5084   johnlev 
   1259   5084   johnlev 	if (!IN_XPV_PANIC()) {
   1260   5084   johnlev 		csize = (uint32_t)compress(HYPERVISOR_shared_info, dump_cbuf,
   1261   5084   johnlev 		    PAGESIZE);
   1262   5084   johnlev 		dumpvp_write(&csize, sizeof (uint32_t));
   1263   5084   johnlev 		dumpvp_write(dump_cbuf, csize);
   1264   5084   johnlev 		cnt = 1;
   1265   5084   johnlev 	} else {
   1266   5084   johnlev 		cnt = dump_xpv_data(dump_cbuf);
   1267   5084   johnlev 	}
   1268   5084   johnlev 	return (cnt);
   1269   5084   johnlev #else
   1270   3446       mrj 	return (0);
   1271   5084   johnlev #endif
   1272   3446       mrj }
   1273   3939     sethg 
   1274   3939     sethg /*
   1275   3939     sethg  * Calculates a linear address, given the CS selector and PC values
   1276   3939     sethg  * by looking up the %cs selector process's LDT or the CPU's GDT.
   1277   3939     sethg  * proc->p_ldtlock must be held across this call.
   1278   3939     sethg  */
   1279   3939     sethg int
   1280   3939     sethg linear_pc(struct regs *rp, proc_t *p, caddr_t *linearp)
   1281   3939     sethg {
   1282   3939     sethg 	user_desc_t	*descrp;
   1283   3939     sethg 	caddr_t		baseaddr;
   1284   3939     sethg 	uint16_t	idx = SELTOIDX(rp->r_cs);
   1285   3939     sethg 
   1286   3939     sethg 	ASSERT(rp->r_cs <= 0xFFFF);
   1287   3939     sethg 	ASSERT(MUTEX_HELD(&p->p_ldtlock));
   1288   3939     sethg 
   1289   3939     sethg 	if (SELISLDT(rp->r_cs)) {
   1290   3939     sethg 		/*
   1291   3939     sethg 		 * Currently 64 bit processes cannot have private LDTs.
   1292   3939     sethg 		 */
   1293   3939     sethg 		ASSERT(p->p_model != DATAMODEL_LP64);
   1294   3939     sethg 
   1295   3939     sethg 		if (p->p_ldt == NULL)
   1296   3939     sethg 			return (-1);
   1297   3939     sethg 
   1298   3939     sethg 		descrp = &p->p_ldt[idx];
   1299   3939     sethg 		baseaddr = (caddr_t)(uintptr_t)USEGD_GETBASE(descrp);
   1300   3939     sethg 
   1301   3939     sethg 		/*
   1302   3939     sethg 		 * Calculate the linear address (wraparound is not only ok,
   1303   3939     sethg 		 * it's expected behavior).  The cast to uint32_t is because
   1304   3939     sethg 		 * LDT selectors are only allowed in 32-bit processes.
   1305   3939     sethg 		 */
   1306   3939     sethg 		*linearp = (caddr_t)(uintptr_t)(uint32_t)((uintptr_t)baseaddr +
   1307   3939     sethg 		    rp->r_pc);
   1308   3939     sethg 	} else {
   1309   3939     sethg #ifdef DEBUG
   1310   3939     sethg 		descrp = &CPU->cpu_gdt[idx];
   1311   3939     sethg 		baseaddr = (caddr_t)(uintptr_t)USEGD_GETBASE(descrp);
   1312   3939     sethg 		/* GDT-based descriptors' base addresses should always be 0 */
   1313   3939     sethg 		ASSERT(baseaddr == 0);
   1314   3939     sethg #endif
   1315   3939     sethg 		*linearp = (caddr_t)(uintptr_t)rp->r_pc;
   1316   3939     sethg 	}
   1317   3939     sethg 
   1318   3939     sethg 	return (0);
   1319   3939     sethg }
   1320   3939     sethg 
   1321   3939     sethg /*
   1322   3939     sethg  * The implementation of dtrace_linear_pc is similar to the that of
   1323   3939     sethg  * linear_pc, above, but here we acquire p_ldtlock before accessing
   1324   3939     sethg  * p_ldt.  This implementation is used by the pid provider; we prefix
   1325   3939     sethg  * it with "dtrace_" to avoid inducing spurious tracing events.
   1326   3939     sethg  */
   1327   3939     sethg int
   1328   3939     sethg dtrace_linear_pc(struct regs *rp, proc_t *p, caddr_t *linearp)
   1329   3939     sethg {
   1330   3939     sethg 	user_desc_t	*descrp;
   1331   3939     sethg 	caddr_t		baseaddr;
   1332   3939     sethg 	uint16_t	idx = SELTOIDX(rp->r_cs);
   1333   3939     sethg 
   1334   3939     sethg 	ASSERT(rp->r_cs <= 0xFFFF);
   1335   3939     sethg 
   1336   3939     sethg 	if (SELISLDT(rp->r_cs)) {
   1337   3939     sethg 		/*
   1338   3939     sethg 		 * Currently 64 bit processes cannot have private LDTs.
   1339   3939     sethg 		 */
   1340   3939     sethg 		ASSERT(p->p_model != DATAMODEL_LP64);
   1341   3939     sethg 
   1342   3939     sethg 		mutex_enter(&p->p_ldtlock);
   1343   3939     sethg 		if (p->p_ldt == NULL) {
   1344   3939     sethg 			mutex_exit(&p->p_ldtlock);
   1345   3939     sethg 			return (-1);
   1346   3939     sethg 		}
   1347   3939     sethg 		descrp = &p->p_ldt[idx];
   1348   3939     sethg 		baseaddr = (caddr_t)(uintptr_t)USEGD_GETBASE(descrp);
   1349   3939     sethg 		mutex_exit(&p->p_ldtlock);
   1350   3939     sethg 
   1351   3939     sethg 		/*
   1352   3939     sethg 		 * Calculate the linear address (wraparound is not only ok,
   1353   3939     sethg 		 * it's expected behavior).  The cast to uint32_t is because
   1354   3939     sethg 		 * LDT selectors are only allowed in 32-bit processes.
   1355   3939     sethg 		 */
   1356   3939     sethg 		*linearp = (caddr_t)(uintptr_t)(uint32_t)((uintptr_t)baseaddr +
   1357   3939     sethg 		    rp->r_pc);
   1358   3939     sethg 	} else {
   1359   3939     sethg #ifdef DEBUG
   1360   3939     sethg 		descrp = &CPU->cpu_gdt[idx];
   1361   3939     sethg 		baseaddr = (caddr_t)(uintptr_t)USEGD_GETBASE(descrp);
   1362   3939     sethg 		/* GDT-based descriptors' base addresses should always be 0 */
   1363   3939     sethg 		ASSERT(baseaddr == 0);
   1364   3939     sethg #endif
   1365   3939     sethg 		*linearp = (caddr_t)(uintptr_t)rp->r_pc;
   1366   3939     sethg 	}
   1367   3939     sethg 
   1368   3939     sethg 	return (0);
   1369   3939     sethg }
   1370  11066    rafael 
/*
 * We need to post a soft interrupt to reprogram the lbolt cyclic when
 * switching from event to cyclic driven lbolt. The following code adds
 * and posts the softint for x86.
 *
 * lbolt_softint_hdl is the soft interrupt handle shared by the two
 * routines below: lbolt_softint_add() registers it with
 * add_avsoftintr(), and lbolt_softint_post() passes its ih_pending
 * field to setsoftint.  Every field starts out zero or NULL.
 */
static ddi_softint_hdl_impl_t lbolt_softint_hdl =
	{0, NULL, NULL, NULL, 0, NULL, NULL, NULL};
   1378  11066    rafael 
   1379  11066    rafael void
   1380  11066    rafael lbolt_softint_add(void)
   1381  11066    rafael {
   1382  11066    rafael 	(void) add_avsoftintr((void *)&lbolt_softint_hdl, LOCK_LEVEL,
   1383  11066    rafael 	    (avfunc)lbolt_ev_to_cyclic, "lbolt_ev_to_cyclic", NULL, NULL);
   1384  11066    rafael }
   1385  11066    rafael 
   1386  11066    rafael void
   1387  11066    rafael lbolt_softint_post(void)
   1388  11066    rafael {
   1389  11066    rafael 	(*setsoftint)(CBE_LOCK_PIL, lbolt_softint_hdl.ih_pending);
   1390  11066    rafael }
   1391