Home | History | Annotate | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include <sys/types.h>
     27 #include <sys/param.h>
     28 #include <sys/t_lock.h>
     29 #include <sys/thread.h>
     30 #include <sys/cpuvar.h>
     31 #include <sys/x_call.h>
     32 #include <sys/xc_levels.h>
     33 #include <sys/cpu.h>
     34 #include <sys/psw.h>
     35 #include <sys/sunddi.h>
     36 #include <sys/debug.h>
     37 #include <sys/systm.h>
     38 #include <sys/archsystm.h>
     39 #include <sys/machsystm.h>
     40 #include <sys/mutex_impl.h>
     41 #include <sys/stack.h>
     42 #include <sys/promif.h>
     43 #include <sys/x86_archext.h>
     44 
     45 /*
     46  * Implementation for cross-processor calls via interprocessor interrupts
     47  *
     48  * This implementation uses a message passing architecture to allow multiple
     49  * concurrent cross calls to be in flight at any given time. We use the cmpxchg
     50  * instruction, aka casptr(), to implement simple efficient work queues for
     51  * message passing between CPUs with almost no need for regular locking.
     52  * See xc_extract() and xc_insert() below.
     53  *
     54  * The general idea is that initiating a cross call means putting a message
     55  * on a target(s) CPU's work queue. Any synchronization is handled by passing
     56  * the message back and forth between initiator and target(s).
     57  *
     58  * Every CPU has xc_work_cnt, which indicates it has messages to process.
     59  * This value is incremented as message traffic is initiated and decremented
     60  * with every message that finishes all processing.
     61  *
     62  * The code needs no mfence or other membar_*() calls. The uses of
     63  * casptr(), cas32() and atomic_dec_32() for the message passing are
     64  * implemented with LOCK prefix instructions which are equivalent to mfence.
     65  *
     66  * One interesting aspect of this implementation is that it allows 2 or more
     67  * CPUs to initiate cross calls to intersecting sets of CPUs at the same time.
     68  * The cross call processing by the CPUs will happen in any order with only
     69  * a guarantee, for xc_call() and xc_sync(), that an initiator won't return
     70  * from cross calls before all slaves have invoked the function.
     71  *
     72  * The reason for this asynchronous approach is to allow for fast global
     73  * TLB shootdowns. Suppose all CPUs, say N, tried to do a global TLB
     74  * invalidation on a different Virtual Address at the same time. The old
     75  * code required N squared IPIs. With this method, depending on timing, it
     76  * could happen with just N IPIs.
     77  */
     78 
/*
 * Optional cross call statistics.
 *
 * The default is to not enable collecting counts of IPI information, since
 * the updating of shared cachelines could cause excess bus traffic.
 * xc_common() only touches the two counters when xc_collect_enable is
 * non-zero, and it updates them with plain (non-atomic) increments.
 */
uint_t xc_collect_enable = 0;
uint64_t xc_total_cnt = 0;	/* total #IPIs sent for cross calls */
uint64_t xc_multi_cnt = 0;	/* # times we piggy backed on another IPI */
     86 
/*
 * Values for message states. Here are the normal transitions. A transition
 * of "->" happens in the slave cpu and "=>" happens in the master cpu as
 * the messages are passed back and forth.
 *
 * FREE => ASYNC ->                       DONE => FREE
 * FREE => CALL ->                        DONE => FREE
 * FREE => SYNC -> WAITING => RELEASED -> DONE => FREE
 *
 * The interesting one above is ASYNC. You might ask, why not go directly
 * to FREE, instead of DONE. If it did that, it might be possible to exhaust
 * the master's xc_free list if a master can generate ASYNC messages faster
 * than the slave can process them. That could be handled with more complicated
 * handling. However since nothing important uses ASYNC, I've not bothered.
 */
#define	XC_MSG_FREE	(0)	/* msg in xc_free queue */
#define	XC_MSG_ASYNC	(1)	/* msg in slave xc_msgbox */
#define	XC_MSG_CALL	(2)	/* msg in slave xc_msgbox */
#define	XC_MSG_SYNC	(3)	/* msg in slave xc_msgbox */
#define	XC_MSG_WAITING	(4)	/* msg in master xc_msgbox or xc_waiters */
#define	XC_MSG_RELEASED	(5)	/* msg in slave xc_msgbox */
#define	XC_MSG_DONE	(6)	/* msg in master xc_msgbox */
    109 
/*
 * We allow for one high priority message at a time to happen in the system.
 * This is used for panic, kmdb, etc., so no locking is done.
 *
 * xc_priority_set is the bit vector view of xc_priority_set_store and is
 * indexed by CPU id. xc_priority_common() fills in xc_priority_data and then
 * sets the bit of every CPU it targets; xc_serv() on a target CPU copies the
 * function and arguments out of xc_priority_data and clears its own bit.
 */
static volatile cpuset_t xc_priority_set_store;
static volatile ulong_t *xc_priority_set = CPUSET2BV(xc_priority_set_store);
static xc_data_t xc_priority_data;
    117 
/*
 * Wrappers to avoid C compiler warnings due to volatile. The atomic bit
 * operations don't accept volatile bit vectors - which is a bit silly.
 * Each wrapper simply casts the vector to a plain (ulong_t *) before handing
 * it to the corresponding BT_ATOMIC_*() macro.
 */
#define	XC_BT_SET(vector, b)	BT_ATOMIC_SET((ulong_t *)(vector), (b))
#define	XC_BT_CLEAR(vector, b)	BT_ATOMIC_CLEAR((ulong_t *)(vector), (b))
    124 
/*
 * Decrement a CPU's work count.
 *
 * The update of mcpu->xc_work_cnt is done with atomic_dec_32(), so it is
 * safe against concurrent xc_increment()/xc_decrement() calls that target
 * the same CPU.
 */
static void
xc_decrement(struct machcpu *mcpu)
{
	atomic_dec_32(&mcpu->xc_work_cnt);
}
    133 
    134 /*
    135  * Increment a CPU's work count and return the old value
    136  */
    137 static int
    138 xc_increment(struct machcpu *mcpu)
    139 {
    140 	int old;
    141 	do {
    142 		old = mcpu->xc_work_cnt;
    143 	} while (cas32((uint32_t *)&mcpu->xc_work_cnt, old, old + 1) != old);
    144 	return (old);
    145 }
    146 
    147 /*
    148  * Put a message into a queue. The insertion is atomic no matter
    149  * how many different inserts/extracts to the same queue happen.
    150  */
    151 static void
    152 xc_insert(void *queue, xc_msg_t *msg)
    153 {
    154 	xc_msg_t *old_head;
    155 
    156 	/*
    157 	 * FREE messages should only ever be getting inserted into
    158 	 * the xc_master CPUs xc_free queue.
    159 	 */
    160 	ASSERT(msg->xc_command != XC_MSG_FREE ||
    161 	    cpu[msg->xc_master] == NULL || /* possible only during init */
    162 	    queue == &cpu[msg->xc_master]->cpu_m.xc_free);
    163 
    164 	do {
    165 		old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue;
    166 		msg->xc_next = old_head;
    167 	} while (casptr(queue, old_head, msg) != old_head);
    168 }
    169 
    170 /*
    171  * Extract a message from a queue. The extraction is atomic only
    172  * when just one thread does extractions from the queue.
    173  * If the queue is empty, NULL is returned.
    174  */
    175 static xc_msg_t *
    176 xc_extract(xc_msg_t **queue)
    177 {
    178 	xc_msg_t *old_head;
    179 
    180 	do {
    181 		old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue;
    182 		if (old_head == NULL)
    183 			return (old_head);
    184 	} while (casptr(queue, old_head, old_head->xc_next) != old_head);
    185 	old_head->xc_next = NULL;
    186 	return (old_head);
    187 }
    188 
    189 
    190 /*
    191  * Initialize the machcpu fields used for cross calls
    192  */
    193 static uint_t xc_initialized = 0;
    194 void
    195 xc_init_cpu(struct cpu *cpup)
    196 {
    197 	xc_msg_t *msg;
    198 	int c;
    199 
    200 	/*
    201 	 * add a new msg to each existing CPU's free list, as well as one for
    202 	 * my list for each of them. ncpus has an inconsistent value when this
    203 	 * function is called, so use cpup->cpu_id.
    204 	 */
    205 	for (c = 0; c < cpup->cpu_id; ++c) {
    206 		if (cpu[c] == NULL)
    207 			continue;
    208 		msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
    209 		msg->xc_command = XC_MSG_FREE;
    210 		msg->xc_master = c;
    211 		xc_insert(&cpu[c]->cpu_m.xc_free, msg);
    212 
    213 		msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
    214 		msg->xc_command = XC_MSG_FREE;
    215 		msg->xc_master = cpup->cpu_id;
    216 		xc_insert(&cpup->cpu_m.xc_free, msg);
    217 	}
    218 
    219 	/*
    220 	 * Add one for self messages
    221 	 */
    222 	msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
    223 	msg->xc_command = XC_MSG_FREE;
    224 	msg->xc_master = cpup->cpu_id;
    225 	xc_insert(&cpup->cpu_m.xc_free, msg);
    226 
    227 	if (!xc_initialized)
    228 		xc_initialized = 1;
    229 }
    230 
    231 /*
    232  * X-call message processing routine. Note that this is used by both
    233  * senders and recipients of messages.
    234  *
    235  * We're protected against changing CPUs by either being in a high-priority
    236  * interrupt, having preemption disabled or by having a raised SPL.
    237  */
    238 /*ARGSUSED*/
    239 uint_t
    240 xc_serv(caddr_t arg1, caddr_t arg2)
    241 {
    242 	struct machcpu *mcpup = &(CPU->cpu_m);
    243 	xc_msg_t *msg;
    244 	xc_data_t *data;
    245 	xc_msg_t *xc_waiters = NULL;
    246 	uint32_t num_waiting = 0;
    247 	xc_func_t func;
    248 	xc_arg_t a1;
    249 	xc_arg_t a2;
    250 	xc_arg_t a3;
    251 	uint_t rc = DDI_INTR_UNCLAIMED;
    252 
    253 	while (mcpup->xc_work_cnt != 0) {
    254 		rc = DDI_INTR_CLAIMED;
    255 
    256 		/*
    257 		 * We may have to wait for a message to arrive.
    258 		 */
    259 		for (msg = NULL; msg == NULL;
    260 		    msg = xc_extract(&mcpup->xc_msgbox)) {
    261 
    262 			/*
    263 			 * Alway check for and handle a priority message.
    264 			 */
    265 			if (BT_TEST(xc_priority_set, CPU->cpu_id)) {
    266 				func = xc_priority_data.xc_func;
    267 				a1 = xc_priority_data.xc_a1;
    268 				a2 = xc_priority_data.xc_a2;
    269 				a3 = xc_priority_data.xc_a3;
    270 				XC_BT_CLEAR(xc_priority_set, CPU->cpu_id);
    271 				xc_decrement(mcpup);
    272 				func(a1, a2, a3);
    273 				if (mcpup->xc_work_cnt == 0)
    274 					return (rc);
    275 			}
    276 
    277 			/*
    278 			 * wait for a message to arrive
    279 			 */
    280 			SMT_PAUSE();
    281 		}
    282 
    283 
    284 		/*
    285 		 * process the message
    286 		 */
    287 		switch (msg->xc_command) {
    288 
    289 		/*
    290 		 * ASYNC gives back the message immediately, then we do the
    291 		 * function and return with no more waiting.
    292 		 */
    293 		case XC_MSG_ASYNC:
    294 			data = &cpu[msg->xc_master]->cpu_m.xc_data;
    295 			func = data->xc_func;
    296 			a1 = data->xc_a1;
    297 			a2 = data->xc_a2;
    298 			a3 = data->xc_a3;
    299 			msg->xc_command = XC_MSG_DONE;
    300 			xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
    301 			if (func != NULL)
    302 				(void) (*func)(a1, a2, a3);
    303 			xc_decrement(mcpup);
    304 			break;
    305 
    306 		/*
    307 		 * SYNC messages do the call, then send it back to the master
    308 		 * in WAITING mode
    309 		 */
    310 		case XC_MSG_SYNC:
    311 			data = &cpu[msg->xc_master]->cpu_m.xc_data;
    312 			if (data->xc_func != NULL)
    313 				(void) (*data->xc_func)(data->xc_a1,
    314 				    data->xc_a2, data->xc_a3);
    315 			msg->xc_command = XC_MSG_WAITING;
    316 			xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
    317 			break;
    318 
    319 		/*
    320 		 * WAITING messsages are collected by the master until all
    321 		 * have arrived. Once all arrive, we release them back to
    322 		 * the slaves
    323 		 */
    324 		case XC_MSG_WAITING:
    325 			xc_insert(&xc_waiters, msg);
    326 			if (++num_waiting < mcpup->xc_wait_cnt)
    327 				break;
    328 			while ((msg = xc_extract(&xc_waiters)) != NULL) {
    329 				msg->xc_command = XC_MSG_RELEASED;
    330 				xc_insert(&cpu[msg->xc_slave]->cpu_m.xc_msgbox,
    331 				    msg);
    332 				--num_waiting;
    333 			}
    334 			if (num_waiting != 0)
    335 				panic("wrong number waiting");
    336 			mcpup->xc_wait_cnt = 0;
    337 			break;
    338 
    339 		/*
    340 		 * CALL messages do the function and then, like RELEASE,
    341 		 * send the message is back to master as DONE.
    342 		 */
    343 		case XC_MSG_CALL:
    344 			data = &cpu[msg->xc_master]->cpu_m.xc_data;
    345 			if (data->xc_func != NULL)
    346 				(void) (*data->xc_func)(data->xc_a1,
    347 				    data->xc_a2, data->xc_a3);
    348 			/*FALLTHROUGH*/
    349 		case XC_MSG_RELEASED:
    350 			msg->xc_command = XC_MSG_DONE;
    351 			xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
    352 			xc_decrement(mcpup);
    353 			break;
    354 
    355 		/*
    356 		 * DONE means a slave has completely finished up.
    357 		 * Once we collect all the DONE messages, we'll exit
    358 		 * processing too.
    359 		 */
    360 		case XC_MSG_DONE:
    361 			msg->xc_command = XC_MSG_FREE;
    362 			xc_insert(&mcpup->xc_free, msg);
    363 			xc_decrement(mcpup);
    364 			break;
    365 
    366 		case XC_MSG_FREE:
    367 			panic("free message 0x%p in msgbox", (void *)msg);
    368 			break;
    369 
    370 		default:
    371 			panic("bad message 0x%p in msgbox", (void *)msg);
    372 			break;
    373 		}
    374 	}
    375 	return (rc);
    376 }
    377 
    378 /*
    379  * Initiate cross call processing.
    380  */
    381 static void
    382 xc_common(
    383 	xc_func_t func,
    384 	xc_arg_t arg1,
    385 	xc_arg_t arg2,
    386 	xc_arg_t arg3,
    387 	ulong_t *set,
    388 	uint_t command)
    389 {
    390 	int c;
    391 	struct cpu *cpup;
    392 	xc_msg_t *msg;
    393 	xc_data_t *data;
    394 	int cnt;
    395 	int save_spl;
    396 
    397 	if (!xc_initialized) {
    398 		if (BT_TEST(set, CPU->cpu_id) && (CPU->cpu_flags & CPU_READY) &&
    399 		    func != NULL)
    400 			(void) (*func)(arg1, arg2, arg3);
    401 		return;
    402 	}
    403 
    404 	save_spl = splr(ipltospl(XC_HI_PIL));
    405 
    406 	/*
    407 	 * fill in cross call data
    408 	 */
    409 	data = &CPU->cpu_m.xc_data;
    410 	data->xc_func = func;
    411 	data->xc_a1 = arg1;
    412 	data->xc_a2 = arg2;
    413 	data->xc_a3 = arg3;
    414 
    415 	/*
    416 	 * Post messages to all CPUs involved that are CPU_READY
    417 	 */
    418 	CPU->cpu_m.xc_wait_cnt = 0;
    419 	for (c = 0; c < ncpus; ++c) {
    420 		if (!BT_TEST(set, c))
    421 			continue;
    422 		cpup = cpu[c];
    423 		if (cpup == NULL || !(cpup->cpu_flags & CPU_READY))
    424 			continue;
    425 
    426 		/*
    427 		 * Fill out a new message.
    428 		 */
    429 		msg = xc_extract(&CPU->cpu_m.xc_free);
    430 		if (msg == NULL)
    431 			panic("Ran out of free xc_msg_t's");
    432 		msg->xc_command = command;
    433 		if (msg->xc_master != CPU->cpu_id)
    434 			panic("msg %p has wrong xc_master", (void *)msg);
    435 		msg->xc_slave = c;
    436 
    437 		/*
    438 		 * Increment my work count for all messages that I'll
    439 		 * transition from DONE to FREE.
    440 		 * Also remember how many XC_MSG_WAITINGs to look for
    441 		 */
    442 		(void) xc_increment(&CPU->cpu_m);
    443 		if (command == XC_MSG_SYNC)
    444 			++CPU->cpu_m.xc_wait_cnt;
    445 
    446 		/*
    447 		 * Increment the target CPU work count then insert the message
    448 		 * in the target msgbox. If I post the first bit of work
    449 		 * for the target to do, send an IPI to the target CPU.
    450 		 */
    451 		cnt = xc_increment(&cpup->cpu_m);
    452 		xc_insert(&cpup->cpu_m.xc_msgbox, msg);
    453 		if (cpup != CPU) {
    454 			if (cnt == 0) {
    455 				CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
    456 				send_dirint(c, XC_HI_PIL);
    457 				if (xc_collect_enable)
    458 					++xc_total_cnt;
    459 			} else if (xc_collect_enable) {
    460 				++xc_multi_cnt;
    461 			}
    462 		}
    463 	}
    464 
    465 	/*
    466 	 * Now drop into the message handler until all work is done
    467 	 */
    468 	(void) xc_serv(NULL, NULL);
    469 	splx(save_spl);
    470 }
    471 
    472 /*
    473  * Push out a priority cross call.
    474  */
    475 static void
    476 xc_priority_common(
    477 	xc_func_t func,
    478 	xc_arg_t arg1,
    479 	xc_arg_t arg2,
    480 	xc_arg_t arg3,
    481 	ulong_t *set)
    482 {
    483 	int i;
    484 	int c;
    485 	struct cpu *cpup;
    486 
    487 	/*
    488 	 * Wait briefly for any previous xc_priority to have finished.
    489 	 */
    490 	for (c = 0; c < ncpus; ++c) {
    491 		cpup = cpu[c];
    492 		if (cpup == NULL || !(cpup->cpu_flags & CPU_READY))
    493 			continue;
    494 
    495 		/*
    496 		 * The value of 40000 here is from old kernel code. It
    497 		 * really should be changed to some time based value, since
    498 		 * under a hypervisor, there's no guarantee a remote CPU
    499 		 * is even scheduled.
    500 		 */
    501 		for (i = 0; BT_TEST(xc_priority_set, c) && i < 40000; ++i)
    502 			SMT_PAUSE();
    503 
    504 		/*
    505 		 * Some CPU did not respond to a previous priority request. It's
    506 		 * probably deadlocked with interrupts blocked or some such
    507 		 * problem. We'll just erase the previous request - which was
    508 		 * most likely a kmdb_enter that has already expired - and plow
    509 		 * ahead.
    510 		 */
    511 		if (BT_TEST(xc_priority_set, c)) {
    512 			XC_BT_CLEAR(xc_priority_set, c);
    513 			if (cpup->cpu_m.xc_work_cnt > 0)
    514 				xc_decrement(&cpup->cpu_m);
    515 		}
    516 	}
    517 
    518 	/*
    519 	 * fill in cross call data
    520 	 */
    521 	xc_priority_data.xc_func = func;
    522 	xc_priority_data.xc_a1 = arg1;
    523 	xc_priority_data.xc_a2 = arg2;
    524 	xc_priority_data.xc_a3 = arg3;
    525 
    526 	/*
    527 	 * Post messages to all CPUs involved that are CPU_READY
    528 	 * We'll always IPI, plus bang on the xc_msgbox for i86_mwait()
    529 	 */
    530 	for (c = 0; c < ncpus; ++c) {
    531 		if (!BT_TEST(set, c))
    532 			continue;
    533 		cpup = cpu[c];
    534 		if (cpup == NULL || !(cpup->cpu_flags & CPU_READY) ||
    535 		    cpup == CPU)
    536 			continue;
    537 		(void) xc_increment(&cpup->cpu_m);
    538 		XC_BT_SET(xc_priority_set, c);
    539 		send_dirint(c, XC_HI_PIL);
    540 		for (i = 0; i < 10; ++i) {
    541 			(void) casptr(&cpup->cpu_m.xc_msgbox,
    542 			    cpup->cpu_m.xc_msgbox, cpup->cpu_m.xc_msgbox);
    543 		}
    544 	}
    545 }
    546 
    547 /*
    548  * Do cross call to all other CPUs with absolutely no waiting or handshaking.
    549  * This should only be used for extraordinary operations, like panic(), which
    550  * need to work, in some fashion, in a not completely functional system.
    551  * All other uses that want minimal waiting should use xc_call_nowait().
    552  */
    553 void
    554 xc_priority(
    555 	xc_arg_t arg1,
    556 	xc_arg_t arg2,
    557 	xc_arg_t arg3,
    558 	ulong_t *set,
    559 	xc_func_t func)
    560 {
    561 	extern int IGNORE_KERNEL_PREEMPTION;
    562 	int save_spl = splr(ipltospl(XC_HI_PIL));
    563 	int save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
    564 
    565 	IGNORE_KERNEL_PREEMPTION = 1;
    566 	xc_priority_common((xc_func_t)func, arg1, arg2, arg3, set);
    567 	IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
    568 	splx(save_spl);
    569 }
    570 
    571 /*
    572  * Wrapper for kmdb to capture other CPUs, causing them to enter the debugger.
    573  */
    574 void
    575 kdi_xc_others(int this_cpu, void (*func)(void))
    576 {
    577 	extern int IGNORE_KERNEL_PREEMPTION;
    578 	int save_kernel_preemption;
    579 	cpuset_t set;
    580 
    581 	if (!xc_initialized)
    582 		return;
    583 
    584 	save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
    585 	IGNORE_KERNEL_PREEMPTION = 1;
    586 	CPUSET_ALL_BUT(set, this_cpu);
    587 	xc_priority_common((xc_func_t)func, 0, 0, 0, CPUSET2BV(set));
    588 	IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
    589 }
    590 
    591 
    592 
/*
 * Invoke function on specified processors. Remotes may continue after
 * service with no waiting. xc_call_nowait() may return immediately too.
 *
 * The request is posted through xc_common() as XC_MSG_ASYNC messages.
 * arg1..arg3 are passed to func; set is the bit vector of target CPU ids.
 */
void
xc_call_nowait(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set,
	xc_func_t func)
{
	xc_common(func, arg1, arg2, arg3, set, XC_MSG_ASYNC);
}
    607 
/*
 * Invoke function on specified processors. Remotes may continue after
 * service with no waiting. xc_call() returns only after remotes have finished.
 *
 * The request is posted through xc_common() as XC_MSG_CALL messages.
 * arg1..arg3 are passed to func; set is the bit vector of target CPU ids.
 */
void
xc_call(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set,
	xc_func_t func)
{
	xc_common(func, arg1, arg2, arg3, set, XC_MSG_CALL);
}
    622 
/*
 * Invoke function on specified processors. Remotes wait until all have
 * finished. xc_sync() also waits until all remotes have finished.
 *
 * The request is posted through xc_common() as XC_MSG_SYNC messages.
 * arg1..arg3 are passed to func; set is the bit vector of target CPU ids.
 */
void
xc_sync(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set,
	xc_func_t func)
{
	xc_common(func, arg1, arg2, arg3, set, XC_MSG_SYNC);
}
    637