Home | History | Annotate | Download | only in portfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     28 
     29 #include <sys/types.h>
     30 #include <sys/systm.h>
     31 #include <sys/cred.h>
     32 #include <sys/modctl.h>
     33 #include <sys/vfs.h>
     34 #include <sys/vfs_opreg.h>
     35 #include <sys/sysmacros.h>
     36 #include <sys/cmn_err.h>
     37 #include <sys/stat.h>
     38 #include <sys/errno.h>
     39 #include <sys/kmem.h>
     40 #include <sys/file.h>
     41 #include <sys/kstat.h>
     42 #include <sys/port_impl.h>
     43 #include <sys/task.h>
     44 #include <sys/project.h>
     45 
     46 /*
     47  * Event Ports can be shared across threads or across processes.
     48  * Every thread/process can use an own event port or a group of them
     49  * can use a single port. A major request was also to get the ability
     50  * to submit user-defined events to a port. The idea of the
     51  * user-defined events is to use the event ports for communication between
     52  * threads/processes (like message queues). User defined-events are queued
     53  * in a port with the same priority as other event types.
     54  *
     55  * Events are delivered only once. The thread/process which is waiting
     56  * for events with the "highest priority" (priority here is related to the
     57  * internal strategy to wakeup waiting threads) will retrieve the event,
     58  * all other threads/processes will not be notified. There is also
     59  * the requirement to have events which should be submitted immediately
     60  * to all "waiting" threads. That is the main task of the alert event.
     61  * The alert event is submitted by the application to a port. The port
     62  * changes from a standard mode to the alert mode. Now all waiting threads
     63  * will be awakened immediately and they will return with the alert event.
     64  * Threads trying to retrieve events from a port in alert mode will
     65  * return immediately with the alert event.
     66  *
     67  *
     68  * An event port is like a kernel queue, which accepts events submitted from
     69  * user level as well as events submitted from kernel sub-systems. Sub-systems
     70  * able to submit events to a port are the so-called "event sources".
     71  * Current event sources:
     72  * PORT_SOURCE_AIO	 : events submitted per transaction completion from
     73  *			   POSIX-I/O framework.
     74  * PORT_SOURCE_TIMER	 : events submitted when a timer fires
     75  *			   (see timer_create(3RT)).
     76  * PORT_SOURCE_FD	 : events submitted per file descriptor (see poll(2)).
     77  * PORT_SOURCE_ALERT	 : events submitted from user. This is not really a
     78  *			   single event, this is actually a port mode
     79  *			   (see port_alert(3c)).
     80  * PORT_SOURCE_USER	 : events submitted by applications with
     81  *			   port_send(3c) or port_sendn(3c).
     82  * PORT_SOURCE_FILE	 : events submitted per file being watched for file
     83  *			   change events  (see port_create(3c)).
     84  *
     85  * There is a user API implemented in the libc library as well as a
     86  * kernel API implemented in port_subr.c in genunix.
     87  * The available user API functions are:
     88  * port_create() : create a port as a file descriptor of portfs file system
     89  *		   The standard close(2) function closes a port.
     90  * port_associate() : associate a file descriptor with a port to be able to
     91  *		      retrieve events from that file descriptor.
     92  * port_dissociate(): remove the association of a file descriptor with a port.
     93  * port_alert()	 : set/unset a port in alert mode
     94  * port_send()	 : send an event of type PORT_SOURCE_USER to a port
     95  * port_sendn()	 : send an event of type PORT_SOURCE_USER to a list of ports
     96  * port_get()	 : retrieve a single event from a port
     97  * port_getn()	 : retrieve a list of events from a port
     98  *
     99  * The available kernel API functions are:
    100  * port_alloc_event()   : allocate an event slot/structure of/from a port
    101  * port_init_event()    : set event data in the event structure
    102  * port_send_event()    : send event to a port
    103  * port_free_event()    : deliver allocated slot/structure back to a port
    104  * port_associate_ksource(): associate a kernel event source with a port
    105  * port_dissociate_ksource(): dissociate a kernel event source from a port
    106  *
    107  * The libc implementation consists of small functions which pass the
    108  * arguments to the kernel using the "portfs" system call. It means, all the
    109  * synchronisation work is being done in the kernel. The "portfs" system
    110  * call loads the portfs file system into the kernel.
    111  *
    112  * PORT CREATION
    113  * The first function to be used is port_create() which internally creates
    114  * a vnode and a portfs node. The portfs node is represented by the port_t
    115  * structure, which again includes all the data necessary to control a port.
    116  * port_create() returns a file descriptor, which needs to be used in almost
    117  * all other event port functions.
    118  * The maximum number of ports per system is controlled by the resource
    119  * control: project:port-max-ids.
    120  *
    121  * EVENT GENERATION
    122  * The second step is the triggering of events, which could be sent to a port.
    123  * Every event source implements an own method to generate events for a port:
    124  * PORT_SOURCE_AIO:
    125  * 	The sigevent structure of the standard POSIX-IO functions
    126  * 	was extended by an additional notification type.
    127  * 	Standard notification types:
    128  * 	SIGEV_NONE, SIGEV_SIGNAL and SIGEV_THREAD
    129  * 	Event ports introduced now SIGEV_PORT.
    130  * 	The notification type SIGEV_PORT specifies that a structure
    131  * 	of type port_notify_t has to be attached to the sigev_value.
    132  * 	The port_notify_t structure contains the event port file
    133  * 	descriptor and a user-defined pointer.
    134  * 	Internally the AIO implementation will use the kernel API
    135  * 	functions to allocate an event port slot per transaction (aiocb)
    136  * 	and send the event to the port as soon as the transaction completes.
    137  * 	All the events submitted per transaction are of type
    138  * 	PORT_SOURCE_AIO.
    139  * PORT_SOURCE_TIMER:
    140  * 	The timer_create() function uses the same method as the
    141  * 	PORT_SOURCE_AIO event source. It also uses the sigevent structure
    142  * 	to deliver the port information.
    143  * 	Internally the timer code will allocate a single event slot/struct
    144  * 	per timer and it will send the timer event as soon as the timer
    145  * 	fires. If the timer-fired event is not delivered to the application
    146  * 	before the next period elapsed, then an overrun counter will be
    147  * 	incremented. The timer event source uses a callback function to
    148  * 	detect the delivery of the event to the application. At that time
    149  * 	the timer callback function will update the event overrun counter.
    150  * PORT_SOURCE_FD:
    151  * 	This event source uses the port_associate() function to allocate
    152  * 	an event slot/struct from a port. The application defines in the
    153  * 	events argument of port_associate() the type of events which it is
    154  * 	interested in.
    155  * 	The internal pollwakeup() function is used by all the file
    156  * 	systems -- which support the VOP_POLL() interface -- to notify
    157  * 	the upper layer (poll(2), devpoll(7d) and now event ports) about
    158  * 	the event triggered (see valid events in poll(2)).
    159  * 	The pollwakeup() function forwards the event to the layer registered
    160  * 	to receive the current event.
    161  * 	The port_dissociate() function can be used to free the allocated
    162  * 	event slot from the port. Anyway, file descriptors deliver events
    163  * 	only one time and remain deactivated until the application
    164  * 	reactivates the association of a file descriptor with port_associate().
    165  * 	If an associated file descriptor is closed then the file descriptor
    166  * 	will be dissociated automatically from the port.
    167  *
    168  * PORT_SOURCE_ALERT:
    169  * 	This event type is generated when the port was previously set in
    170  * 	alert mode using the port_alert() function.
    171  * 	A single alert event is delivered to every thread which tries to
    172  * 	retrieve events from a port.
    173  * PORT_SOURCE_USER:
    174  * 	This type of event is generated from user level using the port_send()
    175  * 	function to send a user event to a port or the port_sendn() function
    176  * 	to send an event to a list of ports.
    177  * PORT_SOURCE_FILE:
    178  *	This event source uses the port_associate() interface to register
    179  *	a file to be monitored for changes. The file name that needs to be
    180  *	monitored is specified in the file_obj_t structure, a pointer to which
    181  *	is passed as an argument. The event types to be monitored are specified
    182  *	in the events argument.
    183  *	A file events monitor is represented internally per port per object
    184  *	address(the file_obj_t pointer). Which means there can be multiple
    185  *	watches registered on the same file using different file_obj_t
    186  *	structure pointer. With the help of the	FEM(File Event Monitoring)
    187  *	hooks, the file's vnode ops are intercepted and relevant events
    188  *	delivered. The port_dissociate() function is used to de-register a
    189  *	file events monitor on a file. When the specified file is
    190  *	removed/renamed, the file events watch/monitor is automatically
    191  *	removed.
    192  *
    193  * EVENT DELIVERY / RETRIEVING EVENTS
    194  * Events remain in the port queue until:
    195  * - the application uses port_get() or port_getn() to retrieve events,
    196  * - the event source cancels the event,
    197  * - the event port is closed or
    198  * - the process exits.
    199  * The maximal number of events in a port queue is the maximal number
    200  * of event slots/structures which can be allocated by event sources.
    201  * The allocation of event slots/structures is controlled by the resource
    202  * control: process.port-max-events.
    203  * The port_get() function retrieves a single event and the port_getn()
    204  * function retrieves a list of events.
    205  * Events are classified as shareable and non-shareable events across processes.
    206  * Non-shareable events are invisible for the port_get(n)() functions of
    207  * processes other than the owner of the event.
    208  *    Shareable event types are:
    209  *    PORT_SOURCE_USER events
    210  * 	This type of event is unconditionally shareable and without
    211  * 	limitations. If the parent process sends a user event and closes
    212  * 	the port afterwards, the event remains in the port and the child
    213  * 	process will still be able to retrieve the user event.
    214  *    PORT_SOURCE_ALERT events
    215  * 	This type of event is shareable between processes.
    216  * 	Limitation:	The alert mode of the port is removed if the owner
    217  * 			(process which set the port in alert mode) of the
    218  * 			alert event closes the port.
    219  *    PORT_SOURCE_FD events
    220  * 	This type of event is conditionally shareable between processes.
    221  * 	After fork(2) all forked file descriptors are shareable between
    222  * 	the processes. The child process is allowed to retrieve events
    223  * 	from the associated file descriptors and it can also re-associate
    224  * 	the fd with the port.
    225  * 	Limitations:	The child process is not allowed to dissociate
    226  * 			the file descriptor from the port. Only the
    227  * 			owner (process) of the association is allowed to
    228  * 			dissociate the file descriptor from the port.
    229  * 			If the owner of the association closes the port
    230  * 			the association will be removed.
    231  *    PORT_SOURCE_AIO events
    232  * 	This type of event is not shareable between processes.
    233  *    PORT_SOURCE_TIMER events
    234  * 	This type of event is not shareable between processes.
    235  *    PORT_SOURCE_FILE events
    236  * 	This type of event is not shareable between processes.
    237  *
    238  * FORK BEHAVIOUR
    239  * On fork(2) the child process inherits all opened file descriptors from
    240  * the parent process. This is also valid for port file descriptors.
    241  * Associated file descriptors with a port maintain the association across the
    242  * fork(2). It means, the child process gets full access to the port and
    243  * it can retrieve events from all common associated file descriptors.
    244  * Events of file descriptors created and associated with a port after the
    245  * fork(2) are non-shareable and can only be retrieved by the same process.
    246  *
    247  * If the parent or the child process closes an exported port (using fork(2)
    248  * or I_SENDFD) all the file descriptors associated with the port by the
    249  * process will be dissociated from the port. Events of dissociated file
    250  * descriptors as well as all non-shareable events will be discarded.
    251  * The other process can continue working with the port as usual.
    252  *
    253  * CLOSING A PORT
    254  * close(2) has to be used to close a port. See FORK BEHAVIOUR for details.
    255  *
    256  * PORT EVENT STRUCTURES
    257  * The global control structure of the event ports framework is port_control_t.
    258  * port_control_t keeps track of the number of created ports in the system.
    259  * The cache of the port event structures is also located in port_control_t.
    260  *
    261  * On port_create() the vnode and the portfs node is also created.
    262  * The portfs node is represented by the port_t structure.
    263  * The port_t structure manages all port specific tasks:
    264  * - management of resource control values
    265  * - port VOP_POLL interface
    266  * - creation time
    267  * - uid and gid of the port
    268  *
    269  * The port_t structure contains the port_queue_t structure.
    270  * The port_queue_t structure contains all the data necessary for the
    271  * queue management:
    272  * - locking
    273  * - condition variables
    274  * - event counters
    275  * - submitted events	(represented by port_kevent_t structures)
    276  * - threads waiting for event delivery (check portget_t structure)
    277  * - PORT_SOURCE_FD cache	(managed by the port_fdcache_t structure)
    278  * - event source management (managed by the port_source_t structure)
    279  * - alert mode management	(check port_alert_t structure)
    280  *
    281  * EVENT MANAGEMENT
    282  * The event port file system creates a kmem_cache for internal allocation of
    283  * event port structures.
    284  *
    285  * 1. Event source association with a port:
    286  * The first step to do for event sources is to get associated with a port
    287  * using the port_associate_ksource() function or adding an entry to the
    288  * port_ksource_tab[]. An event source can get dissociated from a port
    289  * using the port_dissociate_ksource() function. An entry in the
    290  * port_ksource_tab[] implies that the source will be associated
    291  * automatically with every new created port.
    292  * The event source can deliver a callback function, which is used by the
    293  * port to notify the event source about close(2). The idea is that
    294  * in such a case the event source should free all allocated resources
    295  * and it must return to the port all allocated slots/structures.
    296  * The port_close() function will wait until all allocated event
    297  * structures/slots are returned to the port.
    298  * The callback function is not necessary when the event source does not
    299  * maintain local resources, a second condition is that the event source
    300  * can guarantee that allocated event slots will be returned without
    301  * delay to the port (it will not block and sleep somewhere).
    302  *
    303  * 2. Reservation of an event slot / event structure
    304  * The event port reliability is based on the reservation of an event "slot"
    305  * (allocation of an event structure) by the event source as part of the
    306  * application call. If the maximal number of event slots is exhausted then
    307  * the event source can return a corresponding error code to the application.
    308  *
    309  * The port_alloc_event() function has to be used by event sources to
    310  * allocate an event slot (reserve an event structure). The port_alloc_event()
    311  * does not block and it will return a 0 value on success or an error code
    312  * if it fails.
    313  * An argument of port_alloc_event() is a flag which determines the behavior
    314  * of the event after it was delivered to the application:
    315  * PORT_ALLOC_DEFAULT	: event slot becomes free after delivery to the
    316  *			  application.
    317  * PORT_ALLOC_PRIVATE	: event slot remains under the control of the event
    318  *			  source. This kind of slots can not be used for
    319  *			  event delivery and should only be used internally
    320  *			  by the event source.
    321  * PORT_KEV_CACHED	: event slot remains under the control of an event
    322  *			  port cache. It does not become free after delivery
    323  *			  to the application.
    324  * PORT_ALLOC_SCACHED	: event slot remains under the control of the event
    325  *			  source. The event source takes the control over
    326  *			  the slot after the event is delivered to the
    327  *			  application.
    328  *
    329  * 3. Delivery of events to the event port
    330  * Earlier allocated event structure/slot has to be used to deliver
    331  * event data to the port. Event source has to use the function
    332  * port_send_event(). The single argument is a pointer to the previously
    333  * reserved event structure/slot.
    334  * The portkev_events field of the port_kevent_t structure can be updated/set
    335  * in two ways:
    336  * 1. using the port_set_event() function, or
    337  * 2. updating the portkev_events field out of the callback function:
    338  *    The event source can deliver a callback function to the port as an
    339  *    argument of port_init_event().
    340  *    One of the arguments of the callback function is a pointer to the
    341  *    events field, which will be delivered to the application.
    342  *    (see Delivery of events to the application).
    343  * Event structures/slots can be delivered to the event port only one time,
    344  * they remain blocked until the data is delivered to the application and the
    345  * slot becomes free or it is delivered back to the event source
    346  * (PORT_ALLOC_SCACHED). The activation of the callback function mentioned above
    347  * is at the same time the indicator for the event source that the event
    348  * structure/slot is free for reuse.
    349  *
    350  * 4. Delivery of events to the application
    351  * The events structures/slots delivered by event sources remain in the
    352  * port queue until they are retrieved by the application or the port
    353  * is closed (exit(2) also closes all opened file descriptors).
    354  * The application uses port_get() or port_getn() to retrieve events from
    355  * a port. port_get() retrieves a single event structure/slot and port_getn()
    356  * retrieves a list of event structures/slots.
    357  * Both functions are able to poll for events and return immediately or they
    358  * can specify a timeout value.
    359  * Before the events are delivered to the application they are moved to a
    360  * second temporary internal queue. The idea is to avoid lock collisions or
    361  * contentions of the global queue lock.
    362  * The global queue lock is used every time when an event source delivers
    363  * new events to the port.
    364  * The port_get() and port_getn() functions
    365  * a) retrieve single events from the temporary queue,
    366  * b) prepare the data to be passed to the application memory,
    367  * c) activate the callback function of the event sources:
    368  *    - to get the latest event data,
    369  *    - the event source can free all allocated resources associated with the
    370  *      current event,
    371  *    - the event source can re-use the current event slot/structure
    372  *    - the event source can deny the delivery of the event to the application
    373  *      (e.g. because of the wrong process).
    374  * d) put the event back to the temporary queue if the event delivery was denied
    375  * e) repeat a) until d) as long as there are events in the queue and
    376  *    there is enough user space available.
    377  *
    378  * The loop described above could block the global mutex for a very long time.
    379  * To avoid that, a second mutex was introduced to synchronize concurrent
    380  * threads accessing the temporary queue.
    381  */
    382 
/*
 * Forward declaration of the native system call handler. It is referenced
 * by port_sysent below and defined further down in this file.
 */
static int64_t portfs(int, uintptr_t, uintptr_t, uintptr_t, uintptr_t,
    uintptr_t);

/*
 * System call entry for portfs(). The leading 6 matches the number of
 * parameters in the portfs() prototype above (opcode plus a0..a4).
 */
static struct sysent port_sysent = {
	6,
	SE_ARGC | SE_64RVAL | SE_NOUNLOAD,
	(int (*)())portfs,
};

/* Linkage element that publishes port_sysent under the name "event ports". */
static struct modlsys modlsys = {
	&mod_syscallops, "event ports", &port_sysent
};

#ifdef _SYSCALL32_IMPL

/* Forward declaration of the handler used for 32-bit programs. */
static int64_t
portfs32(uint32_t arg1, int32_t arg2, uint32_t arg3, uint32_t arg4,
    uint32_t arg5, uint32_t arg6);

/* System call entry for portfs32(); same layout and flags as port_sysent. */
static struct sysent port_sysent32 = {
	6,
	SE_ARGC | SE_64RVAL | SE_NOUNLOAD,
	(int (*)())portfs32,
};

/* Linkage element that publishes port_sysent32 for 32-bit callers. */
static struct modlsys modlsys32 = {
	&mod_syscallops32,
	"32-bit event ports syscalls",
	&port_sysent32
};
#endif	/* _SYSCALL32_IMPL */

/*
 * NULL-terminated list of linkage elements; it is handed to mod_install()
 * in _init() and to mod_info() in _info(). The 32-bit element is present
 * only when _SYSCALL32_IMPL is defined.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modlsys,
#ifdef _SYSCALL32_IMPL
	&modlsys32,
#endif
	NULL
};
    423 
/*
 * Named kstat data of the module: a single 32-bit unsigned entry called
 * "ports" (presumably the number of existing ports -- confirm against
 * port_kstat_init() and its users).
 */
port_kstat_t port_kstat = {
	{ "ports",	KSTAT_DATA_UINT32 }
};

/* Device number of portfs; assigned in _init() from makedevice(major, 0). */
dev_t	portdev;
/* Vnode operations of portfs; filled in by vn_make_ops() in _init(). */
struct	vnodeops *port_vnodeops;
/* Dummy vfs of portfs; its ops, flag, dev and fsid are set in _init(). */
struct	vfs port_vfs;

/*
 * Symbols defined outside this file. The two rctl handles presumably back
 * the process.port-max-events and project.port-max-ids resource controls
 * (TODO confirm); aio_close_port() is the routine registered for
 * PORT_SOURCE_AIO in port_ksource_tab[] below.
 */
extern	rctl_hndl_t rc_process_portev;
extern	rctl_hndl_t rc_project_portids;
extern	void aio_close_port(void *, int, pid_t, int);

/*
 * This table contains a list of event sources which need a static
 * association with a port (every port).
 * The last NULL entry in the table is required to detect "end of table".
 */
struct port_ksource port_ksource_tab[] = {
	{PORT_SOURCE_AIO, aio_close_port, NULL, NULL},
	{0, NULL, NULL, NULL}
};
    445 
/*
 * local functions
 *
 * Prototypes of the file-local helpers. portfs() calls port_getn(),
 * port_sendn(), port_alert(), port_dispatch_event(), port_send(),
 * port_create() and port_remove_alert(); _init() calls port_kstat_init().
 */
static int port_getn(port_t *, port_event_t *, uint_t, uint_t *,
    port_gettimer_t *);
static int port_sendn(int [], int [], uint_t, int, void *, uint_t *);
static int port_alert(port_t *, int, int, void *);
static int port_dispatch_event(port_t *, int, int, int, uintptr_t, void *);
static int port_send(port_t *, int, int, void *);
static int port_create(int *);
static int port_get_alert(port_alert_t *, port_event_t *);
static int port_copy_event(port_event_t *, port_kevent_t *, list_t *);
static int *port_errorn(int *, int, int, int);
static int port_noshare(void *, int *, pid_t, int, void *);
static int port_get_timeout(timespec_t *, timespec_t *, timespec_t **, int *,
    int);
static void port_init(port_t *);
static void port_remove_alert(port_queue_t *);
static void port_add_ksource_local(port_t *, port_ksource_t *);
static void port_check_return_cond(port_queue_t *);
static void port_dequeue_thread(port_queue_t *, portget_t *);
static portget_t *port_queue_thread(port_queue_t *, uint_t);
static void port_kstat_init(void);

/* Variant of port_copy_event() taking a port_event32_t destination. */
#ifdef	_SYSCALL32_IMPL
static int port_copy_event32(port_event32_t *, port_kevent_t *, list_t *);
#endif
    471 
    472 int
    473 _init(void)
    474 {
    475 	static const fs_operation_def_t port_vfsops_template[] = {
    476 		NULL, NULL
    477 	};
    478 	extern const	fs_operation_def_t port_vnodeops_template[];
    479 	vfsops_t	*port_vfsops;
    480 	int		error;
    481 	major_t 	major;
    482 
    483 	if ((major = getudev()) == (major_t)-1)
    484 		return (ENXIO);
    485 	portdev = makedevice(major, 0);
    486 
    487 	/* Create a dummy vfs */
    488 	error = vfs_makefsops(port_vfsops_template, &port_vfsops);
    489 	if (error) {
    490 		cmn_err(CE_WARN, "port init: bad vfs ops");
    491 		return (error);
    492 	}
    493 	vfs_setops(&port_vfs, port_vfsops);
    494 	port_vfs.vfs_flag = VFS_RDONLY;
    495 	port_vfs.vfs_dev = portdev;
    496 	vfs_make_fsid(&(port_vfs.vfs_fsid), portdev, 0);
    497 
    498 	error = vn_make_ops("portfs", port_vnodeops_template, &port_vnodeops);
    499 	if (error) {
    500 		vfs_freevfsops(port_vfsops);
    501 		cmn_err(CE_WARN, "port init: bad vnode ops");
    502 		return (error);
    503 	}
    504 
    505 	mutex_init(&port_control.pc_mutex, NULL, MUTEX_DEFAULT, NULL);
    506 	port_control.pc_nents = 0;	/* number of active ports */
    507 
    508 	/* create kmem_cache for port event structures */
    509 	port_control.pc_cache = kmem_cache_create("port_cache",
    510 	    sizeof (port_kevent_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
    511 
    512 	port_kstat_init();		/* init port kstats */
    513 	return (mod_install(&modlinkage));
    514 }
    515 
    516 int
    517 _info(struct modinfo *modinfop)
    518 {
    519 	return (mod_info(&modlinkage, modinfop));
    520 }
    521 
    522 /*
    523  * System call wrapper for all port related system calls from 32-bit programs.
    524  */
    525 #ifdef _SYSCALL32_IMPL
    526 static int64_t
    527 portfs32(uint32_t opcode, int32_t a0, uint32_t a1, uint32_t a2, uint32_t a3,
    528     uint32_t a4)
    529 {
    530 	int64_t	error;
    531 
    532 	switch (opcode & PORT_CODE_MASK) {
    533 	case PORT_GET:
    534 		error = portfs(PORT_GET, a0, a1, (int)a2, (int)a3, a4);
    535 		break;
    536 	case PORT_SENDN:
    537 		error = portfs(opcode, (uint32_t)a0, a1, a2, a3, a4);
    538 		break;
    539 	default:
    540 		error = portfs(opcode, a0, a1, a2, a3, a4);
    541 		break;
    542 	}
    543 	return (error);
    544 }
    545 #endif	/* _SYSCALL32_IMPL */
    546 
    547 /*
    548  * System entry point for port functions.
    549  * a0 is a port file descriptor (except for PORT_SENDN and PORT_CREATE).
    550  * The libc uses PORT_SYS_NOPORT in functions which do not deliver a
    551  * port file descriptor as first argument.
    552  */
    553 static int64_t
    554 portfs(int opcode, uintptr_t a0, uintptr_t a1, uintptr_t a2, uintptr_t a3,
    555     uintptr_t a4)
    556 {
    557 	rval_t		r;
    558 	port_t		*pp;
    559 	int 		error = 0;
    560 	uint_t		nget;
    561 	file_t		*fp;
    562 	port_gettimer_t	port_timer;
    563 
    564 	r.r_vals = 0;
    565 	if (opcode & PORT_SYS_NOPORT) {
    566 		opcode &= PORT_CODE_MASK;
    567 		if (opcode == PORT_SENDN) {
    568 			error = port_sendn((int *)a0, (int *)a1, (uint_t)a2,
    569 			    (int)a3, (void *)a4, (uint_t *)&r.r_val1);
    570 			if (error && (error != EIO))
    571 				return ((int64_t)set_errno(error));
    572 			return (r.r_vals);
    573 		}
    574 
    575 		if (opcode == PORT_CREATE) {
    576 			error = port_create(&r.r_val1);
    577 			if (error)
    578 				return ((int64_t)set_errno(error));
    579 			return (r.r_vals);
    580 		}
    581 	}
    582 
    583 	/* opcodes using port as first argument (a0) */
    584 
    585 	if ((fp = getf((int)a0)) == NULL)
    586 		return ((uintptr_t)set_errno(EBADF));
    587 
    588 	if (fp->f_vnode->v_type != VPORT) {
    589 		releasef((int)a0);
    590 		return ((uintptr_t)set_errno(EBADFD));
    591 	}
    592 
    593 	pp = VTOEP(fp->f_vnode);
    594 
    595 	switch (opcode & PORT_CODE_MASK) {
    596 	case	PORT_GET:
    597 	{
    598 		/* see PORT_GETN description */
    599 		struct	timespec timeout;
    600 
    601 		port_timer.pgt_flags = PORTGET_ONE;
    602 		port_timer.pgt_loop = 0;
    603 		port_timer.pgt_rqtp = NULL;
    604 		if (a4 != NULL) {
    605 			port_timer.pgt_timeout = &timeout;
    606 			timeout.tv_sec = (time_t)a2;
    607 			timeout.tv_nsec = (long)a3;
    608 		} else {
    609 			port_timer.pgt_timeout = NULL;
    610 		}
    611 		do {
    612 			nget = 1;
    613 			error = port_getn(pp, (port_event_t *)a1, 1,
    614 			    (uint_t *)&nget, &port_timer);
    615 		} while (nget == 0 && error == 0 && port_timer.pgt_loop);
    616 		break;
    617 	}
    618 	case	PORT_GETN:
    619 	{
    620 		/*
    621 		 * port_getn() can only retrieve own or shareable events from
    622 		 * other processes. The port_getn() function remains in the
    623 		 * kernel until own or shareable events are available or the
    624 		 * timeout elapses.
    625 		 */
    626 		port_timer.pgt_flags = 0;
    627 		port_timer.pgt_loop = 0;
    628 		port_timer.pgt_rqtp = NULL;
    629 		port_timer.pgt_timeout = (struct timespec *)a4;
    630 		do {
    631 			nget = a3;
    632 			error = port_getn(pp, (port_event_t *)a1, (uint_t)a2,
    633 			    (uint_t *)&nget, &port_timer);
    634 		} while (nget == 0 && error == 0 && port_timer.pgt_loop);
    635 		r.r_val1 = nget;
    636 		r.r_val2 = error;
    637 		releasef((int)a0);
    638 		if (error && error != ETIME)
    639 			return ((int64_t)set_errno(error));
    640 		return (r.r_vals);
    641 	}
    642 	case	PORT_ASSOCIATE:
    643 	{
    644 		switch ((int)a1) {
    645 		case PORT_SOURCE_FD:
    646 			error = port_associate_fd(pp, (int)a1, (uintptr_t)a2,
    647 			    (int)a3, (void *)a4);
    648 			break;
    649 		case PORT_SOURCE_FILE:
    650 			error = port_associate_fop(pp, (int)a1, (uintptr_t)a2,
    651 			    (int)a3, (void *)a4);
    652 			break;
    653 		default:
    654 			error = EINVAL;
    655 			break;
    656 		}
    657 		break;
    658 	}
    659 	case	PORT_SEND:
    660 	{
    661 		/* user-defined events */
    662 		error = port_send(pp, PORT_SOURCE_USER, (int)a1, (void *)a2);
    663 		break;
    664 	}
    665 	case	PORT_DISPATCH:
    666 	{
    667 		/*
    668 		 * library events, blocking
    669 		 * Only events of type PORT_SOURCE_AIO or PORT_SOURCE_MQ
    670 		 * are currently allowed.
    671 		 */
    672 		if ((int)a1 != PORT_SOURCE_AIO && (int)a1 != PORT_SOURCE_MQ) {
    673 			error = EINVAL;
    674 			break;
    675 		}
    676 		error = port_dispatch_event(pp, (int)opcode, (int)a1, (int)a2,
    677 		    (uintptr_t)a3, (void *)a4);
    678 		break;
    679 	}
    680 	case	PORT_DISSOCIATE:
    681 	{
    682 		switch ((int)a1) {
    683 		case PORT_SOURCE_FD:
    684 			error = port_dissociate_fd(pp, (uintptr_t)a2);
    685 			break;
    686 		case PORT_SOURCE_FILE:
    687 			error = port_dissociate_fop(pp, (uintptr_t)a2);
    688 			break;
    689 		default:
    690 			error = EINVAL;
    691 			break;
    692 		}
    693 		break;
    694 	}
    695 	case	PORT_ALERT:
    696 	{
    697 		if ((int)a2)	/* a2 = events */
    698 			error = port_alert(pp, (int)a1, (int)a2, (void *)a3);
    699 		else
    700 			port_remove_alert(&pp->port_queue);
    701 		break;
    702 	}
    703 	default:
    704 		error = EINVAL;
    705 		break;
    706 	}
    707 
    708 	releasef((int)a0);
    709 	if (error)
    710 		return ((int64_t)set_errno(error));
    711 	return (r.r_vals);
    712 }
    713 
    714 /*
    715  * System call to create a port.
    716  *
    717  * The port_create() function creates a vnode of type VPORT per port.
    718  * The port control data is associated with the vnode as vnode private data.
    719  * The port_create() function returns an event port file descriptor.
    720  */
    721 static int
    722 port_create(int *fdp)
    723 {
    724 	port_t		*pp;
    725 	vnode_t		*vp;
    726 	struct file	*fp;
    727 	proc_t		*p = curproc;
    728 
    729 	/* initialize vnode and port private data */
    730 	pp = kmem_zalloc(sizeof (port_t), KM_SLEEP);
    731 
    732 	pp->port_vnode = vn_alloc(KM_SLEEP);
    733 	vp = EPTOV(pp);
    734 	vn_setops(vp, port_vnodeops);
    735 	vp->v_type = VPORT;
    736 	vp->v_vfsp = &port_vfs;
    737 	vp->v_data = (caddr_t)pp;
    738 
    739 	mutex_enter(&port_control.pc_mutex);
    740 	/*
    741 	 * Retrieve the maximal number of event ports allowed per system from
    742 	 * the resource control: project.port-max-ids.
    743 	 */
    744 	mutex_enter(&p->p_lock);
    745 	if (rctl_test(rc_project_portids, p->p_task->tk_proj->kpj_rctls, p,
    746 	    port_control.pc_nents + 1, RCA_SAFE) & RCT_DENY) {
    747 		mutex_exit(&p->p_lock);
    748 		vn_free(vp);
    749 		kmem_free(pp, sizeof (port_t));
    750 		mutex_exit(&port_control.pc_mutex);
    751 		return (EAGAIN);
    752 	}
    753 
    754 	/*
    755 	 * Retrieve the maximal number of events allowed per port from
    756 	 * the resource control: process.port-max-events.
    757 	 */
    758 	pp->port_max_events = rctl_enforced_value(rc_process_portev,
    759 	    p->p_rctls, p);
    760 	mutex_exit(&p->p_lock);
    761 
    762 	/* allocate a new user file descriptor and a file structure */
    763 	if (falloc(vp, 0, &fp, fdp)) {
    764 		/*
    765 		 * If the file table is full, free allocated resources.
    766 		 */
    767 		vn_free(vp);
    768 		kmem_free(pp, sizeof (port_t));
    769 		mutex_exit(&port_control.pc_mutex);
    770 		return (EMFILE);
    771 	}
    772 
    773 	mutex_exit(&fp->f_tlock);
    774 
    775 	pp->port_fd = *fdp;
    776 	port_control.pc_nents++;
    777 	p->p_portcnt++;
    778 	port_kstat.pks_ports.value.ui32++;
    779 	mutex_exit(&port_control.pc_mutex);
    780 
    781 	/* initializes port private data */
    782 	port_init(pp);
    783 	/* set user file pointer */
    784 	setf(*fdp, fp);
    785 	return (0);
    786 }
    787 
    788 /*
    789  * port_init() initializes event port specific data
    790  */
    791 static void
    792 port_init(port_t *pp)
    793 {
    794 	port_queue_t	*portq;
    795 	port_ksource_t	*pks;
    796 
    797 	mutex_init(&pp->port_mutex, NULL, MUTEX_DEFAULT, NULL);
    798 	portq = &pp->port_queue;
    799 	mutex_init(&portq->portq_mutex, NULL, MUTEX_DEFAULT, NULL);
    800 	pp->port_flags |= PORT_INIT;
    801 
    802 	/*
    803 	 * If it is not enough memory available to satisfy a user
    804 	 * request using a single port_getn() call then port_getn()
    805 	 * will reduce the size of the list to PORT_MAX_LIST.
    806 	 */
    807 	pp->port_max_list = port_max_list;
    808 
    809 	/* Set timestamp entries required for fstat(2) requests */
    810 	gethrestime(&pp->port_ctime);
    811 	pp->port_uid = crgetuid(curproc->p_cred);
    812 	pp->port_gid = crgetgid(curproc->p_cred);
    813 
    814 	/* initialize port queue structs */
    815 	list_create(&portq->portq_list, sizeof (port_kevent_t),
    816 	    offsetof(port_kevent_t, portkev_node));
    817 	list_create(&portq->portq_get_list, sizeof (port_kevent_t),
    818 	    offsetof(port_kevent_t, portkev_node));
    819 	portq->portq_flags = 0;
    820 	pp->port_pid = curproc->p_pid;
    821 
    822 	/* Allocate cache skeleton for PORT_SOURCE_FD events */
    823 	portq->portq_pcp = kmem_zalloc(sizeof (port_fdcache_t), KM_SLEEP);
    824 	mutex_init(&portq->portq_pcp->pc_lock, NULL, MUTEX_DEFAULT, NULL);
    825 
    826 	/*
    827 	 * Allocate cache skeleton for association of event sources.
    828 	 */
    829 	mutex_init(&portq->portq_source_mutex, NULL, MUTEX_DEFAULT, NULL);
    830 	portq->portq_scache = kmem_zalloc(
    831 	    PORT_SCACHE_SIZE * sizeof (port_source_t *), KM_SLEEP);
    832 
    833 	/*
    834 	 * pre-associate some kernel sources with this port.
    835 	 * The pre-association is required to create port_source_t
    836 	 * structures for object association.
    837 	 * Some sources can not get associated with a port before the first
    838 	 * object association is requested. Another reason to pre_associate
    839 	 * a particular source with a port is because of performance.
    840 	 */
    841 
    842 	for (pks = port_ksource_tab; pks->pks_source != 0; pks++)
    843 		port_add_ksource_local(pp, pks);
    844 }
    845 
    846 /*
    847  * The port_add_ksource_local() function is being used to associate
    848  * event sources with every new port.
    849  * The event sources need to be added to port_ksource_tab[].
    850  */
    851 static void
    852 port_add_ksource_local(port_t *pp, port_ksource_t *pks)
    853 {
    854 	port_source_t	*pse;
    855 	port_source_t	**ps;
    856 
    857 	mutex_enter(&pp->port_queue.portq_source_mutex);
    858 	ps = &pp->port_queue.portq_scache[PORT_SHASH(pks->pks_source)];
    859 	for (pse = *ps; pse != NULL; pse = pse->portsrc_next) {
    860 		if (pse->portsrc_source == pks->pks_source)
    861 			break;
    862 	}
    863 
    864 	if (pse == NULL) {
    865 		/* associate new source with the port */
    866 		pse = kmem_zalloc(sizeof (port_source_t), KM_SLEEP);
    867 		pse->portsrc_source = pks->pks_source;
    868 		pse->portsrc_close = pks->pks_close;
    869 		pse->portsrc_closearg = pks->pks_closearg;
    870 		pse->portsrc_cnt = 1;
    871 
    872 		pks->pks_portsrc = pse;
    873 		if (*ps != NULL)
    874 			pse->portsrc_next = (*ps)->portsrc_next;
    875 		*ps = pse;
    876 	}
    877 	mutex_exit(&pp->port_queue.portq_source_mutex);
    878 }
    879 
    880 /*
    881  * The port_send() function sends an event of type "source" to a
    882  * port. This function is non-blocking. An event can be sent to
    883  * a port as long as the number of events per port does not achieve the
    884  * maximal allowed number of events. The max. number of events per port is
    885  * defined by the resource control process.max-port-events.
    886  * This function is used by the port library function port_send()
    887  * and port_dispatch(). The port_send(3c) function is part of the
    888  * event ports API and submits events of type PORT_SOURCE_USER. The
    889  * port_dispatch() function is project private and it is used by library
    890  * functions to submit events of other types than PORT_SOURCE_USER
    891  * (e.g. PORT_SOURCE_AIO).
    892  */
    893 static int
    894 port_send(port_t *pp, int source, int events, void *user)
    895 {
    896 	port_kevent_t	*pev;
    897 	int		error;
    898 
    899 	error = port_alloc_event_local(pp, source, PORT_ALLOC_DEFAULT, &pev);
    900 	if (error)
    901 		return (error);
    902 
    903 	pev->portkev_object = 0;
    904 	pev->portkev_events = events;
    905 	pev->portkev_user = user;
    906 	pev->portkev_callback = NULL;
    907 	pev->portkev_arg = NULL;
    908 	pev->portkev_flags = 0;
    909 
    910 	port_send_event(pev);
    911 	return (0);
    912 }
    913 
    914 /*
    915  * The port_noshare() function returns 0 if the current event was generated
    916  * by the same process. Otherwise is returns a value other than 0 and the
    917  * event should not be delivered to the current processe.
    918  * The port_noshare() function is normally used by the port_dispatch()
    919  * function. The port_dispatch() function is project private and can only be
    920  * used within the event port project.
    921  * Currently the libaio uses the port_dispatch() function to deliver events
    922  * of types PORT_SOURCE_AIO.
    923  */
    924 /* ARGSUSED */
    925 static int
    926 port_noshare(void *arg, int *events, pid_t pid, int flag, void *evp)
    927 {
    928 	if (flag == PORT_CALLBACK_DEFAULT && curproc->p_pid != pid)
    929 		return (1);
    930 	return (0);
    931 }
    932 
    933 /*
    934  * The port_dispatch_event() function is project private and it is used by
    935  * libraries involved in the project to deliver events to the port.
    936  * port_dispatch will sleep and wait for enough resources to satisfy the
    937  * request, if necessary.
    938  * The library can specify if the delivered event is shareable with other
    939  * processes (see PORT_SYS_NOSHARE flag).
    940  */
    941 static int
    942 port_dispatch_event(port_t *pp, int opcode, int source, int events,
    943     uintptr_t object, void *user)
    944 {
    945 	port_kevent_t	*pev;
    946 	int		error;
    947 
    948 	error = port_alloc_event_block(pp, source, PORT_ALLOC_DEFAULT, &pev);
    949 	if (error)
    950 		return (error);
    951 
    952 	pev->portkev_object = object;
    953 	pev->portkev_events = events;
    954 	pev->portkev_user = user;
    955 	pev->portkev_arg = NULL;
    956 	if (opcode & PORT_SYS_NOSHARE) {
    957 		pev->portkev_flags = PORT_KEV_NOSHARE;
    958 		pev->portkev_callback = port_noshare;
    959 	} else {
    960 		pev->portkev_flags = 0;
    961 		pev->portkev_callback = NULL;
    962 	}
    963 
    964 	port_send_event(pev);
    965 	return (0);
    966 }
    967 
    968 
    969 /*
    970  * The port_sendn() function is the kernel implementation of the event
    971  * port API function port_sendn(3c).
    972  * This function is able to send an event to a list of event ports.
    973  */
    974 static int
    975 port_sendn(int ports[], int errors[], uint_t nent, int events, void *user,
    976     uint_t *nget)
    977 {
    978 	port_kevent_t	*pev;
    979 	int		errorcnt = 0;
    980 	int		error = 0;
    981 	int		count;
    982 	int		port;
    983 	int		*plist;
    984 	int		*elist = NULL;
    985 	file_t		*fp;
    986 	port_t		*pp;
    987 
    988 	if (nent == 0 || nent > port_max_list)
    989 		return (EINVAL);
    990 
    991 	plist = kmem_alloc(nent * sizeof (int), KM_SLEEP);
    992 	if (copyin((void *)ports, plist, nent * sizeof (int))) {
    993 		kmem_free(plist, nent * sizeof (int));
    994 		return (EFAULT);
    995 	}
    996 
    997 	/*
    998 	 * Scan the list for event port file descriptors and send the
    999 	 * attached user event data embedded in a event of type
   1000 	 * PORT_SOURCE_USER to every event port in the list.
   1001 	 * If a list entry is not a valid event port then the corresponding
   1002 	 * error code will be stored in the errors[] list with the same
   1003 	 * list offset as in the ports[] list.
   1004 	 */
   1005 
   1006 	for (count = 0; count < nent; count++) {
   1007 		port = plist[count];
   1008 		if ((fp = getf(port)) == NULL) {
   1009 			elist = port_errorn(elist, nent, EBADF, count);
   1010 			errorcnt++;
   1011 			continue;
   1012 		}
   1013 
   1014 		pp = VTOEP(fp->f_vnode);
   1015 		if (fp->f_vnode->v_type != VPORT) {
   1016 			releasef(port);
   1017 			elist = port_errorn(elist, nent, EBADFD, count);
   1018 			errorcnt++;
   1019 			continue;
   1020 		}
   1021 
   1022 		error = port_alloc_event_local(pp, PORT_SOURCE_USER,
   1023 		    PORT_ALLOC_DEFAULT, &pev);
   1024 		if (error) {
   1025 			releasef(port);
   1026 			elist = port_errorn(elist, nent, error, count);
   1027 			errorcnt++;
   1028 			continue;
   1029 		}
   1030 
   1031 		pev->portkev_object = 0;
   1032 		pev->portkev_events = events;
   1033 		pev->portkev_user = user;
   1034 		pev->portkev_callback = NULL;
   1035 		pev->portkev_arg = NULL;
   1036 		pev->portkev_flags = 0;
   1037 
   1038 		port_send_event(pev);
   1039 		releasef(port);
   1040 	}
   1041 	if (errorcnt) {
   1042 		error = EIO;
   1043 		if (copyout(elist, (void *)errors, nent * sizeof (int)))
   1044 			error = EFAULT;
   1045 		kmem_free(elist, nent * sizeof (int));
   1046 	}
   1047 	*nget = nent - errorcnt;
   1048 	kmem_free(plist, nent * sizeof (int));
   1049 	return (error);
   1050 }
   1051 
   1052 static int *
   1053 port_errorn(int *elist, int nent, int error, int index)
   1054 {
   1055 	if (elist == NULL)
   1056 		elist = kmem_zalloc(nent * sizeof (int), KM_SLEEP);
   1057 	elist[index] = error;
   1058 	return (elist);
   1059 }
   1060 
   1061 /*
   1062  * port_alert()
   1063  * The port_alert() funcion is a high priority event and it is always set
   1064  * on top of the queue. It is also delivered as single event.
   1065  * flags:
   1066  *	- SET	:overwrite current alert data
   1067  *	- UPDATE:set alert data or return EBUSY if alert mode is already set
   1068  *
   1069  * - set the ALERT flag
   1070  * - wakeup all sleeping threads
   1071  */
   1072 static int
   1073 port_alert(port_t *pp, int flags, int events, void *user)
   1074 {
   1075 	port_queue_t	*portq;
   1076 	portget_t	*pgetp;
   1077 	port_alert_t	*pa;
   1078 
   1079 	if ((flags & PORT_ALERT_INVALID) == PORT_ALERT_INVALID)
   1080 		return (EINVAL);
   1081 
   1082 	portq = &pp->port_queue;
   1083 	pa = &portq->portq_alert;
   1084 	mutex_enter(&portq->portq_mutex);
   1085 
   1086 	/* check alert conditions */
   1087 	if (flags == PORT_ALERT_UPDATE) {
   1088 		if (portq->portq_flags & PORTQ_ALERT) {
   1089 			mutex_exit(&portq->portq_mutex);
   1090 			return (EBUSY);
   1091 		}
   1092 	}
   1093 
   1094 	/*
   1095 	 * Store alert data in the port to be delivered to threads
   1096 	 * which are using port_get(n) to retrieve events.
   1097 	 */
   1098 
   1099 	portq->portq_flags |= PORTQ_ALERT;
   1100 	pa->portal_events = events;		/* alert info */
   1101 	pa->portal_pid = curproc->p_pid;	/* process owner */
   1102 	pa->portal_object = 0;			/* no object */
   1103 	pa->portal_user = user;			/* user alert data */
   1104 
   1105 	/* alert and deliver alert data to waiting threads */
   1106 	pgetp = portq->portq_thread;
   1107 	if (pgetp == NULL) {
   1108 		/* no threads waiting for events */
   1109 		mutex_exit(&portq->portq_mutex);
   1110 		return (0);
   1111 	}
   1112 
   1113 	/*
   1114 	 * Set waiting threads in alert mode (PORTGET_ALERT)..
   1115 	 * Every thread waiting for events already allocated a portget_t
   1116 	 * structure to sleep on.
   1117 	 * The port alert arguments are stored in the portget_t structure.
   1118 	 * The PORTGET_ALERT flag is set to indicate the thread to return
   1119 	 * immediately with the alert event.
   1120 	 */
   1121 	do {
   1122 		if ((pgetp->portget_state & PORTGET_ALERT) == 0) {
   1123 			pa = &pgetp->portget_alert;
   1124 			pa->portal_events = events;
   1125 			pa->portal_object = 0;
   1126 			pa->portal_user = user;
   1127 			pgetp->portget_state |= PORTGET_ALERT;
   1128 			cv_signal(&pgetp->portget_cv);
   1129 		}
   1130 	} while ((pgetp = pgetp->portget_next) != portq->portq_thread);
   1131 	mutex_exit(&portq->portq_mutex);
   1132 	return (0);
   1133 }
   1134 
   1135 /*
   1136  * Clear alert state of the port
   1137  */
   1138 static void
   1139 port_remove_alert(port_queue_t *portq)
   1140 {
   1141 	mutex_enter(&portq->portq_mutex);
   1142 	portq->portq_flags &= ~PORTQ_ALERT;
   1143 	mutex_exit(&portq->portq_mutex);
   1144 }
   1145 
   1146 /*
   1147  * The port_getn() function is used to retrieve events from a port.
   1148  *
   1149  * The port_getn() function returns immediately if there are enough events
   1150  * available in the port to satisfy the request or if the port is in alert
   1151  * mode (see port_alert(3c)).
   1152  * The timeout argument of port_getn(3c) -which is embedded in the
   1153  * port_gettimer_t structure- specifies if the system call should block or if it
   1154  * should return immediately depending on the number of events available.
   1155  * This function is internally used by port_getn(3c) as well as by
   1156  * port_get(3c).
   1157  */
   1158 static int
   1159 port_getn(port_t *pp, port_event_t *uevp, uint_t max, uint_t *nget,
   1160     port_gettimer_t *pgt)
   1161 {
   1162 	port_queue_t	*portq;
   1163 	port_kevent_t 	*pev;
   1164 	port_kevent_t 	*lev;
   1165 	int		error = 0;
   1166 	uint_t		nmax;
   1167 	uint_t		nevents;
   1168 	uint_t		eventsz;
   1169 	port_event_t	*kevp;
   1170 	list_t		*glist;
   1171 	uint_t		tnent;
   1172 	int		rval;
   1173 	int		blocking = -1;
   1174 	int		timecheck;
   1175 	int		flag;
   1176 	timespec_t	rqtime;
   1177 	timespec_t	*rqtp = NULL;
   1178 	portget_t	*pgetp;
   1179 	void		*results;
   1180 	model_t		model = get_udatamodel();
   1181 
   1182 	flag = pgt->pgt_flags;
   1183 
   1184 	if (*nget > max && max > 0)
   1185 		return (EINVAL);
   1186 
   1187 	portq = &pp->port_queue;
   1188 	mutex_enter(&portq->portq_mutex);
   1189 	if (max == 0) {
   1190 		/*
   1191 		 * Return number of objects with events.
   1192 		 * The port_block() call is required to synchronize this
   1193 		 * thread with another possible thread, which could be
   1194 		 * retrieving events from the port queue.
   1195 		 */
   1196 		port_block(portq);
   1197 		/*
   1198 		 * Check if a second thread is currently retrieving events
   1199 		 * and it is using the temporary event queue.
   1200 		 */
   1201 		if (portq->portq_tnent) {
   1202 			/* put remaining events back to the port queue */
   1203 			port_push_eventq(portq);
   1204 		}
   1205 		*nget = portq->portq_nent;
   1206 		port_unblock(portq);
   1207 		mutex_exit(&portq->portq_mutex);
   1208 		return (0);
   1209 	}
   1210 
   1211 	if (uevp == NULL) {
   1212 		mutex_exit(&portq->portq_mutex);
   1213 		return (EFAULT);
   1214 	}
   1215 	if (*nget == 0) {		/* no events required */
   1216 		mutex_exit(&portq->portq_mutex);
   1217 		return (0);
   1218 	}
   1219 
   1220 	/* port is being closed ... */
   1221 	if (portq->portq_flags & PORTQ_CLOSE) {
   1222 		mutex_exit(&portq->portq_mutex);
   1223 		return (EBADFD);
   1224 	}
   1225 
   1226 	/* return immediately if port in alert mode */
   1227 	if (portq->portq_flags & PORTQ_ALERT) {
   1228 		error = port_get_alert(&portq->portq_alert, uevp);
   1229 		if (error == 0)
   1230 			*nget = 1;
   1231 		mutex_exit(&portq->portq_mutex);
   1232 		return (error);
   1233 	}
   1234 
   1235 	portq->portq_thrcnt++;
   1236 
   1237 	/*
   1238 	 * Now check if the completed events satisfy the
   1239 	 * "wait" requirements of the current thread:
   1240 	 */
   1241 
   1242 	if (pgt->pgt_loop) {
   1243 		/*
   1244 		 * loop entry of same thread
   1245 		 * pgt_loop is set when the current thread returns
   1246 		 * prematurely from this function. That could happen
   1247 		 * when a port is being shared between processes and
   1248 		 * this thread could not find events to return.
   1249 		 * It is not allowed to a thread to retrieve non-shareable
   1250 		 * events generated in other processes.
   1251 		 * PORTQ_WAIT_EVENTS is set when a thread already
   1252 		 * checked the current event queue and no new events
   1253 		 * are added to the queue.
   1254 		 */
   1255 		if (((portq->portq_flags & PORTQ_WAIT_EVENTS) == 0) &&
   1256 		    (portq->portq_nent >= *nget)) {
   1257 			/* some new events arrived ...check them */
   1258 			goto portnowait;
   1259 		}
   1260 		rqtp = pgt->pgt_rqtp;
   1261 		timecheck = pgt->pgt_timecheck;
   1262 		pgt->pgt_flags |= PORTGET_WAIT_EVENTS;
   1263 	} else {
   1264 		/* check if enough events are available ... */
   1265 		if (portq->portq_nent >= *nget)
   1266 			goto portnowait;
   1267 		/*
   1268 		 * There are not enough events available to satisfy
   1269 		 * the request, check timeout value and wait for
   1270 		 * incoming events.
   1271 		 */
   1272 		error = port_get_timeout(pgt->pgt_timeout, &rqtime, &rqtp,
   1273 		    &blocking, flag);
   1274 		if (error) {
   1275 			port_check_return_cond(portq);
   1276 			mutex_exit(&portq->portq_mutex);
   1277 			return (error);
   1278 		}
   1279 
   1280 		if (blocking == 0) /* don't block, check fired events */
   1281 			goto portnowait;
   1282 
   1283 		if (rqtp != NULL) {
   1284 			timespec_t	now;
   1285 			timecheck = timechanged;
   1286 			gethrestime(&now);
   1287 			timespecadd(rqtp, &now);
   1288 		}
   1289 	}
   1290 
   1291 	/* enqueue thread in the list of waiting threads */
   1292 	pgetp = port_queue_thread(portq, *nget);
   1293 
   1294 
   1295 	/* Wait here until return conditions met */
   1296 	for (;;) {
   1297 		if (pgetp->portget_state & PORTGET_ALERT) {
   1298 			/* reap alert event and return */
   1299 			error = port_get_alert(&pgetp->portget_alert, uevp);
   1300 			if (error)
   1301 				*nget = 0;
   1302 			else
   1303 				*nget = 1;
   1304 			port_dequeue_thread(&pp->port_queue, pgetp);
   1305 			portq->portq_thrcnt--;
   1306 			mutex_exit(&portq->portq_mutex);
   1307 			return (error);
   1308 		}
   1309 
   1310 		/*
   1311 		 * Check if some other thread is already retrieving
   1312 		 * events (portq_getn > 0).
   1313 		 */
   1314 
   1315 		if ((portq->portq_getn  == 0) &&
   1316 		    ((portq)->portq_nent >= *nget) &&
   1317 		    (!((pgt)->pgt_flags & PORTGET_WAIT_EVENTS) ||
   1318 		    !((portq)->portq_flags & PORTQ_WAIT_EVENTS)))
   1319 			break;
   1320 
   1321 		if (portq->portq_flags & PORTQ_CLOSE) {
   1322 			error = EBADFD;
   1323 			break;
   1324 		}
   1325 
   1326 		rval = cv_waituntil_sig(&pgetp->portget_cv, &portq->portq_mutex,
   1327 		    rqtp, timecheck);
   1328 
   1329 		if (rval <= 0) {
   1330 			error = (rval == 0) ? EINTR : ETIME;
   1331 			break;
   1332 		}
   1333 	}
   1334 
   1335 	/* take thread out of the wait queue */
   1336 	port_dequeue_thread(portq, pgetp);
   1337 
   1338 	if (error != 0 && (error == EINTR || error == EBADFD ||
   1339 	    (error == ETIME && flag))) {
   1340 		/* return without events */
   1341 		port_check_return_cond(portq);
   1342 		mutex_exit(&portq->portq_mutex);
   1343 		return (error);
   1344 	}
   1345 
   1346 portnowait:
   1347 	/*
   1348 	 * Move port event queue to a temporary event queue .
   1349 	 * New incoming events will be continue be posted to the event queue
   1350 	 * and they will not be considered by the current thread.
   1351 	 * The idea is to avoid lock contentions or an often locking/unlocking
   1352 	 * of the port queue mutex. The contention and performance degradation
   1353 	 * could happen because:
   1354 	 * a) incoming events use the port queue mutex to enqueue new events and
   1355 	 * b) before the event can be delivered to the application it is
   1356 	 *    necessary to notify the event sources about the event delivery.
   1357 	 *    Sometimes the event sources can require a long time to return and
   1358 	 *    the queue mutex would block incoming events.
   1359 	 * During this time incoming events (port_send_event()) do not need
   1360 	 * to awake threads waiting for events. Before the current thread
   1361 	 * returns it will check the conditions to awake other waiting threads.
   1362 	 */
   1363 	portq->portq_getn++;	/* number of threads retrieving events */
   1364 	port_block(portq);	/* block other threads here */
   1365 	nmax = max < portq->portq_nent ? max : portq->portq_nent;
   1366 
   1367 	if (portq->portq_tnent) {
   1368 		/*
   1369 		 * Move remaining events from previous thread back to the
   1370 		 * port event queue.
   1371 		 */
   1372 		port_push_eventq(portq);
   1373 	}
   1374 	/* move port event queue to a temporary queue */
   1375 	list_move_tail(&portq->portq_get_list, &portq->portq_list);
   1376 	glist = &portq->portq_get_list;	/* use temporary event queue */
   1377 	tnent = portq->portq_nent;	/* get current number of events */
   1378 	portq->portq_nent = 0;		/* no events in the port event queue */
   1379 	portq->portq_flags |= PORTQ_WAIT_EVENTS; /* detect incoming events */
   1380 	mutex_exit(&portq->portq_mutex);    /* event queue can be reused now */
   1381 
   1382 	if (model == DATAMODEL_NATIVE) {
   1383 		eventsz = sizeof (port_event_t);
   1384 		kevp = kmem_alloc(eventsz * nmax, KM_NOSLEEP);
   1385 		if (kevp == NULL) {
   1386 			if (nmax > pp->port_max_list)
   1387 				nmax = pp->port_max_list;
   1388 			kevp = kmem_alloc(eventsz * nmax, KM_SLEEP);
   1389 		}
   1390 		results = kevp;
   1391 		lev = NULL;	/* start with first event in the queue */
   1392 		for (nevents = 0; nevents < nmax; ) {
   1393 			pev = port_get_kevent(glist, lev);
   1394 			if (pev == NULL)	/* no more events available */
   1395 				break;
   1396 			if (pev->portkev_flags & PORT_KEV_FREE) {
   1397 				/* Just discard event */
   1398 				list_remove(glist, pev);
   1399 				pev->portkev_flags &= ~(PORT_CLEANUP_DONE);
   1400 				if (PORT_FREE_EVENT(pev))
   1401 					port_free_event_local(pev, 0);
   1402 				tnent--;
   1403 				continue;
   1404 			}
   1405 
   1406 			/* move event data to copyout list */
   1407 			if (port_copy_event(&kevp[nevents], pev, glist)) {
   1408 				/*
   1409 				 * Event can not be delivered to the
   1410 				 * current process.
   1411 				 */
   1412 				if (lev != NULL)
   1413 					list_insert_after(glist, lev, pev);
   1414 				else
   1415 					list_insert_head(glist, pev);
   1416 				lev = pev;  /* last checked event */
   1417 			} else {
   1418 				nevents++;	/* # of events ready */
   1419 			}
   1420 		}
   1421 #ifdef	_SYSCALL32_IMPL
   1422 	} else {
   1423 		port_event32_t	*kevp32;
   1424 
   1425 		eventsz = sizeof (port_event32_t);
   1426 		kevp32 = kmem_alloc(eventsz * nmax, KM_NOSLEEP);
   1427 		if (kevp32 == NULL) {
   1428 			if (nmax > pp->port_max_list)
   1429 				nmax = pp->port_max_list;
   1430 			kevp32 = kmem_alloc(eventsz * nmax, KM_SLEEP);
   1431 		}
   1432 		results = kevp32;
   1433 		lev = NULL;	/* start with first event in the queue */
   1434 		for (nevents = 0; nevents < nmax; ) {
   1435 			pev = port_get_kevent(glist, lev);
   1436 			if (pev == NULL)	/* no more events available */
   1437 				break;
   1438 			if (pev->portkev_flags & PORT_KEV_FREE) {
   1439 				/* Just discard event */
   1440 				list_remove(glist, pev);
   1441 				pev->portkev_flags &= ~(PORT_CLEANUP_DONE);
   1442 				if (PORT_FREE_EVENT(pev))
   1443 					port_free_event_local(pev, 0);
   1444 				tnent--;
   1445 				continue;
   1446 			}
   1447 
   1448 			/* move event data to copyout list */
   1449 			if (port_copy_event32(&kevp32[nevents], pev, glist)) {
   1450 				/*
   1451 				 * Event can not be delivered to the
   1452 				 * current process.
   1453 				 */
   1454 				if (lev != NULL)
   1455 					list_insert_after(glist, lev, pev);
   1456 				else
   1457 					list_insert_head(glist, pev);
   1458 				lev = pev;  /* last checked event */
   1459 			} else {
   1460 				nevents++;	/* # of events ready */
   1461 			}
   1462 		}
   1463 #endif	/* _SYSCALL32_IMPL */
   1464 	}
   1465 
   1466 	/*
   1467 	 *  Remember number of remaining events in the temporary event queue.
   1468 	 */
   1469 	portq->portq_tnent = tnent - nevents;
   1470 
   1471 	/*
   1472 	 * Work to do before return :
   1473 	 * - push list of remaining events back to the top of the standard
   1474 	 *   port queue.
   1475 	 * - if this is the last thread calling port_get(n) then wakeup the
   1476 	 *   thread waiting on close(2).
   1477 	 * - check for a deferred cv_signal from port_send_event() and wakeup
   1478 	 *   the sleeping thread.
   1479 	 */
   1480 
   1481 	mutex_enter(&portq->portq_mutex);
   1482 	port_unblock(portq);
   1483 	if (portq->portq_tnent) {
   1484 		/*
   1485 		 * move remaining events in the temporary event queue back
   1486 		 * to the port event queue
   1487 		 */
   1488 		port_push_eventq(portq);
   1489 	}
   1490 	portq->portq_getn--;	/* update # of threads retrieving events */
   1491 	if (--portq->portq_thrcnt == 0) { /* # of threads waiting ... */
   1492 		/* Last thread => check close(2) conditions ... */
   1493 		if (portq->portq_flags & PORTQ_CLOSE) {
   1494 			cv_signal(&portq->portq_closecv);
   1495 			mutex_exit(&portq->portq_mutex);
   1496 			kmem_free(results, eventsz * nmax);
   1497 			/* do not copyout events */
   1498 			*nget = 0;
   1499 			return (EBADFD);
   1500 		}
   1501 	} else if (portq->portq_getn == 0) {
   1502 		/*
   1503 		 * no other threads retrieving events ...
   1504 		 * check wakeup conditions of sleeping threads
   1505 		 */
   1506 		if ((portq->portq_thread != NULL) &&
   1507 		    (portq->portq_nent >= portq->portq_nget))
   1508 			cv_signal(&portq->portq_thread->portget_cv);
   1509 	}
   1510 
   1511 	/*
   1512 	 * Check PORTQ_POLLIN here because the current thread set temporarily
   1513 	 * the number of events in the queue to zero.
   1514 	 */
   1515 	if (portq->portq_flags & PORTQ_POLLIN) {
   1516 		portq->portq_flags &= ~PORTQ_POLLIN;
   1517 		mutex_exit(&portq->portq_mutex);
   1518 		pollwakeup(&pp->port_pollhd, POLLIN);
   1519 	} else {
   1520 		mutex_exit(&portq->portq_mutex);
   1521 	}
   1522 
   1523 	/* now copyout list of user event structures to user space */
   1524 	if (nevents) {
   1525 		if (copyout(results, uevp, nevents * eventsz))
   1526 			error = EFAULT;
   1527 	}
   1528 	kmem_free(results, eventsz * nmax);
   1529 
   1530 	if (nevents == 0 && error == 0 && pgt->pgt_loop == 0 && blocking != 0) {
   1531 		/* no events retrieved: check loop conditions */
   1532 		if (blocking == -1) {
   1533 			/* no timeout checked */
   1534 			error = port_get_timeout(pgt->pgt_timeout,
   1535 			    &pgt->pgt_rqtime, &rqtp, &blocking, flag);
   1536 			if (error) {
   1537 				*nget = nevents;
   1538 				return (error);
   1539 			}
   1540 			if (rqtp != NULL) {
   1541 				timespec_t	now;
   1542 				pgt->pgt_timecheck = timechanged;
   1543 				gethrestime(&now);
   1544 				timespecadd(&pgt->pgt_rqtime, &now);
   1545 			}
   1546 			pgt->pgt_rqtp = rqtp;
   1547 		} else {
   1548 			/* timeout already checked -> remember values */
   1549 			pgt->pgt_rqtp = rqtp;
   1550 			if (rqtp != NULL) {
   1551 				pgt->pgt_timecheck = timecheck;
   1552 				pgt->pgt_rqtime = *rqtp;
   1553 			}
   1554 		}
   1555 		if (blocking)
   1556 			/* timeout remaining */
   1557 			pgt->pgt_loop = 1;
   1558 	}
   1559 
   1560 	/* set number of user event structures completed */
   1561 	*nget = nevents;
   1562 	return (error);
   1563 }
   1564 
   1565 /*
   1566  * 1. copy kernel event structure to user event structure.
   1567  * 2. PORT_KEV_WIRED event structures will be reused by the "source"
   1568  * 3. Remove PORT_KEV_DONEQ flag (event removed from the event queue)
   1569  * 4. Other types of event structures can be delivered back to the port cache
   1570  *    (port_free_event_local()).
   1571  * 5. The event source callback function is the last opportunity for the
   1572  *    event source to update events, to free local resources associated with
   1573  *    the event or to deny the delivery of the event.
   1574  */
   1575 static int
   1576 port_copy_event(port_event_t *puevp, port_kevent_t *pkevp, list_t *list)
   1577 {
   1578 	int	free_event = 0;
   1579 	int	flags;
   1580 	int	error;
   1581 
   1582 	puevp->portev_source = pkevp->portkev_source;
   1583 	puevp->portev_object = pkevp->portkev_object;
   1584 	puevp->portev_user = pkevp->portkev_user;
   1585 	puevp->portev_events = pkevp->portkev_events;
   1586 
   1587 	/* remove event from the queue */
   1588 	list_remove(list, pkevp);
   1589 
   1590 	/*
   1591 	 * Events of type PORT_KEV_WIRED remain allocated by the
   1592 	 * event source.
   1593 	 */
   1594 	flags = pkevp->portkev_flags;
   1595 	if (pkevp->portkev_flags & PORT_KEV_WIRED)
   1596 		pkevp->portkev_flags &= ~PORT_KEV_DONEQ;
   1597 	else
   1598 		free_event = 1;
   1599 
   1600 	if (pkevp->portkev_callback) {
   1601 		error = (*pkevp->portkev_callback)(pkevp->portkev_arg,
   1602 		    &puevp->portev_events, pkevp->portkev_pid,
   1603 		    PORT_CALLBACK_DEFAULT, pkevp);
   1604 
   1605 		if (error) {
   1606 			/*
   1607 			 * Event can not be delivered.
   1608 			 * Caller must reinsert the event into the queue.
   1609 			 */
   1610 			pkevp->portkev_flags = flags;
   1611 			return (error);
   1612 		}
   1613 	}
   1614 	if (free_event)
   1615 		port_free_event_local(pkevp, 0);
   1616 	return (0);
   1617 }
   1618 
   1619 #ifdef	_SYSCALL32_IMPL
   1620 /*
   1621  * 1. copy kernel event structure to user event structure.
   1622  * 2. PORT_KEV_WIRED event structures will be reused by the "source"
   1623  * 3. Remove PORT_KEV_DONEQ flag (event removed from the event queue)
   1624  * 4. Other types of event structures can be delivered back to the port cache
   1625  *    (port_free_event_local()).
   1626  * 5. The event source callback function is the last opportunity for the
   1627  *    event source to update events, to free local resources associated with
   1628  *    the event or to deny the delivery of the event.
   1629  */
   1630 static int
   1631 port_copy_event32(port_event32_t *puevp, port_kevent_t *pkevp, list_t *list)
   1632 {
   1633 	int	free_event = 0;
   1634 	int	error;
   1635 	int	flags;
   1636 
   1637 	puevp->portev_source = pkevp->portkev_source;
   1638 	puevp->portev_object = (daddr32_t)pkevp->portkev_object;
   1639 	puevp->portev_user = (caddr32_t)(uintptr_t)pkevp->portkev_user;
   1640 	puevp->portev_events = pkevp->portkev_events;
   1641 
   1642 	/* remove event from the queue */
   1643 	list_remove(list, pkevp);
   1644 
   1645 	/*
   1646 	 * Events if type PORT_KEV_WIRED remain allocated by the
   1647 	 * sub-system (source).
   1648 	 */
   1649 
   1650 	flags = pkevp->portkev_flags;
   1651 	if (pkevp->portkev_flags & PORT_KEV_WIRED)
   1652 		pkevp->portkev_flags &= ~PORT_KEV_DONEQ;
   1653 	else
   1654 		free_event = 1;
   1655 
   1656 	if (pkevp->portkev_callback != NULL) {
   1657 		error = (*pkevp->portkev_callback)(pkevp->portkev_arg,
   1658 		    &puevp->portev_events, pkevp->portkev_pid,
   1659 		    PORT_CALLBACK_DEFAULT, pkevp);
   1660 		if (error) {
   1661 			/*
   1662 			 * Event can not be delivered.
   1663 			 * Caller must reinsert the event into the queue.
   1664 			 */
   1665 			pkevp->portkev_flags = flags;
   1666 			return (error);
   1667 		}
   1668 	}
   1669 	if (free_event)
   1670 		port_free_event_local(pkevp, 0);
   1671 	return (0);
   1672 }
   1673 #endif	/* _SYSCALL32_IMPL */
   1674 
   1675 /*
   1676  * copyout alert event.
   1677  */
   1678 static int
   1679 port_get_alert(port_alert_t *pa, port_event_t *uevp)
   1680 {
   1681 	model_t	model = get_udatamodel();
   1682 
   1683 	/* copyout alert event structures to user space */
   1684 	if (model == DATAMODEL_NATIVE) {
   1685 		port_event_t	uev;
   1686 		uev.portev_source = PORT_SOURCE_ALERT;
   1687 		uev.portev_object = pa->portal_object;
   1688 		uev.portev_events = pa->portal_events;
   1689 		uev.portev_user = pa->portal_user;
   1690 		if (copyout(&uev, uevp, sizeof (port_event_t)))
   1691 			return (EFAULT);
   1692 #ifdef	_SYSCALL32_IMPL
   1693 	} else {
   1694 		port_event32_t	uev32;
   1695 		uev32.portev_source = PORT_SOURCE_ALERT;
   1696 		uev32.portev_object = (daddr32_t)pa->portal_object;
   1697 		uev32.portev_events = pa->portal_events;
   1698 		uev32.portev_user = (daddr32_t)(uintptr_t)pa->portal_user;
   1699 		if (copyout(&uev32, uevp, sizeof (port_event32_t)))
   1700 			return (EFAULT);
   1701 #endif	/* _SYSCALL32_IMPL */
   1702 	}
   1703 	return (0);
   1704 }
   1705 
   1706 /*
   1707  * Check return conditions :
   1708  * - pending port close(2)
   1709  * - threads waiting for events
   1710  */
   1711 static void
   1712 port_check_return_cond(port_queue_t *portq)
   1713 {
   1714 	ASSERT(MUTEX_HELD(&portq->portq_mutex));
   1715 	portq->portq_thrcnt--;
   1716 	if (portq->portq_flags & PORTQ_CLOSE) {
   1717 		if (portq->portq_thrcnt == 0)
   1718 			cv_signal(&portq->portq_closecv);
   1719 		else
   1720 			cv_signal(&portq->portq_thread->portget_cv);
   1721 	}
   1722 }
   1723 
   1724 /*
   1725  * The port_get_kevent() function returns
   1726  * - the event located at the head of the queue if 'last' pointer is NULL
   1727  * - the next event after the event pointed by 'last'
   1728  * The caller of this function is responsible for the integrity of the queue
   1729  * in use:
   1730  * - port_getn() is using a temporary queue protected with port_block().
   1731  * - port_close_events() is working on the global event queue and protects
   1732  *   the queue with portq->portq_mutex.
   1733  */
   1734 port_kevent_t *
   1735 port_get_kevent(list_t *list, port_kevent_t *last)
   1736 {
   1737 	if (last == NULL)
   1738 		return (list_head(list));
   1739 	else
   1740 		return (list_next(list, last));
   1741 }
   1742 
   1743 /*
   1744  * The port_get_timeout() function gets the timeout data from user space
   1745  * and converts that info into a corresponding internal representation.
   1746  * The kerneldata flag means that the timeout data is already loaded.
   1747  */
   1748 static int
   1749 port_get_timeout(timespec_t *timeout, timespec_t *rqtime, timespec_t **rqtp,
   1750     int *blocking, int kerneldata)
   1751 {
   1752 	model_t	model = get_udatamodel();
   1753 
   1754 	*rqtp = NULL;
   1755 	if (timeout == NULL) {
   1756 		*blocking = 1;
   1757 		return (0);
   1758 	}
   1759 
   1760 	if (kerneldata) {
   1761 		*rqtime = *timeout;
   1762 	} else {
   1763 		if (model == DATAMODEL_NATIVE) {
   1764 			if (copyin(timeout, rqtime, sizeof (*rqtime)))
   1765 				return (EFAULT);
   1766 #ifdef	_SYSCALL32_IMPL
   1767 		} else {
   1768 			timespec32_t 	wait_time_32;
   1769 			if (copyin(timeout, &wait_time_32,
   1770 			    sizeof (wait_time_32)))
   1771 				return (EFAULT);
   1772 			TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32);
   1773 #endif  /* _SYSCALL32_IMPL */
   1774 		}
   1775 	}
   1776 
   1777 	if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) {
   1778 		*blocking = 0;
   1779 		return (0);
   1780 	}
   1781 
   1782 	if (rqtime->tv_sec < 0 ||
   1783 	    rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC)
   1784 		return (EINVAL);
   1785 
   1786 	*rqtp = rqtime;
   1787 	*blocking = 1;
   1788 	return (0);
   1789 }
   1790 
   1791 /*
   1792  * port_queue_thread()
   1793  * Threads requiring more events than available will be put in a wait queue.
   1794  * There is a "thread wait queue" per port.
   1795  * Threads requiring less events get a higher priority than others and they
   1796  * will be awoken first.
   1797  */
   1798 static portget_t *
   1799 port_queue_thread(port_queue_t *portq, uint_t nget)
   1800 {
   1801 	portget_t	*pgetp;
   1802 	portget_t	*ttp;
   1803 	portget_t	*htp;
   1804 
   1805 	pgetp = kmem_zalloc(sizeof (portget_t), KM_SLEEP);
   1806 	pgetp->portget_nget = nget;
   1807 	pgetp->portget_pid = curproc->p_pid;
   1808 	if (portq->portq_thread == NULL) {
   1809 		/* first waiting thread */
   1810 		portq->portq_thread = pgetp;
   1811 		portq->portq_nget = nget;
   1812 		pgetp->portget_prev = pgetp;
   1813 		pgetp->portget_next = pgetp;
   1814 		return (pgetp);
   1815 	}
   1816 
   1817 	/*
   1818 	 * thread waiting for less events will be set on top of the queue.
   1819 	 */
   1820 	ttp = portq->portq_thread;
   1821 	htp = ttp;
   1822 	for (;;) {
   1823 		if (nget <= ttp->portget_nget)
   1824 			break;
   1825 		if (htp == ttp->portget_next)
   1826 			break;	/* last event */
   1827 		ttp = ttp->portget_next;
   1828 	}
   1829 
   1830 	/* add thread to the queue */
   1831 	pgetp->portget_next = ttp;
   1832 	pgetp->portget_prev = ttp->portget_prev;
   1833 	ttp->portget_prev->portget_next = pgetp;
   1834 	ttp->portget_prev = pgetp;
   1835 	if (portq->portq_thread == ttp)
   1836 		portq->portq_thread = pgetp;
   1837 	portq->portq_nget = portq->portq_thread->portget_nget;
   1838 	return (pgetp);
   1839 }
   1840 
   1841 /*
   1842  * Take thread out of the queue.
   1843  */
   1844 static void
   1845 port_dequeue_thread(port_queue_t *portq, portget_t *pgetp)
   1846 {
   1847 	if (pgetp->portget_next == pgetp) {
   1848 		/* last (single) waiting thread */
   1849 		portq->portq_thread = NULL;
   1850 		portq->portq_nget = 0;
   1851 	} else {
   1852 		pgetp->portget_prev->portget_next = pgetp->portget_next;
   1853 		pgetp->portget_next->portget_prev = pgetp->portget_prev;
   1854 		if (portq->portq_thread == pgetp)
   1855 			portq->portq_thread = pgetp->portget_next;
   1856 		portq->portq_nget = portq->portq_thread->portget_nget;
   1857 	}
   1858 	kmem_free(pgetp, sizeof (portget_t));
   1859 }
   1860 
   1861 /*
   1862  * Set up event port kstats.
   1863  */
   1864 static void
   1865 port_kstat_init()
   1866 {
   1867 	kstat_t	*ksp;
   1868 	uint_t	ndata;
   1869 
   1870 	ndata = sizeof (port_kstat) / sizeof (kstat_named_t);
   1871 	ksp = kstat_create("portfs", 0, "Event Ports", "misc",
   1872 	    KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_VIRTUAL);
   1873 	if (ksp) {
   1874 		ksp->ks_data = &port_kstat;
   1875 		kstat_install(ksp);
   1876 	}
   1877 }
   1878