Home | History | Annotate | Download | only in io
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 
/*
 * This driver attempts to emulate some of the behaviors of
 * Linux terminal devices (/dev/ptmx and /dev/pts/[0-9][0-9]*) on Solaris.
 *
 * It does this by layering over the /dev/ptmx device and intercepting
 * opens to it.
 *
 * This driver makes the following assumptions about the way the ptm/pts
 * drivers on Solaris work:
 *
 *    - all opens of the /dev/ptmx device node return a unique dev_t.
 *
 *    - the dev_t minor node value for each open ptm instance corresponds
 *      to its associated slave terminal device number.  ie. the path to
 *      the slave terminal device associated with an open ptm instance
 *      whose dev_t minor node value is 5, is /dev/pts/5.
 *
 *    - the ptm driver always allocates the lowest numbered slave terminal
 *      device possible.
 */
     47 
     48 #include <sys/conf.h>
     49 #include <sys/ddi.h>
     50 #include <sys/devops.h>
     51 #include <sys/file.h>
     52 #include <sys/filio.h>
     53 #include <sys/kstr.h>
     54 #include <sys/ldlinux.h>
     55 #include <sys/lx_ptm.h>
     56 #include <sys/modctl.h>
     57 #include <sys/pathname.h>
     58 #include <sys/ptms.h>
     59 #include <sys/ptyvar.h>
     60 #include <sys/stat.h>
     61 #include <sys/stropts.h>
     62 #include <sys/sunddi.h>
     63 #include <sys/sunldi.h>
     64 #include <sys/sysmacros.h>
     65 #include <sys/types.h>
     66 
/* Path of the underlying master terminal device opened via the LDI. */
#define	LP_PTM_PATH		"/dev/ptmx"
/* Directory prefix of the slave terminal device nodes. */
#define	LP_PTS_PATH		"/dev/pts/"
/* Driver name used to look up the pts driver's major number at attach. */
#define	LP_PTS_DRV_NAME		"pts"
/*
 * Delay values, presumably in microseconds going by the names; their
 * consumers are not in this part of the file.
 */
#define	LP_PTS_USEC_DELAY	(5 * 1000)	/* 5 ms */
#define	LP_PTS_USEC_DELAY_MAX	(5 * MILLISEC)	/* 5 ms */
     72 
/*
 * this driver is layered on top of the ptm driver.  we'd like to
 * make this driver's minor name space a mirror of the ptm driver's
 * namespace, but we can't actually do this.  the reason is that the
 * ptm driver is opened via the clone driver.  therefore no minor nodes
 * of the ptm driver are actually accessible via the filesystem.
 * since we're not a streams device we can't be opened by the clone
 * driver.  therefore we need to have at least one minor node accessible
 * via the filesystem so that consumers can open it.  we use the device
 * node with a minor number of 0 for this purpose.  what this means is
 * that minor node 0 can't be used to map ptm minor node 0.  since this
 * minor node is now reserved we need to shift our ptm minor node
 * mappings by one.  ie. a ptm minor node with a value of 0 will
 * correspond to our minor node with a value of 1.  these mappings are
 * managed with the following macros.
 *
 * DEVT_TO_INDEX() converts one of our dev_t values into an index into the
 * layered handle array (via LX_PTM_DEV_TO_PTS(), which is defined outside
 * this file); INDEX_TO_MINOR() converts such an index back into one of our
 * minor numbers.
 */
#define	DEVT_TO_INDEX(x)	LX_PTM_DEV_TO_PTS(x)
#define	INDEX_TO_MINOR(x)	((x) + 1)
     91 
/*
 * grow our layered handle array by the same size increment that the ptm
 * driver uses to grow the pty device space - PTY_MAXDELTA.  this is the
 * minimum number of slots lx_ptm_lh_grow() adds each time it resizes the
 * array.
 */
#define	LP_PTY_INC	128
     97 
/*
 * lx_ptm_ops contains state information about outstanding operations on the
 * underlying master terminal device.  Currently we only track information
 * for read operations.
 *
 * Note that this data has not been rolled directly into the lx_ptm_handle
 * structure because we can't put mutexes or condition variables into the
 * lx_ptm_handle structure.  The reason is that the array of lx_ptm_handle
 * structures linked to from the global lx_ptm state can be resized
 * dynamically, and when it's resized, the new array is at a different
 * memory location and the old array memory is discarded.  Mutexes and cvs
 * are accessed based off their address, so if this array was re-sized while
 * there were outstanding operations on any mutexes or cvs in the array
 * then the system would tip over.  In the future the lx_ptm_handle structure
 * array should probably be replaced with either an array of pointers to
 * lx_ptm_handle structures or some other kind of data structure containing
 * pointers to lx_ptm_handle structures.  Then the lx_ptm_ops structure
 * could be folded directly into the lx_ptm_handle structures.  (This will
 * also require the definition of a new locking mechanism to protect the
 * contents of lx_ptm_handle structures.)
 */
typedef struct lx_ptm_ops {
	/* Count of read operations in progress; protected by lpo_rops_lock. */
	int			lpo_rops;
	/* Signalled by lx_ptm_read_end() when a read operation finishes. */
	kcondvar_t		lpo_rops_cv;
	/* Protects lpo_rops and is the mutex used with lpo_rops_cv. */
	kmutex_t		lpo_rops_lock;
} lx_ptm_ops_t;
    124 
/*
 * Every open of the master terminal device in a zone results in a new
 * lx_ptm_handle handle allocation.  These handles are stored in an array
 * hanging off the lx_ptm_state structure.  A slot whose lph_handle is NULL
 * is unused.  All fields are accessed with lps_lh_rwlock held.
 */
typedef struct lx_ptm_handle {
	/* Device handle to the underlying real /dev/ptmx master terminal. */
	ldi_handle_t		lph_handle;

	/* Flag to indicate if TIOCPKT mode has been enabled. */
	int			lph_pktio;

	/*
	 * Number of times the slave device has been opened/closed.
	 * Incremented by lx_ptm_lh_eofed_set() each time an EOF is seen on
	 * the master side.
	 */
	int			lph_eofed;

	/* Callback handler in the ptm driver to check if slave is open. */
	ptmptsopencb_t		lph_ppocb;

	/*
	 * Pointer to state for operations on underlying device.  Allocated
	 * separately so that its address stays stable when the handle array
	 * is resized.
	 */
	lx_ptm_ops_t		*lph_lpo;
} lx_ptm_handle_t;
    146 
/*
 * Global state for the lx_ptm driver.  The driver keeps a single instance
 * of this structure, initialized in lx_ptm_attach().
 */
typedef struct lx_ptm_state {
	/* lx_ptm device devinfo pointer */
	dev_info_t		*lps_dip;

	/* LDI ident used to open underlying real /dev/ptmx master terminals. */
	ldi_ident_t		lps_li;

	/* pts driver's major number */
	major_t			lps_pts_major;

	/* rw lock used to manage access and growth of lps_lh_array */
	krwlock_t		lps_lh_rwlock;

	/* number of elements in lps_lh_array (0 while the array is NULL) */
	uint_t			lps_lh_count;

	/* Array of handles to underlying real /dev/ptmx master terminals. */
	lx_ptm_handle_t		*lps_lh_array;
} lx_ptm_state_t;
    169 
/* The lx_ptm global state structure (a single static instance). */
static lx_ptm_state_t	lps;
    172 
/*
 * List of modules to be autopushed onto slave terminal devices when they
 * are opened in an lx branded zone.  The list is NULL terminated and is
 * handed to kstr_autopush() by lx_ptm_open().
 */
static char *lx_pts_mods[] = {
	"ptem",
	"ldterm",
	"ttcompat",
	LDLINUX_MOD,
	NULL
};
    184 
    185 static void
    186 lx_ptm_lh_grow(uint_t index)
    187 {
    188 	uint_t			new_lh_count, old_lh_count;
    189 	lx_ptm_handle_t		*new_lh_array, *old_lh_array;
    190 
    191 	/*
    192 	 * allocate a new array.  we drop the rw lock on the array so that
    193 	 * readers can still access devices in case our memory allocation
    194 	 * blocks.
    195 	 */
    196 	new_lh_count = MAX(lps.lps_lh_count + LP_PTY_INC, index + 1);
    197 	new_lh_array =
    198 	    kmem_zalloc(sizeof (lx_ptm_handle_t) * new_lh_count, KM_SLEEP);
    199 
    200 	/*
    201 	 * double check that we still actually need to increase the size
    202 	 * of the array
    203 	 */
    204 	rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
    205 	if (index < lps.lps_lh_count) {
    206 		/* someone beat us to it so there's nothing more to do */
    207 		rw_exit(&lps.lps_lh_rwlock);
    208 		kmem_free(new_lh_array,
    209 		    sizeof (lx_ptm_handle_t) * new_lh_count);
    210 		return;
    211 	}
    212 
    213 	/* copy the existing data into the new array */
    214 	ASSERT((lps.lps_lh_count != 0) || (lps.lps_lh_array == NULL));
    215 	ASSERT((lps.lps_lh_count == 0) || (lps.lps_lh_array != NULL));
    216 	if (lps.lps_lh_count != 0) {
    217 		bcopy(lps.lps_lh_array, new_lh_array,
    218 		    sizeof (lx_ptm_handle_t) * lps.lps_lh_count);
    219 	}
    220 
    221 	/* save info on the old array */
    222 	old_lh_array = lps.lps_lh_array;
    223 	old_lh_count = lps.lps_lh_count;
    224 
    225 	/* install the new array */
    226 	lps.lps_lh_array = new_lh_array;
    227 	lps.lps_lh_count = new_lh_count;
    228 
    229 	rw_exit(&lps.lps_lh_rwlock);
    230 
    231 	/* free the old array */
    232 	if (old_lh_array != NULL) {
    233 		kmem_free(old_lh_array,
    234 		    sizeof (lx_ptm_handle_t) * old_lh_count);
    235 	}
    236 }
    237 
    238 static void
    239 lx_ptm_lh_insert(uint_t index, ldi_handle_t lh)
    240 {
    241 	lx_ptm_ops_t *lpo;
    242 
    243 	ASSERT(lh != NULL);
    244 
    245 	/* Allocate and initialize the ops structure */
    246 	lpo = kmem_zalloc(sizeof (lx_ptm_ops_t), KM_SLEEP);
    247 	mutex_init(&lpo->lpo_rops_lock, NULL, MUTEX_DEFAULT, NULL);
    248 	cv_init(&lpo->lpo_rops_cv, NULL, CV_DEFAULT, NULL);
    249 
    250 	rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
    251 
    252 	/* check if we need to grow the size of the layered handle array */
    253 	if (index >= lps.lps_lh_count) {
    254 		rw_exit(&lps.lps_lh_rwlock);
    255 		lx_ptm_lh_grow(index);
    256 		rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
    257 	}
    258 
    259 	ASSERT(index < lps.lps_lh_count);
    260 	ASSERT(lps.lps_lh_array[index].lph_handle == NULL);
    261 	ASSERT(lps.lps_lh_array[index].lph_pktio == 0);
    262 	ASSERT(lps.lps_lh_array[index].lph_eofed == 0);
    263 	ASSERT(lps.lps_lh_array[index].lph_lpo == NULL);
    264 
    265 	/* insert the new handle and return */
    266 	lps.lps_lh_array[index].lph_handle = lh;
    267 	lps.lps_lh_array[index].lph_pktio = 0;
    268 	lps.lps_lh_array[index].lph_eofed = 0;
    269 	lps.lps_lh_array[index].lph_lpo = lpo;
    270 
    271 	rw_exit(&lps.lps_lh_rwlock);
    272 }
    273 
    274 static ldi_handle_t
    275 lx_ptm_lh_remove(uint_t index)
    276 {
    277 	ldi_handle_t	lh;
    278 
    279 	rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
    280 
    281 	ASSERT(index < lps.lps_lh_count);
    282 	ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
    283 	ASSERT(lps.lps_lh_array[index].lph_lpo->lpo_rops == 0);
    284 	ASSERT(!MUTEX_HELD(&lps.lps_lh_array[index].lph_lpo->lpo_rops_lock));
    285 
    286 	/* free the write handle */
    287 	kmem_free(lps.lps_lh_array[index].lph_lpo, sizeof (lx_ptm_ops_t));
    288 	lps.lps_lh_array[index].lph_lpo = NULL;
    289 
    290 	/* remove the handle and return it */
    291 	lh = lps.lps_lh_array[index].lph_handle;
    292 	lps.lps_lh_array[index].lph_handle = NULL;
    293 	lps.lps_lh_array[index].lph_pktio = 0;
    294 	lps.lps_lh_array[index].lph_eofed = 0;
    295 	rw_exit(&lps.lps_lh_rwlock);
    296 	return (lh);
    297 }
    298 
    299 static void
    300 lx_ptm_lh_get_ppocb(uint_t index, ptmptsopencb_t *ppocb)
    301 {
    302 	rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
    303 
    304 	ASSERT(index < lps.lps_lh_count);
    305 	ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
    306 
    307 	*ppocb = lps.lps_lh_array[index].lph_ppocb;
    308 	rw_exit(&lps.lps_lh_rwlock);
    309 }
    310 
    311 static void
    312 lx_ptm_lh_set_ppocb(uint_t index, ptmptsopencb_t *ppocb)
    313 {
    314 	rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
    315 
    316 	ASSERT(index < lps.lps_lh_count);
    317 	ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
    318 
    319 	lps.lps_lh_array[index].lph_ppocb = *ppocb;
    320 	rw_exit(&lps.lps_lh_rwlock);
    321 }
    322 
    323 static ldi_handle_t
    324 lx_ptm_lh_lookup(uint_t index)
    325 {
    326 	ldi_handle_t	lh;
    327 
    328 	rw_enter(&lps.lps_lh_rwlock, RW_READER);
    329 
    330 	ASSERT(index < lps.lps_lh_count);
    331 	ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
    332 
    333 	/* return the handle */
    334 	lh = lps.lps_lh_array[index].lph_handle;
    335 	rw_exit(&lps.lps_lh_rwlock);
    336 	return (lh);
    337 }
    338 
    339 static lx_ptm_ops_t *
    340 lx_ptm_lpo_lookup(uint_t index)
    341 {
    342 	lx_ptm_ops_t	*lpo;
    343 
    344 	rw_enter(&lps.lps_lh_rwlock, RW_READER);
    345 
    346 	ASSERT(index < lps.lps_lh_count);
    347 	ASSERT(lps.lps_lh_array[index].lph_lpo != NULL);
    348 
    349 	/* return the handle */
    350 	lpo = lps.lps_lh_array[index].lph_lpo;
    351 	rw_exit(&lps.lps_lh_rwlock);
    352 	return (lpo);
    353 }
    354 
    355 static int
    356 lx_ptm_lh_pktio_get(uint_t index)
    357 {
    358 	int		pktio;
    359 
    360 	rw_enter(&lps.lps_lh_rwlock, RW_READER);
    361 
    362 	ASSERT(index < lps.lps_lh_count);
    363 	ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
    364 
    365 	/* return the pktio state */
    366 	pktio = lps.lps_lh_array[index].lph_pktio;
    367 	rw_exit(&lps.lps_lh_rwlock);
    368 	return (pktio);
    369 }
    370 
    371 static void
    372 lx_ptm_lh_pktio_set(uint_t index, int pktio)
    373 {
    374 	rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
    375 
    376 	ASSERT(index < lps.lps_lh_count);
    377 	ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
    378 
    379 	/* set the pktio state */
    380 	lps.lps_lh_array[index].lph_pktio = pktio;
    381 	rw_exit(&lps.lps_lh_rwlock);
    382 }
    383 
    384 static int
    385 lx_ptm_lh_eofed_get(uint_t index)
    386 {
    387 	int		eofed;
    388 
    389 	rw_enter(&lps.lps_lh_rwlock, RW_READER);
    390 
    391 	ASSERT(index < lps.lps_lh_count);
    392 	ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
    393 
    394 	/* return the eofed state */
    395 	eofed = lps.lps_lh_array[index].lph_eofed;
    396 	rw_exit(&lps.lps_lh_rwlock);
    397 	return (eofed);
    398 }
    399 
    400 static void
    401 lx_ptm_lh_eofed_set(uint_t index)
    402 {
    403 	rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
    404 
    405 	ASSERT(index < lps.lps_lh_count);
    406 	ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
    407 
    408 	/* set the eofed state */
    409 	lps.lps_lh_array[index].lph_eofed++;
    410 	rw_exit(&lps.lps_lh_rwlock);
    411 }
    412 
    413 static int
    414 lx_ptm_read_start(dev_t dev)
    415 {
    416 	lx_ptm_ops_t	*lpo = lx_ptm_lpo_lookup(DEVT_TO_INDEX(dev));
    417 
    418 	mutex_enter(&lpo->lpo_rops_lock);
    419 	ASSERT(lpo->lpo_rops >= 0);
    420 
    421 	/* Wait for other read operations to finish */
    422 	while (lpo->lpo_rops != 0) {
    423 		if (cv_wait_sig(&lpo->lpo_rops_cv, &lpo->lpo_rops_lock) == 0) {
    424 			mutex_exit(&lpo->lpo_rops_lock);
    425 			return (-1);
    426 		}
    427 	}
    428 
    429 	/* Start a read operation */
    430 	VERIFY(++lpo->lpo_rops == 1);
    431 	mutex_exit(&lpo->lpo_rops_lock);
    432 	return (0);
    433 }
    434 
    435 static void
    436 lx_ptm_read_end(dev_t dev)
    437 {
    438 	lx_ptm_ops_t	*lpo = lx_ptm_lpo_lookup(DEVT_TO_INDEX(dev));
    439 
    440 	mutex_enter(&lpo->lpo_rops_lock);
    441 	ASSERT(lpo->lpo_rops >= 0);
    442 
    443 	/* End a read operation */
    444 	VERIFY(--lpo->lpo_rops == 0);
    445 	cv_signal(&lpo->lpo_rops_cv);
    446 
    447 	mutex_exit(&lpo->lpo_rops_lock);
    448 }
    449 
    450 static int
    451 lx_ptm_pts_isopen(dev_t dev)
    452 {
    453 	ptmptsopencb_t	ppocb;
    454 
    455 	lx_ptm_lh_get_ppocb(DEVT_TO_INDEX(dev), &ppocb);
    456 	return (ppocb.ppocb_func(ppocb.ppocb_arg));
    457 }
    458 
    459 static void
    460 lx_ptm_eof_read(ldi_handle_t lh)
    461 {
    462 	struct uio	uio;
    463 	iovec_t		iov;
    464 	char		junk[1];
    465 
    466 	/*
    467 	 * We can remove any EOF message from the head of the stream by
    468 	 * doing a zero byte read from the stream.
    469 	 */
    470 	iov.iov_len = 0;
    471 	iov.iov_base = junk;
    472 	uio.uio_iovcnt = 1;
    473 	uio.uio_iov = &iov;
    474 	uio.uio_resid = iov.iov_len;
    475 	uio.uio_offset = 0;
    476 	uio.uio_segflg = UIO_SYSSPACE;
    477 	uio.uio_fmode = 0;
    478 	uio.uio_extflg = 0;
    479 	uio.uio_llimit = MAXOFFSET_T;
    480 	(void) ldi_read(lh, &uio, kcred);
    481 }
    482 
    483 static int
    484 lx_ptm_eof_drop_1(dev_t dev, int *rvalp)
    485 {
    486 	ldi_handle_t	lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
    487 	int		err, msg_size, msg_count;
    488 
    489 	*rvalp = 0;
    490 
    491 	/*
    492 	 * Check if there is an EOF message (represented by a zero length
    493 	 * data message) at the head of the stream.  Note that the
    494 	 * I_NREAD ioctl is a streams framework ioctl so it will succeed
    495 	 * even if there have been previous write errors on this stream.
    496 	 */
    497 	if ((err = ldi_ioctl(lh, I_NREAD, (intptr_t)&msg_size,
    498 	    FKIOCTL, kcred, &msg_count)) != 0)
    499 		return (err);
    500 
    501 	if ((msg_count == 0) || (msg_size != 0)) {
    502 		/* No EOF message found */
    503 		return (0);
    504 	}
    505 
    506 	/* Record the fact that the slave device has been closed. */
    507 	lx_ptm_lh_eofed_set(DEVT_TO_INDEX(dev));
    508 
    509 	/* drop the EOF */
    510 	lx_ptm_eof_read(lh);
    511 	*rvalp = 1;
    512 	return (0);
    513 }
    514 
    515 static int
    516 lx_ptm_eof_drop(dev_t dev, int *rvalp)
    517 {
    518 	int rval, err;
    519 
    520 	if (rvalp != NULL)
    521 		*rvalp = 0;
    522 	for (;;) {
    523 		if ((err = lx_ptm_eof_drop_1(dev, &rval)) != 0)
    524 			return (err);
    525 		if (rval == 0)
    526 			return (0);
    527 		if (rvalp != NULL)
    528 			*rvalp = 1;
    529 	}
    530 }
    531 
    532 static int
    533 lx_ptm_data_check(dev_t dev, int ignore_eof, int *rvalp)
    534 {
    535 	ldi_handle_t	lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
    536 	int		err;
    537 
    538 	*rvalp = 0;
    539 	if (ignore_eof) {
    540 		int	size, rval;
    541 
    542 		if ((err = ldi_ioctl(lh, FIONREAD, (intptr_t)&size,
    543 		    FKIOCTL, kcred, &rval)) != 0)
    544 			return (err);
    545 		if (size != 0)
    546 			*rvalp = 1;
    547 	} else {
    548 		int	msg_size, msg_count;
    549 
    550 		if ((err = ldi_ioctl(lh, I_NREAD, (intptr_t)&msg_size,
    551 		    FKIOCTL, kcred, &msg_count)) != 0)
    552 			return (err);
    553 		if (msg_count != 0)
    554 			*rvalp = 1;
    555 	}
    556 	return (0);
    557 }
    558 
    559 static int
    560 lx_ptm_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
    561 {
    562 	int err;
    563 
    564 	if (cmd != DDI_ATTACH)
    565 		return (DDI_FAILURE);
    566 
    567 	if (ddi_create_minor_node(dip, LX_PTM_MINOR_NODE, S_IFCHR,
    568 	    ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS)
    569 		return (DDI_FAILURE);
    570 
    571 	err = ldi_ident_from_dip(dip, &lps.lps_li);
    572 	if (err != 0) {
    573 		ddi_remove_minor_node(dip, ddi_get_name(dip));
    574 		return (DDI_FAILURE);
    575 	}
    576 
    577 	lps.lps_dip = dip;
    578 	lps.lps_pts_major = ddi_name_to_major(LP_PTS_DRV_NAME);
    579 
    580 	rw_init(&lps.lps_lh_rwlock, NULL, RW_DRIVER, NULL);
    581 	lps.lps_lh_count = 0;
    582 	lps.lps_lh_array = NULL;
    583 
    584 	return (DDI_SUCCESS);
    585 }
    586 
    587 /*ARGSUSED*/
    588 static int
    589 lx_ptm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
    590 {
    591 	if (cmd != DDI_DETACH)
    592 		return (DDI_FAILURE);
    593 
    594 	ldi_ident_release(lps.lps_li);
    595 	lps.lps_dip = NULL;
    596 
    597 	ASSERT((lps.lps_lh_count != 0) || (lps.lps_lh_array == NULL));
    598 	ASSERT((lps.lps_lh_count == 0) || (lps.lps_lh_array != NULL));
    599 	if (lps.lps_lh_array != NULL) {
    600 		kmem_free(lps.lps_lh_array,
    601 		    sizeof (lx_ptm_handle_t) * lps.lps_lh_count);
    602 		lps.lps_lh_array = NULL;
    603 		lps.lps_lh_count = 0;
    604 	}
    605 
    606 	return (DDI_SUCCESS);
    607 }
    608 
    609 /*ARGSUSED*/
    610 static int
    611 lx_ptm_open(dev_t *devp, int flag, int otyp, cred_t *credp)
    612 {
    613 	struct strioctl	iocb;
    614 	ptmptsopencb_t	ppocb = { NULL, NULL };
    615 	ldi_handle_t	lh;
    616 	major_t		maj, our_major = getmajor(*devp);
    617 	minor_t		min, lastmin;
    618 	uint_t		index, anchor = 1;
    619 	dev_t		ptm_dev;
    620 	int		err, rval = 0;
    621 
    622 	/*
    623 	 * Don't support the FNDELAY flag and FNONBLOCK until we either
    624 	 * find a Linux app that opens /dev/ptmx with the O_NDELAY
    625 	 * or O_NONBLOCK flags explicitly, or until we create test cases
    626 	 * to determine how reads of master terminal devices opened with
    627 	 * these flags behave in different situations on Linux.  Supporting
    628 	 * these flags will involve enhancing our read implementation
    629 	 * and changing the way it deals with EOF notifications.
    630 	 */
    631 	if (flag & (FNDELAY | FNONBLOCK))
    632 		return (ENOTSUP);
    633 
    634 	/*
    635 	 * we're layered on top of the ptm driver so open that driver
    636 	 * first.  (note that we're opening /dev/ptmx in the global
    637 	 * zone, not ourselves in the Linux zone.)
    638 	 */
    639 	err = ldi_open_by_name(LP_PTM_PATH, flag, credp, &lh, lps.lps_li);
    640 	if (err != 0)
    641 		return (err);
    642 
    643 	/* get the devt returned by the ptmx open */
    644 	err = ldi_get_dev(lh, &ptm_dev);
    645 	if (err != 0) {
    646 		(void) ldi_close(lh, flag, credp);
    647 		return (err);
    648 	}
    649 
    650 	/*
    651 	 * we're a cloning driver so here's well change the devt that we
    652 	 * return.  the ptmx is also a cloning driver so we'll just use
    653 	 * it's minor number as our minor number (it already manages it's
    654 	 * minor name space so no reason to duplicate the effort.)
    655 	 */
    656 	index = getminor(ptm_dev);
    657 	*devp = makedevice(our_major, INDEX_TO_MINOR(index));
    658 
    659 	/* Get a callback function to query if the pts device is open. */
    660 	iocb.ic_cmd = PTMPTSOPENCB;
    661 	iocb.ic_timout = 0;
    662 	iocb.ic_len = sizeof (ppocb);
    663 	iocb.ic_dp = (char *)&ppocb;
    664 
    665 	err = ldi_ioctl(lh, I_STR, (intptr_t)&iocb, FKIOCTL, kcred, &rval);
    666 	if ((err != 0) || (rval != 0)) {
    667 		(void) ldi_close(lh, flag, credp);
    668 		return (EIO); /* XXX return something else here? */
    669 	}
    670 	ASSERT(ppocb.ppocb_func != NULL);
    671 
    672 	/*
    673 	 * now setup autopush for the terminal slave device.  this is
    674 	 * necessary so that when a Linux program opens the device we
    675 	 * can push required strmod modules onto the stream.  in Solaris
    676 	 * this is normally done by the application that actually
    677 	 * allocates the terminal.
    678 	 */
    679 	maj = lps.lps_pts_major;
    680 	min = index;
    681 	lastmin = 0;
    682 	err = kstr_autopush(SET_AUTOPUSH, &maj, &min, &lastmin,
    683 	    &anchor, lx_pts_mods);
    684 	if (err != 0) {
    685 		(void) ldi_close(lh, flag, credp);
    686 		return (EIO); /* XXX return something else here? */
    687 	}
    688 
    689 	/* save off this layered handle for future accesses */
    690 	lx_ptm_lh_insert(index, lh);
    691 	lx_ptm_lh_set_ppocb(index, &ppocb);
    692 	return (0);
    693 }
    694 
    695 /*ARGSUSED*/
    696 static int
    697 lx_ptm_close(dev_t dev, int flag, int otyp, cred_t *credp)
    698 {
    699 	ldi_handle_t	lh;
    700 	major_t		maj;
    701 	minor_t		min, lastmin;
    702 	uint_t		index;
    703 	int		err;
    704 
    705 	index = DEVT_TO_INDEX(dev);
    706 
    707 	/*
    708 	 * we must cleanup all the state associated with this major/minor
    709 	 * terminal pair before actually closing the ptm master device.
    710 	 * this is required because once the close of the ptm device is
    711 	 * complete major/minor terminal pair is immediatly available for
    712 	 * re-use in any zone.
    713 	 */
    714 
    715 	/* free up our saved reference for this layered handle */
    716 	lh = lx_ptm_lh_remove(index);
    717 
    718 	/* unconfigure autopush for the associated terminal slave device */
    719 	maj = lps.lps_pts_major;
    720 	min = index;
    721 	lastmin = 0;
    722 	do {
    723 		/*
    724 		 * we loop here because we don't want to release this ptm
    725 		 * node if autopush can't be disabled on the associated
    726 		 * slave device because then bad things could happen if
    727 		 * another brand were to get this terminal allocated
    728 		 * to them.
    729 		 *
    730 		 * XXX should we ever give up?
    731 		 */
    732 		err = kstr_autopush(CLR_AUTOPUSH, &maj, &min, &lastmin,
    733 		    0, NULL);
    734 	} while (err != 0);
    735 
    736 	err = ldi_close(lh, flag, credp);
    737 
    738 	/*
    739 	 * note that we don't have to bother with changing the permissions
    740 	 * on the associated slave device here.  the reason is that no one
    741 	 * can actually open the device untill it's associated master
    742 	 * device is re-opened, which will result in the permissions on
    743 	 * it being reset.
    744 	 */
    745 	return (err);
    746 }
    747 
    748 static int
    749 lx_ptm_read_loop(dev_t dev, struct uio *uiop, cred_t *credp, int *loop)
    750 {
    751 	ldi_handle_t	lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
    752 	int		err, rval;
    753 	struct uio	uio = *uiop;
    754 
    755 	*loop = 0;
    756 
    757 	/*
    758 	 * Here's another way that Linux master terminals behave differently
    759 	 * from Solaris master terminals.  If you do a read on a Linux
    760 	 * master terminal (that was opened witout NDELAY and NONBLOCK)
    761 	 * who's corrosponding slave terminal is currently closed and
    762 	 * has been opened and closed at least once, Linux return -1 and
    763 	 * set errno to EIO where as Solaris blocks.
    764 	 */
    765 	if (lx_ptm_lh_eofed_get(DEVT_TO_INDEX(dev))) {
    766 		/* Slave has been opened and closed at least once. */
    767 		if (lx_ptm_pts_isopen(dev) == 0) {
    768 			/*
    769 			 * Slave is closed.  Make sure that data is avaliable
    770 			 * before attempting a read.
    771 			 */
    772 			if ((err = lx_ptm_data_check(dev, 0, &rval)) != 0)
    773 				return (err);
    774 
    775 			/* If there is no data available then return. */
    776 			if (rval == 0)
    777 				return (EIO);
    778 		}
    779 	}
    780 
    781 	/* Actually do the read operation. */
    782 	if ((err = ldi_read(lh, uiop, credp)) != 0)
    783 		return (err);
    784 
    785 	/* If read returned actual data then return. */
    786 	if (uio.uio_resid != uiop->uio_resid)
    787 		return (0);
    788 
    789 	/*
    790 	 * This was a zero byte read (ie, an EOF).  This indicates
    791 	 * that the slave terinal device has been closed.  Record
    792 	 * the fact that the slave device has been closed and retry
    793 	 * the read operation.
    794 	 */
    795 	lx_ptm_lh_eofed_set(DEVT_TO_INDEX(dev));
    796 	*loop = 1;
    797 	return (0);
    798 }
    799 
    800 static int
    801 lx_ptm_read(dev_t dev, struct uio *uiop, cred_t *credp)
    802 {
    803 	int		pktio = lx_ptm_lh_pktio_get(DEVT_TO_INDEX(dev));
    804 	int		err, loop;
    805 	struct uio	uio;
    806 	struct iovec	iovp;
    807 
    808 	ASSERT(uiop->uio_iovcnt > 0);
    809 
    810 	/*
    811 	 * If packet mode has been enabled (via TIOCPKT) we need to pad
    812 	 * all read requests with a leading byte that indicates any
    813 	 * relevant control status information.
    814 	 */
    815 	if (pktio != 0) {
    816 		/*
    817 		 * We'd like to write the control information into
    818 		 * the current buffer but we can't yet.  We don't
    819 		 * want to modify userspace memory here only to have
    820 		 * the read operation fail later.  So instead
    821 		 * what we'll do here is read one character from the
    822 		 * beginning of the memory pointed to by the uio
    823 		 * structure.  This will advance the output pointer
    824 		 * by one.  Then when the read completes successfully
    825 		 * we can update the byte that we passed over.  Before
    826 		 * we do the read make a copy of the current uiop and
    827 		 * iovec structs so we can write to them later.
    828 		 */
    829 		uio = *uiop;
    830 		iovp = *uiop->uio_iov;
    831 		uio.uio_iov = &iovp;
    832 
    833 		if (uwritec(uiop) == -1)
    834 			return (EFAULT);
    835 	}
    836 
    837 	do {
    838 		/*
    839 		 * Before we actually attempt a read operation we need
    840 		 * to make sure there's some buffer space to actually
    841 		 * read in some data.  We do this because if we're in
    842 		 * pktio mode and the caller only requested one byte,
    843 		 * then we've already used up that one byte and we
    844 		 * don't want to pass this read request.  Doing a 0
    845 		 * byte read (unless there is a problem with the stream
    846 		 * head) always returns succcess.  Normally when a streams
    847 		 * read returns 0 bytes we interpret that as an EOF on
    848 		 * the stream (ie, the slave side has been opened and
    849 		 * closed) and we ignore it and re-try the read operation.
    850 		 * So if we pass on a 0 byte read here lx_ptm_read_loop()
    851 		 * will tell us to loop around and we'll end up in an
    852 		 * infinite loop.
    853 		 */
    854 		if (uiop->uio_resid == 0)
    855 			break;
    856 
    857 		/*
    858 		 * Serialize all reads.  We need to do this so that we can
    859 		 * properly emulate the behavior of master terminals on Linux.
    860 		 * In reality this serializaion should not pose any kind of
    861 		 * performance problem since it would be very strange to have
    862 		 * multiple threads trying to read from the same master
    863 		 * terminal device concurrently.
    864 		 */
    865 		if (lx_ptm_read_start(dev) != 0)
    866 			return (EINTR);
    867 
    868 		err = lx_ptm_read_loop(dev, uiop, credp, &loop);
    869 		lx_ptm_read_end(dev);
    870 		if (err != 0)
    871 			return (err);
    872 	} while (loop != 0);
    873 
    874 	if (pktio != 0) {
    875 		uint8_t		pktio_data = TIOCPKT_DATA;
    876 
    877 		/*
    878 		 * Note that the control status information we
    879 		 * pass back is faked up in the sense that we
    880 		 * don't actually report any events, we always
    881 		 * report a status of 0.
    882 		 */
    883 		if (uiomove(&pktio_data, 1, UIO_READ, &uio) != 0)
    884 			return (EFAULT);
    885 	}
    886 
    887 	return (0);
    888 }
    889 
    890 static int
    891 lx_ptm_write(dev_t dev, struct uio *uiop, cred_t *credp)
    892 {
    893 	ldi_handle_t		lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
    894 	int		err;
    895 
    896 	err = ldi_write(lh, uiop, credp);
    897 
    898 	return (err);
    899 }
    900 
    901 static int
    902 lx_ptm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
    903     int *rvalp)
    904 {
    905 	ldi_handle_t	lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
    906 	int		err;
    907 
    908 	/*
    909 	 * here we need to make sure that we never allow the
    910 	 * I_SETSIG and I_ESETSIG ioctls to pass through.  we
    911 	 * do this because we can't support them.
    912 	 *
    913 	 * the native Solaris ptm device supports these ioctls because
    914 	 * they are streams framework ioctls and all streams devices
    915 	 * support them by default.  these ioctls cause the current
    916 	 * process to be registered with a stream and receive signals
    917 	 * when certain stream events occur.
    918 	 *
    919 	 * a problem arises with cleanup of these registrations
    920 	 * for layered drivers.
    921 	 *
    922 	 * normally the streams framework is notified whenever a
    923 	 * process closes any reference to a stream and it goes ahead
    924 	 * and cleans up these registrations.  but actual device drivers
    925 	 * are not notified when a process performs a close operation
    926 	 * unless the process is closing the last opened reference to
    927 	 * the device on the entire system.
    928 	 *
    929 	 * so while we could pass these ioctls on and allow processes
    930 	 * to register for signal delivery, we would never receive
    931 	 * any notification when those processes exit (or close a
    932 	 * stream) and we wouldn't be able to unregister them.
    933 	 *
    934 	 * luckily these operations are streams specific and Linux
    935 	 * doesn't support streams devices.  so it doesn't actually
    936 	 * seem like we need to support these ioctls.  if it turns
    937 	 * out that we do need to support them for some reason in
    938 	 * the future, the current driver model will have to be
    939 	 * enhanced to better support streams device layering.
    940 	 */
    941 	if ((cmd == I_SETSIG) || (cmd == I_ESETSIG))
    942 		return (EINVAL);
    943 
    944 	/*
    945 	 * here we fake up support for TIOCPKT.  Linux applications expect
    946 	 * /etc/ptmx to support this ioctl, but on Solaris it doesn't.
    947 	 * (it is supported on older bsd style ptys.)  so we'll fake
    948 	 * up support for it here.
    949 	 *
    950 	 * the reason that this ioctl is emulated here instead of in
    951 	 * userland is that this ioctl affects the results returned
    952 	 * from read() operations.  if this ioctl was emulated in
    953 	 * userland the brand library would need to intercept all
    954 	 * read operations and check to see if pktio was enabled
    955 	 * for the fd being read from.  since this ioctl only needs
    956 	 * to be supported on the ptmx device it makes more sense
    957 	 * to support it here where we can easily update the results
    958 	 * returned for read() operations performed on ourselves.
    959 	 */
    960 	if (cmd == TIOCPKT) {
    961 		int	pktio;
    962 
    963 		if (ddi_copyin((void *)arg, &pktio, sizeof (pktio),
    964 		    mode) != DDI_SUCCESS)
    965 			return (EFAULT);
    966 
    967 		if (pktio == 0)
    968 			lx_ptm_lh_pktio_set(DEVT_TO_INDEX(dev), 0);
    969 		else
    970 			lx_ptm_lh_pktio_set(DEVT_TO_INDEX(dev), 1);
    971 
    972 		return (0);
    973 	}
    974 
    975 	err = ldi_ioctl(lh, cmd, arg, mode, credp, rvalp);
    976 
    977 	return (err);
    978 }
    979 
    980 static int
    981 lx_ptm_poll_loop(dev_t dev, short events, int anyyet, short *reventsp,
    982     struct pollhead **phpp, int *loop)
    983 {
    984 	ldi_handle_t	lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
    985 	short		reventsp2;
    986 	int		err, rval;
    987 
    988 	*loop = 0;
    989 
    990 	/*
    991 	 * If the slave device has been opened and closed at least
    992 	 * once and the slave device is currently closed, then poll
    993 	 * always needs to returns immediatly.
    994 	 */
    995 	if ((lx_ptm_lh_eofed_get(DEVT_TO_INDEX(dev)) != 0) &&
    996 	    (lx_ptm_pts_isopen(dev) == 0)) {
    997 		/* In this case always return POLLHUP */
    998 		*reventsp = POLLHUP;
    999 
   1000 		/*
   1001 		 * Check if there really is data on the stream.
   1002 		 * If so set the correct return flags.
   1003 		 */
   1004 		if ((err = lx_ptm_data_check(dev, 1, &rval)) != 0) {
   1005 			/* Something went wrong. */
   1006 			return (err);
   1007 		}
   1008 		if (rval != 0)
   1009 			*reventsp |= (events & (POLLIN | POLLRDNORM));
   1010 
   1011 		/*
   1012 		 * Is the user checking for writability?  Note that for ptm
   1013 		 * devices Linux seems to ignore the POLLWRBAND write flag.
   1014 		 */
   1015 		if ((events & POLLWRNORM) == 0)
   1016 			return (0);
   1017 
   1018 		/*
   1019 		 * To check if the stream is writable we have to actually
   1020 		 * call poll, but make sure to set anyyet to 1 to prevent
   1021 		 * the streams framework from setting up callbacks.
   1022 		 */
   1023 		if ((err = ldi_poll(lh, POLLWRNORM, 1, &reventsp2, NULL)) != 0)
   1024 			return (err);
   1025 
   1026 		*reventsp |= (reventsp2 & POLLWRNORM);
   1027 	} else {
   1028 		int lockstate;
   1029 
   1030 		/* The slave device is open, do the poll */
   1031 		if ((err = ldi_poll(lh, events, anyyet, reventsp, phpp)) != 0)
   1032 			return (err);
   1033 
   1034 		/*
   1035 		 * Drop any leading EOFs on the stream.
   1036 		 *
   1037 		 * Note that we have to use pollunlock() here to avoid
   1038 		 * recursive mutex enters in the poll framework.  The
   1039 		 * reason is that if there is an EOF message on the stream
   1040 		 * then the act of reading from the queue to remove the
   1041 		 * message can cause the ptm drivers event service
   1042 		 * routine to be invoked, and if there is no open
   1043 		 * slave device then the ptm driver may generate
   1044 		 * error messages and put them on the stream.  This
   1045 		 * in turn will generate a poll event and the poll
   1046 		 * framework will try to invoke any poll callbacks
   1047 		 * associated with the stream.  In the process of
   1048 		 * doing that the poll framework will try to aquire
   1049 		 * locks that we are already holding.  So we need to
   1050 		 * drop those locks here before we do our read.
   1051 		 */
   1052 		lockstate = pollunlock();
   1053 		err = lx_ptm_eof_drop(dev, &rval);
   1054 		pollrelock(lockstate);
   1055 		if (err)
   1056 			return (err);
   1057 
   1058 		/* If no EOF was dropped then return */
   1059 		if (rval == 0)
   1060 			return (0);
   1061 
   1062 		/*
   1063 		 * An EOF was removed from the stream.  Retry the entire
   1064 		 * poll operation from the top because polls on the ptm
   1065 		 * device should behave differently now.
   1066 		 */
   1067 		*loop = 1;
   1068 	}
   1069 	return (0);
   1070 }
   1071 
   1072 static int
   1073 lx_ptm_poll(dev_t dev, short events, int anyyet, short *reventsp,
   1074     struct pollhead **phpp)
   1075 {
   1076 	int loop, err;
   1077 
   1078 	do {
   1079 		/* Serialize ourself wrt read operations. */
   1080 		if (lx_ptm_read_start(dev) != 0)
   1081 			return (EINTR);
   1082 
   1083 		err = lx_ptm_poll_loop(dev,
   1084 		    events, anyyet, reventsp, phpp, &loop);
   1085 		lx_ptm_read_end(dev);
   1086 		if (err != 0)
   1087 			return (err);
   1088 	} while (loop != 0);
   1089 	return (0);
   1090 }
   1091 
/*
 * Character/block entry point table for this driver.
 *
 * Only open, close, read, write, ioctl and chpoll are provided by the
 * driver itself; property operations use ddi_prop_op and the remaining
 * function slots are filled with nodev.
 */
static struct cb_ops lx_ptm_cb_ops = {
	lx_ptm_open,		/* open */
	lx_ptm_close,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	lx_ptm_read,		/* read */
	lx_ptm_write,		/* write */
	lx_ptm_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	lx_ptm_poll,		/* chpoll */
	ddi_prop_op,		/* prop_op */
	NULL,			/* cb_str */
	D_NEW | D_MP,		/* driver flags */
	CB_REV,			/* cb_ops revision */
	NULL,			/* presumably aread; confirm in cb_ops(9S) */
	NULL			/* presumably awrite; confirm in cb_ops(9S) */
};
   1112 
/*
 * Device operations table for this driver; it ties the attach/detach
 * routines and the cb_ops table above together.
 *
 * NOTE(review): apart from "quiesce", the per-slot labels below were
 * added from the reviewer's recollection of the struct dev_ops field
 * order and should be confirmed against <sys/devops.h>.
 */
static struct dev_ops lx_ptm_ops = {
	DEVO_REV,		/* dev_ops revision */
	0,			/* reference count */
	ddi_getinfo_1to1,	/* getinfo */
	nulldev,		/* identify */
	nulldev,		/* probe */
	lx_ptm_attach,		/* attach */
	lx_ptm_detach,		/* detach */
	nodev,			/* reset */
	&lx_ptm_cb_ops,		/* cb_ops */
	NULL,			/* bus_ops */
	NULL,			/* power */
	ddi_quiesce_not_needed,		/* quiesce */
};
   1127 
/*
 * Driver linkage structure: identifies this module as a device driver
 * and points at its dev_ops table.  Referenced from modlinkage below.
 */
static struct modldrv modldrv = {
	&mod_driverops,			/* type of module */
	"Linux master terminal driver",	/* description of module */
	&lx_ptm_ops			/* driver ops */
};
   1133 
/*
 * Module linkage handed to mod_install(), mod_info() and mod_remove()
 * by the _init(), _info() and _fini() entry points.
 */
static struct modlinkage modlinkage = {
	MODREV_1,	/* module linkage revision */
	&modldrv,	/* the single driver linkage for this module */
	NULL		/* presumably terminates the linkage list; confirm */
};
   1139 
   1140 int
   1141 _init(void)
   1142 {
   1143 	return (mod_install(&modlinkage));
   1144 }
   1145 
   1146 int
   1147 _info(struct modinfo *modinfop)
   1148 {
   1149 	return (mod_info(&modlinkage, modinfop));
   1150 }
   1151 
   1152 int
   1153 _fini(void)
   1154 {
   1155 	return (mod_remove(&modlinkage));
   1156 }
   1157