Home | History | Annotate | Download | only in sockfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #include <sys/types.h>
     28 #include <sys/param.h>
     29 #include <sys/systm.h>
     30 #include <sys/sysmacros.h>
     31 #include <sys/debug.h>
     32 #include <sys/cmn_err.h>
     33 #include <sys/vfs.h>
     34 #include <sys/policy.h>
     35 #include <sys/modctl.h>
     36 
     37 #include <sys/sunddi.h>
     38 
     39 #include <sys/strsun.h>
     40 #include <sys/stropts.h>
     41 #include <sys/strsubr.h>
     42 #include <sys/socket.h>
     43 #include <sys/socketvar.h>
     44 #include <sys/uio.h>
     45 
     46 #include <inet/ipclassifier.h>
     47 #include <fs/sockfs/sockcommon.h>
     48 #include <fs/sockfs/nl7c.h>
     49 #include <fs/sockfs/socktpi.h>
     50 #include <fs/sockfs/sodirect.h>
     51 #include <inet/ip.h>
     52 
     53 extern int xnet_skip_checks, xnet_check_print, xnet_truncate_print;
     54 
     55 /*
     56  * Common socket access functions.
     57  *
     58  * Instead of accessing the sonode switch directly (i.e., SOP_xxx()),
     59  * the socket_xxx() function should be used.
     60  */
     61 
     62 /*
     63  * Try to create a new sonode of the requested <family, type, protocol>.
     64  */
     65 /* ARGSUSED */
     66 struct sonode *
     67 socket_create(int family, int type, int protocol, char *devpath, char *mod,
     68     int flags, int version, struct cred *cr, int *errorp)
     69 {
     70 	struct sonode *so;
     71 	struct sockparams *sp = NULL;
     72 	int saved_error;
     73 
     74 	/*
     75 	 * Look for a sockparams entry that match the given criteria.
     76 	 * solookup() returns with the entry held.
     77 	 */
     78 	*errorp = solookup(family, type, protocol, &sp);
     79 	saved_error = *errorp;
     80 	if (sp == NULL) {
     81 		int kmflags = (flags == SOCKET_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
     82 		/*
     83 		 * There is no matching sockparams entry. An ephemeral entry is
     84 		 * created if the caller specifies a device or a socket module.
     85 		 */
     86 		if (devpath != NULL) {
     87 			saved_error = 0;
     88 			sp = sockparams_hold_ephemeral_bydev(family, type,
     89 			    protocol, devpath, kmflags, errorp);
     90 		} else if (mod != NULL) {
     91 			saved_error = 0;
     92 			sp = sockparams_hold_ephemeral_bymod(family, type,
     93 			    protocol, mod, kmflags, errorp);
     94 		} else {
     95 			*errorp = solookup(family, type, 0, &sp);
     96 		}
     97 
     98 		if (sp == NULL) {
     99 			if (saved_error && (*errorp == EPROTONOSUPPORT ||
    100 			    *errorp == EPROTOTYPE || *errorp == ENOPROTOOPT))
    101 				*errorp = saved_error;
    102 			return (NULL);
    103 		}
    104 	}
    105 
    106 	ASSERT(sp->sp_smod_info != NULL);
    107 	ASSERT(flags == SOCKET_SLEEP || flags == SOCKET_NOSLEEP);
    108 	sp->sp_stats.sps_ncreate.value.ui64++;
    109 	so = sp->sp_smod_info->smod_sock_create_func(sp, family, type,
    110 	    protocol, version, flags, errorp, cr);
    111 	if (so == NULL) {
    112 		SOCKPARAMS_DEC_REF(sp);
    113 	} else {
    114 		if ((*errorp = SOP_INIT(so, NULL, cr, flags)) == 0) {
    115 			/* Cannot fail, only bumps so_count */
    116 			(void) VOP_OPEN(&SOTOV(so), FREAD|FWRITE, cr, NULL);
    117 		} else {
    118 			if (saved_error && (*errorp == EPROTONOSUPPORT ||
    119 			    *errorp == EPROTOTYPE || *errorp == ENOPROTOOPT))
    120 				*errorp = saved_error;
    121 			socket_destroy(so);
    122 			so = NULL;
    123 		}
    124 	}
    125 	return (so);
    126 }
    127 
    128 struct sonode *
    129 socket_newconn(struct sonode *parent, sock_lower_handle_t lh,
    130     sock_downcalls_t *dc, int flags, int *errorp)
    131 {
    132 	struct sonode *so;
    133 	struct sockparams *sp;
    134 	struct cred *cr;
    135 
    136 	if ((cr = CRED()) == NULL)
    137 		cr = kcred;
    138 
    139 	sp = parent->so_sockparams;
    140 	ASSERT(sp != NULL);
    141 
    142 	sp->sp_stats.sps_ncreate.value.ui64++;
    143 	so = sp->sp_smod_info->smod_sock_create_func(sp, parent->so_family,
    144 	    parent->so_type, parent->so_protocol, parent->so_version, flags,
    145 	    errorp, cr);
    146 	if (so != NULL) {
    147 		SOCKPARAMS_INC_REF(sp);
    148 
    149 		so->so_proto_handle = lh;
    150 		so->so_downcalls = dc;
    151 		/*
    152 		 * This function may be called in interrupt context, and CRED()
    153 		 * will be NULL. In this case, pass in kcred.
    154 		 */
    155 		if ((*errorp = SOP_INIT(so, parent, cr, flags)) == 0) {
    156 			/* Cannot fail, only bumps so_count */
    157 			(void) VOP_OPEN(&SOTOV(so), FREAD|FWRITE, cr, NULL);
    158 		} else  {
    159 			socket_destroy(so);
    160 			so = NULL;
    161 		}
    162 	}
    163 
    164 	return (so);
    165 }
    166 
    167 /*
    168  * Bind local endpoint.
    169  */
    170 int
    171 socket_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
    172     int flags, cred_t *cr)
    173 {
    174 	return (SOP_BIND(so, name, namelen, flags, cr));
    175 }
    176 
    177 /*
    178  * Turn socket into a listen socket.
    179  */
    180 int
    181 socket_listen(struct sonode *so, int backlog, cred_t *cr)
    182 {
    183 	if (backlog < 0) {
    184 		backlog = 0;
    185 	}
    186 
    187 	/*
    188 	 * Use the same qlimit as in BSD. BSD checks the qlimit
    189 	 * before queuing the next connection implying that a
    190 	 * listen(sock, 0) allows one connection to be queued.
    191 	 * BSD also uses 1.5 times the requested backlog.
    192 	 *
    193 	 * XNS Issue 4 required a strict interpretation of the backlog.
    194 	 * This has been waived subsequently for Issue 4 and the change
    195 	 * incorporated in XNS Issue 5. So we aren't required to do
    196 	 * anything special for XPG apps.
    197 	 */
    198 	if (backlog >= (INT_MAX - 1) / 3)
    199 		backlog = INT_MAX;
    200 	else
    201 		backlog = backlog * 3 / 2 + 1;
    202 
    203 	return (SOP_LISTEN(so, backlog, cr));
    204 }
    205 
    206 /*
    207  * Accept incoming connection.
    208  */
    209 int
    210 socket_accept(struct sonode *lso, int fflag, cred_t *cr, struct sonode **nsop)
    211 {
    212 	return (SOP_ACCEPT(lso, fflag, cr, nsop));
    213 }
    214 
    215 /*
    216  * Active open.
    217  */
    218 int
    219 socket_connect(struct sonode *so, const struct sockaddr *name,
    220     socklen_t namelen, int fflag, int flags, cred_t *cr)
    221 {
    222 	int error;
    223 
    224 	/*
    225 	 * Handle a connect to a name parameter of type AF_UNSPEC like a
    226 	 * connect to a null address. This is the portable method to
    227 	 * unconnect a socket.
    228 	 */
    229 	if ((namelen >= sizeof (sa_family_t)) &&
    230 	    (name->sa_family == AF_UNSPEC)) {
    231 		name = NULL;
    232 		namelen = 0;
    233 	}
    234 
    235 	error = SOP_CONNECT(so, name, namelen, fflag, flags, cr);
    236 
    237 	if (error == EHOSTUNREACH && flags & _SOCONNECT_XPG4_2) {
    238 		/*
    239 		 * X/Open specification contains a requirement that
    240 		 * ENETUNREACH be returned but does not require
    241 		 * EHOSTUNREACH. In order to keep the test suite
    242 		 * happy we mess with the errno here.
    243 		 */
    244 		error = ENETUNREACH;
    245 	}
    246 
    247 	return (error);
    248 }
    249 
    250 /*
    251  * Get address of remote node.
    252  */
    253 int
    254 socket_getpeername(struct sonode *so, struct sockaddr *addr,
    255     socklen_t *addrlen, boolean_t accept, cred_t *cr)
    256 {
    257 	ASSERT(*addrlen > 0);
    258 	return (SOP_GETPEERNAME(so, addr, addrlen, accept, cr));
    259 
    260 }
    261 
    262 /*
    263  * Get local address.
    264  */
    265 int
    266 socket_getsockname(struct sonode *so, struct sockaddr *addr,
    267     socklen_t *addrlen, cred_t *cr)
    268 {
    269 	return (SOP_GETSOCKNAME(so, addr, addrlen, cr));
    270 
    271 }
    272 
    273 /*
    274  * Called from shutdown().
    275  */
    276 int
    277 socket_shutdown(struct sonode *so, int how, cred_t *cr)
    278 {
    279 	return (SOP_SHUTDOWN(so, how, cr));
    280 }
    281 
    282 /*
    283  * Get socket options.
    284  */
    285 /*ARGSUSED*/
    286 int
    287 socket_getsockopt(struct sonode *so, int level, int option_name,
    288     void *optval, socklen_t *optlenp, int flags, cred_t *cr)
    289 {
    290 	return (SOP_GETSOCKOPT(so, level, option_name, optval,
    291 	    optlenp, flags, cr));
    292 }
    293 
    294 /*
    295  * Set socket options
    296  */
    297 int
    298 socket_setsockopt(struct sonode *so, int level, int option_name,
    299     const void *optval, t_uscalar_t optlen, cred_t *cr)
    300 {
    301 	int val = 1;
    302 	/* Caller allocates aligned optval, or passes null */
    303 	ASSERT(((uintptr_t)optval & (sizeof (t_scalar_t) - 1)) == 0);
    304 	/* If optval is null optlen is 0, and vice-versa */
    305 	ASSERT(optval != NULL || optlen == 0);
    306 	ASSERT(optlen != 0 || optval == NULL);
    307 
    308 	if (optval == NULL && optlen == 0)
    309 		optval = &val;
    310 
    311 	return (SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr));
    312 }
    313 
    314 int
    315 socket_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
    316     cred_t *cr)
    317 {
    318 	int error = 0;
    319 	ssize_t orig_resid = uiop->uio_resid;
    320 
    321 	/*
    322 	 * Do not bypass the cache if we are doing a local (AF_UNIX) write.
    323 	 */
    324 	if (so->so_family == AF_UNIX)
    325 		uiop->uio_extflg |= UIO_COPY_CACHED;
    326 	else
    327 		uiop->uio_extflg &= ~UIO_COPY_CACHED;
    328 
    329 	error = SOP_SENDMSG(so, msg, uiop, cr);
    330 	switch (error) {
    331 	default:
    332 		break;
    333 	case EINTR:
    334 	/* EAGAIN is EWOULDBLOCK */
    335 	case EWOULDBLOCK:
    336 		/* We did a partial send */
    337 		if (uiop->uio_resid != orig_resid)
    338 			error = 0;
    339 		break;
    340 	case EPIPE:
    341 		if ((so->so_mode & SM_KERNEL) == 0)
    342 			tsignal(curthread, SIGPIPE);
    343 		break;
    344 	}
    345 
    346 	return (error);
    347 }
    348 
    349 int
    350 socket_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag,
    351     struct cred *cr, mblk_t **mpp)
    352 {
    353 	int error = 0;
    354 
    355 	error = SOP_SENDMBLK(so, msg, fflag, cr, mpp);
    356 	if (error == EPIPE) {
    357 		tsignal(curthread, SIGPIPE);
    358 	}
    359 	return (error);
    360 }
    361 
    362 int
    363 socket_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
    364     cred_t *cr)
    365 {
    366 	int error;
    367 	ssize_t orig_resid = uiop->uio_resid;
    368 
    369 	/*
    370 	 * Do not bypass the cache when reading data, as the application
    371 	 * is likely to access the data shortly.
    372 	 */
    373 	uiop->uio_extflg |= UIO_COPY_CACHED;
    374 
    375 	error = SOP_RECVMSG(so, msg, uiop, cr);
    376 
    377 	switch (error) {
    378 	case EINTR:
    379 	/* EAGAIN is EWOULDBLOCK */
    380 	case EWOULDBLOCK:
    381 		/* We did a partial read */
    382 		if (uiop->uio_resid != orig_resid)
    383 			error = 0;
    384 		break;
    385 	default:
    386 		break;
    387 	}
    388 	return (error);
    389 }
    390 
    391 int
    392 socket_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
    393     struct cred *cr, int32_t *rvalp)
    394 {
    395 	return (SOP_IOCTL(so, cmd, arg, mode, cr, rvalp));
    396 }
    397 
    398 int
    399 socket_poll(struct sonode *so, short events, int anyyet, short *reventsp,
    400     struct pollhead **phpp)
    401 {
    402 	return (SOP_POLL(so, events, anyyet, reventsp, phpp));
    403 }
    404 
    405 int
    406 socket_close(struct sonode *so, int flag, struct cred *cr)
    407 {
    408 	return (VOP_CLOSE(SOTOV(so), flag, 1, 0, cr, NULL));
    409 }
    410 
    411 int
    412 socket_close_internal(struct sonode *so, int flag, cred_t *cr)
    413 {
    414 	ASSERT(so->so_count == 0);
    415 
    416 	return (SOP_CLOSE(so, flag, cr));
    417 }
    418 
    419 void
    420 socket_destroy(struct sonode *so)
    421 {
    422 	vn_invalid(SOTOV(so));
    423 	VN_RELE(SOTOV(so));
    424 }
    425 
    426 /* ARGSUSED */
    427 void
    428 socket_destroy_internal(struct sonode *so, cred_t *cr)
    429 {
    430 	struct sockparams *sp = so->so_sockparams;
    431 	ASSERT(so->so_count == 0 && sp != NULL);
    432 
    433 	sp->sp_smod_info->smod_sock_destroy_func(so);
    434 
    435 	SOCKPARAMS_DEC_REF(sp);
    436 }
    437 
    438 /*
    439  * TODO Once the common vnode ops is available, then the vnops argument
    440  * should be removed.
    441  */
    442 /*ARGSUSED*/
    443 int
    444 sonode_constructor(void *buf, void *cdrarg, int kmflags)
    445 {
    446 	struct sonode *so = buf;
    447 	struct vnode *vp;
    448 
    449 	vp = so->so_vnode = vn_alloc(kmflags);
    450 	if (vp == NULL) {
    451 		return (-1);
    452 	}
    453 	vp->v_data = so;
    454 	vn_setops(vp, socket_vnodeops);
    455 
    456 	so->so_priv 		= NULL;
    457 	so->so_oobmsg		= NULL;
    458 
    459 	so->so_proto_handle	= NULL;
    460 
    461 	so->so_peercred 	= NULL;
    462 
    463 	so->so_rcv_queued	= 0;
    464 	so->so_rcv_q_head 	= NULL;
    465 	so->so_rcv_q_last_head 	= NULL;
    466 	so->so_rcv_head		= NULL;
    467 	so->so_rcv_last_head	= NULL;
    468 	so->so_rcv_wanted	= 0;
    469 	so->so_rcv_timer_interval = SOCKET_NO_RCVTIMER;
    470 	so->so_rcv_timer_tid	= 0;
    471 	so->so_rcv_thresh	= 0;
    472 
    473 	so->so_acceptq_head	= NULL;
    474 	so->so_acceptq_tail	= &so->so_acceptq_head;
    475 	so->so_acceptq_next	= NULL;
    476 	so->so_acceptq_len	= 0;
    477 	so->so_backlog		= 0;
    478 
    479 	so->so_snd_qfull	= B_FALSE;
    480 
    481 	mutex_init(&so->so_lock, NULL, MUTEX_DEFAULT, NULL);
    482 	mutex_init(&so->so_acceptq_lock, NULL, MUTEX_DEFAULT, NULL);
    483 	rw_init(&so->so_fallback_rwlock, NULL, RW_DEFAULT, NULL);
    484 	cv_init(&so->so_state_cv, NULL, CV_DEFAULT, NULL);
    485 	cv_init(&so->so_want_cv, NULL, CV_DEFAULT, NULL);
    486 
    487 	cv_init(&so->so_acceptq_cv, NULL, CV_DEFAULT, NULL);
    488 	cv_init(&so->so_snd_cv, NULL, CV_DEFAULT, NULL);
    489 	cv_init(&so->so_rcv_cv, NULL, CV_DEFAULT, NULL);
    490 	cv_init(&so->so_copy_cv, NULL, CV_DEFAULT, NULL);
    491 	cv_init(&so->so_closing_cv, NULL, CV_DEFAULT, NULL);
    492 
    493 	return (0);
    494 }
    495 
    496 /*ARGSUSED*/
    497 void
    498 sonode_destructor(void *buf, void *cdrarg)
    499 {
    500 	struct sonode *so = buf;
    501 	struct vnode *vp = SOTOV(so);
    502 
    503 	ASSERT(so->so_priv == NULL);
    504 	ASSERT(so->so_peercred == NULL);
    505 
    506 	ASSERT(so->so_oobmsg == NULL);
    507 
    508 	ASSERT(so->so_rcv_q_head == NULL);
    509 
    510 	ASSERT(so->so_acceptq_head == NULL);
    511 	ASSERT(so->so_acceptq_tail == &so->so_acceptq_head);
    512 	ASSERT(so->so_acceptq_next == NULL);
    513 
    514 	ASSERT(vp->v_data == so);
    515 	ASSERT(vn_matchops(vp, socket_vnodeops));
    516 
    517 	vn_free(vp);
    518 
    519 	mutex_destroy(&so->so_lock);
    520 	mutex_destroy(&so->so_acceptq_lock);
    521 	rw_destroy(&so->so_fallback_rwlock);
    522 
    523 	cv_destroy(&so->so_state_cv);
    524 	cv_destroy(&so->so_want_cv);
    525 	cv_destroy(&so->so_acceptq_cv);
    526 	cv_destroy(&so->so_snd_cv);
    527 	cv_destroy(&so->so_rcv_cv);
    528 	cv_destroy(&so->so_closing_cv);
    529 }
    530 
    531 void
    532 sonode_init(struct sonode *so, struct sockparams *sp, int family,
    533     int type, int protocol, sonodeops_t *sops)
    534 {
    535 	vnode_t *vp;
    536 
    537 	vp = SOTOV(so);
    538 
    539 	so->so_flag	= 0;
    540 
    541 	so->so_state	= 0;
    542 	so->so_mode	= 0;
    543 
    544 	so->so_count	= 0;
    545 
    546 	so->so_family	= family;
    547 	so->so_type	= type;
    548 	so->so_protocol	= protocol;
    549 
    550 	SOCK_CONNID_INIT(so->so_proto_connid);
    551 
    552 	so->so_options	= 0;
    553 	so->so_linger.l_onoff   = 0;
    554 	so->so_linger.l_linger = 0;
    555 	so->so_sndbuf	= 0;
    556 	so->so_error	= 0;
    557 	so->so_rcvtimeo	= 0;
    558 	so->so_sndtimeo = 0;
    559 	so->so_xpg_rcvbuf = 0;
    560 
    561 	ASSERT(so->so_oobmsg == NULL);
    562 	so->so_oobmark	= 0;
    563 	so->so_pgrp	= 0;
    564 
    565 	ASSERT(so->so_peercred == NULL);
    566 
    567 	so->so_zoneid = getzoneid();
    568 
    569 	so->so_sockparams = sp;
    570 
    571 	so->so_ops = sops;
    572 
    573 	so->so_not_str = (sops != &sotpi_sonodeops);
    574 
    575 	so->so_proto_handle = NULL;
    576 
    577 	so->so_downcalls = NULL;
    578 
    579 	so->so_copyflag = 0;
    580 
    581 	ASSERT(so->so_acceptq_head == NULL);
    582 	ASSERT(so->so_acceptq_tail == &so->so_acceptq_head);
    583 	ASSERT(so->so_acceptq_next == NULL);
    584 
    585 	vn_reinit(vp);
    586 	vp->v_vfsp	= rootvfs;
    587 	vp->v_type	= VSOCK;
    588 	vp->v_rdev	= sockdev;
    589 
    590 	so->so_rcv_queued = 0;
    591 	so->so_rcv_q_head = NULL;
    592 	so->so_rcv_q_last_head = NULL;
    593 	so->so_rcv_head	= NULL;
    594 	so->so_rcv_last_head = NULL;
    595 
    596 	so->so_snd_qfull = B_FALSE;
    597 	so->so_minpsz = 0;
    598 
    599 	so->so_rcv_wakeup = B_FALSE;
    600 	so->so_snd_wakeup = B_FALSE;
    601 	so->so_flowctrld = B_FALSE;
    602 
    603 	so->so_pollev = 0;
    604 	bzero(&so->so_poll_list, sizeof (so->so_poll_list));
    605 	bzero(&so->so_proto_props, sizeof (struct sock_proto_props));
    606 
    607 	bzero(&(so->so_ksock_callbacks), sizeof (ksocket_callbacks_t));
    608 	so->so_ksock_cb_arg = NULL;
    609 
    610 	so->so_max_addr_len = sizeof (struct sockaddr_storage);
    611 
    612 	so->so_direct = NULL;
    613 
    614 	vn_exists(vp);
    615 }
    616 
    617 void
    618 sonode_fini(struct sonode *so)
    619 {
    620 	mblk_t *mp;
    621 	vnode_t *vp;
    622 
    623 	ASSERT(so->so_count == 0);
    624 
    625 	if (so->so_rcv_timer_tid) {
    626 		ASSERT(MUTEX_NOT_HELD(&so->so_lock));
    627 		(void) untimeout(so->so_rcv_timer_tid);
    628 		so->so_rcv_timer_tid = 0;
    629 	}
    630 
    631 	so_acceptq_flush(so, B_FALSE);
    632 
    633 	if ((mp = so->so_oobmsg) != NULL) {
    634 		freemsg(mp);
    635 		so->so_oobmsg = NULL;
    636 		so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA|
    637 		    SS_RCVATMARK);
    638 	}
    639 
    640 	if (so->so_poll_list.ph_list != NULL) {
    641 		pollwakeup(&so->so_poll_list, POLLERR);
    642 		pollhead_clean(&so->so_poll_list);
    643 	}
    644 
    645 	if (so->so_direct != NULL)
    646 		sod_sock_fini(so);
    647 
    648 	vp = SOTOV(so);
    649 	vn_invalid(vp);
    650 
    651 	if (so->so_peercred != NULL) {
    652 		crfree(so->so_peercred);
    653 		so->so_peercred = NULL;
    654 	}
    655 }
    656