Home | History | Annotate | Download | only in rds
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 
     27 #include <sys/types.h>
     28 #include <sys/stat.h>
     29 #include <sys/conf.h>
     30 #include <sys/ddi.h>
     31 #include <sys/sunddi.h>
     32 #include <sys/modctl.h>
     33 #include <inet/ip.h>
     34 #include <sys/ib/clients/rds/rdsib_ib.h>
     35 #include <sys/ib/clients/rds/rdsib_buf.h>
     36 #include <sys/ib/clients/rds/rdsib_cm.h>
     37 #include <sys/ib/clients/rds/rdsib_protocol.h>
     38 #include <sys/ib/clients/rds/rds_transport.h>
     39 #include <sys/ib/clients/rds/rds_kstat.h>
     40 
     41 /*
     42  * Global Configuration Variables
     43  * As defined in RDS proposal
     44  */
     45 uint_t		MaxNodes		= RDS_MAX_NODES;
     46 uint_t		RdsPktSize;
     47 uint_t		NDataRX;
     48 uint_t		MaxDataSendBuffers	= RDS_MAX_DATA_SEND_BUFFERS;
     49 uint_t		MaxDataRecvBuffers	= RDS_MAX_DATA_RECV_BUFFERS;
     50 uint_t		MaxCtrlSendBuffers	= RDS_MAX_CTRL_SEND_BUFFERS;
     51 uint_t		MaxCtrlRecvBuffers	= RDS_MAX_CTRL_RECV_BUFFERS;
     52 uint_t		DataRecvBufferLWM	= RDS_DATA_RECV_BUFFER_LWM;
     53 uint_t		CtrlRecvBufferLWM	= RDS_CTRL_RECV_BUFFER_LWM;
     54 uint_t		PendingRxPktsHWM	= RDS_PENDING_RX_PKTS_HWM;
     55 uint_t		MinRnrRetry		= RDS_IB_RNR_RETRY;
     56 uint8_t		IBPathRetryCount	= RDS_IB_PATH_RETRY;
     57 uint8_t		IBPktLifeTime		= RDS_IB_PKT_LT;
     58 
     59 extern int rdsib_open_ib();
     60 extern void rdsib_close_ib();
     61 extern void rds_resume_port(in_port_t port);
     62 extern int rds_sendmsg(uio_t *uiop, ipaddr_t sendip, ipaddr_t recvip,
     63     in_port_t sendport, in_port_t recvport, zoneid_t zoneid);
     64 extern boolean_t rds_if_lookup_by_name(char *devname);
     65 
     66 rds_transport_ops_t rds_ib_transport_ops = {
     67 	rdsib_open_ib,
     68 	rdsib_close_ib,
     69 	rds_sendmsg,
     70 	rds_resume_port,
     71 	rds_if_lookup_by_name
     72 };
     73 
     74 /* global */
     75 rds_state_t	*rdsib_statep = NULL;
     76 krwlock_t	rds_loopback_portmap_lock;
     77 uint8_t		rds_loopback_portmap[RDS_PORT_MAP_SIZE];
     78 ddi_taskq_t	*rds_taskq = NULL;
     79 dev_info_t	*rdsib_dev_info = NULL;
     80 uint_t		rds_rx_pkts_pending_hwm;
     81 
     82 #ifdef DEBUG
     83 uint32_t	rdsdbglvl = RDS_LOG_L3;
     84 #else
     85 uint32_t	rdsdbglvl = RDS_LOG_L2;
     86 #endif
     87 
     88 #define		RDS_NUM_TASKQ_THREADS	4
     89 
     90 static int rdsib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
     91 static int rdsib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
     92 static int rdsib_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
     93     void **result);
     94 static void rds_read_config_values(dev_info_t *dip);
     95 
     96 /* Driver entry points */
     97 static struct cb_ops	rdsib_cb_ops = {
     98 	nulldev,		/* open */
     99 	nulldev,		/* close */
    100 	nodev,			/* strategy */
    101 	nodev,			/* print */
    102 	nodev,			/* dump */
    103 	nodev,			/* read */
    104 	nodev,			/* write */
    105 	nodev,			/* ioctl */
    106 	nodev,			/* devmap */
    107 	nodev,			/* mmap */
    108 	nodev,			/* segmap */
    109 	nochpoll,		/* poll */
    110 	ddi_prop_op,		/* prop_op */
    111 	NULL,			/* stream */
    112 	D_MP,			/* cb_flag */
    113 	CB_REV,			/* rev */
    114 	nodev,			/* int (*cb_aread)() */
    115 	nodev,			/* int (*cb_awrite)() */
    116 };
    117 
    118 /* Device options */
    119 static struct dev_ops rdsib_ops = {
    120 	DEVO_REV,		/* devo_rev, */
    121 	0,			/* refcnt  */
    122 	rdsib_info,		/* info */
    123 	nulldev,		/* identify */
    124 	nulldev,		/* probe */
    125 	rdsib_attach,		/* attach */
    126 	rdsib_detach,		/* detach */
    127 	nodev,			/* reset */
    128 	&rdsib_cb_ops,		/* driver ops - devctl interfaces */
    129 	NULL,			/* bus operations */
    130 	NULL,			/* power */
    131 	ddi_quiesce_not_needed,	/* devo_quiesce */
    132 };
    133 
    134 /*
    135  * Module linkage information.
    136  */
    137 #define	RDS_DEVDESC	"RDS IB driver"
    138 static struct modldrv rdsib_modldrv = {
    139 	&mod_driverops,		/* Driver module */
    140 	RDS_DEVDESC,		/* Driver name and version */
    141 	&rdsib_ops,		/* Driver ops */
    142 };
    143 
    144 static struct modlinkage rdsib_modlinkage = {
    145 	MODREV_1,
    146 	(void *)&rdsib_modldrv,
    147 	NULL
    148 };
    149 
    150 /* Called from _init */
    151 int
    152 rdsib_init()
    153 {
    154 	/* RDS supports only one instance */
    155 	rdsib_statep = kmem_zalloc(sizeof (rds_state_t), KM_SLEEP);
    156 
    157 	rw_init(&rdsib_statep->rds_sessionlock, NULL, RW_DRIVER, NULL);
    158 	rw_init(&rdsib_statep->rds_hca_lock, NULL, RW_DRIVER, NULL);
    159 
    160 	rw_init(&rds_loopback_portmap_lock, NULL, RW_DRIVER, NULL);
    161 	bzero(rds_loopback_portmap, RDS_PORT_MAP_SIZE);
    162 
    163 	mutex_init(&rds_dpool.pool_lock, NULL, MUTEX_DRIVER, NULL);
    164 	cv_init(&rds_dpool.pool_cv, NULL, CV_DRIVER, NULL);
    165 	mutex_init(&rds_cpool.pool_lock, NULL, MUTEX_DRIVER, NULL);
    166 	cv_init(&rds_cpool.pool_cv, NULL, CV_DRIVER, NULL);
    167 
    168 	/* Initialize logging */
    169 	rds_logging_initialization();
    170 
    171 	RDS_SET_NPORT(1); /* this should never be 0 */
    172 
    173 	ASSERT(rds_transport_ops == NULL);
    174 	rds_transport_ops = &rds_ib_transport_ops;
    175 
    176 	return (0);
    177 }
    178 
    179 /* Called from _fini */
    180 void
    181 rdsib_fini()
    182 {
    183 	/* Stop logging */
    184 	rds_logging_destroy();
    185 
    186 	cv_destroy(&rds_dpool.pool_cv);
    187 	mutex_destroy(&rds_dpool.pool_lock);
    188 	cv_destroy(&rds_cpool.pool_cv);
    189 	mutex_destroy(&rds_cpool.pool_lock);
    190 
    191 	rw_destroy(&rds_loopback_portmap_lock);
    192 
    193 	rw_destroy(&rdsib_statep->rds_hca_lock);
    194 	rw_destroy(&rdsib_statep->rds_sessionlock);
    195 	kmem_free(rdsib_statep, sizeof (rds_state_t));
    196 
    197 	rds_transport_ops = NULL;
    198 }
    199 
    200 int
    201 _init(void)
    202 {
    203 	int	ret;
    204 
    205 	if (ibt_hw_is_present() == 0) {
    206 		return (ENODEV);
    207 	}
    208 
    209 	ret = rdsib_init();
    210 	if (ret != 0) {
    211 		return (ret);
    212 	}
    213 
    214 	ret = mod_install(&rdsib_modlinkage);
    215 	if (ret != 0) {
    216 		/*
    217 		 * Could not load module
    218 		 */
    219 		rdsib_fini();
    220 		return (ret);
    221 	}
    222 
    223 	return (0);
    224 }
    225 
    226 int
    227 _fini()
    228 {
    229 	int	ret;
    230 
    231 	/*
    232 	 * Remove module
    233 	 */
    234 	if ((ret = mod_remove(&rdsib_modlinkage)) != 0) {
    235 		return (ret);
    236 	}
    237 
    238 	rdsib_fini();
    239 
    240 	return (0);
    241 }
    242 
    243 int
    244 _info(struct modinfo *modinfop)
    245 {
    246 	return (mod_info(&rdsib_modlinkage, modinfop));
    247 }
    248 
    249 static int
    250 rdsib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
    251 {
    252 	int	ret;
    253 
    254 	RDS_DPRINTF2("rdsib_attach", "enter");
    255 
    256 	if (cmd != DDI_ATTACH)
    257 		return (DDI_FAILURE);
    258 
    259 	if (rdsib_dev_info != NULL) {
    260 		RDS_DPRINTF1("rdsib_attach", "Multiple RDS instances are"
    261 		    " not supported (rds_dev_info: 0x%p)", rdsib_dev_info);
    262 		return (DDI_FAILURE);
    263 	}
    264 
    265 	rdsib_dev_info = dip;
    266 	rds_read_config_values(dip);
    267 
    268 	rds_taskq = ddi_taskq_create(dip, "rds_taskq", RDS_NUM_TASKQ_THREADS,
    269 	    TASKQ_DEFAULTPRI, 0);
    270 	if (rds_taskq == NULL) {
    271 		RDS_DPRINTF1("rdsib_attach",
    272 		    "ddi_taskq_create failed for rds_taskq");
    273 		rdsib_dev_info = NULL;
    274 		return (DDI_FAILURE);
    275 	}
    276 
    277 	ret = ddi_create_minor_node(dip, "rdsib", S_IFCHR, 0, DDI_PSEUDO, 0);
    278 	if (ret != DDI_SUCCESS) {
    279 		RDS_DPRINTF1("rdsib_attach",
    280 		    "ddi_create_minor_node failed: %d", ret);
    281 		ddi_taskq_destroy(rds_taskq);
    282 		rds_taskq = NULL;
    283 		rdsib_dev_info = NULL;
    284 		return (DDI_FAILURE);
    285 	}
    286 
    287 	/* Max number of receive buffers on the system */
    288 	NDataRX = (MaxNodes - 1) * MaxDataRecvBuffers * 2;
    289 
    290 	/*
    291 	 * High water mark for the receive buffers in the system. If the
    292 	 * number of buffers used crosses this mark then all sockets in
    293 	 * would be stalled. The port quota for the sockets is set based
    294 	 * on this limit.
    295 	 */
    296 	rds_rx_pkts_pending_hwm = (PendingRxPktsHWM * NDataRX)/100;
    297 
    298 	ret = rdsib_initialize_ib();
    299 	if (ret != 0) {
    300 		RDS_DPRINTF1("rdsib_attach",
    301 		    "rdsib_initialize_ib failed: %d", ret);
    302 		ddi_taskq_destroy(rds_taskq);
    303 		rds_taskq = NULL;
    304 		rdsib_dev_info = NULL;
    305 		return (DDI_FAILURE);
    306 	}
    307 
    308 	RDS_DPRINTF2("rdsib_attach", "return");
    309 
    310 	return (DDI_SUCCESS);
    311 }
    312 
    313 static int
    314 rdsib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
    315 {
    316 	RDS_DPRINTF2("rdsib_detach", "enter");
    317 
    318 	if (cmd != DDI_DETACH)
    319 		return (DDI_FAILURE);
    320 
    321 	rdsib_deinitialize_ib();
    322 
    323 	ddi_remove_minor_node(dip, "rdsib");
    324 
    325 	/* destroy taskq */
    326 	if (rds_taskq != NULL) {
    327 		ddi_taskq_destroy(rds_taskq);
    328 		rds_taskq = NULL;
    329 	}
    330 
    331 	rdsib_dev_info = NULL;
    332 
    333 	RDS_DPRINTF2("rdsib_detach", "return");
    334 
    335 	return (DDI_SUCCESS);
    336 }
    337 
    338 /* ARGSUSED */
    339 static int
    340 rdsib_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
    341 {
    342 	int ret = DDI_FAILURE;
    343 
    344 	switch (cmd) {
    345 	case DDI_INFO_DEVT2DEVINFO:
    346 		if (rdsib_dev_info != NULL) {
    347 			*result = (void *)rdsib_dev_info;
    348 			ret = DDI_SUCCESS;
    349 		}
    350 		break;
    351 
    352 	case DDI_INFO_DEVT2INSTANCE:
    353 		*result = NULL;
    354 		ret = DDI_SUCCESS;
    355 		break;
    356 
    357 	default:
    358 		break;
    359 	}
    360 
    361 	return (ret);
    362 }
    363 
    364 static void
    365 rds_read_config_values(dev_info_t *dip)
    366 {
    367 	MaxNodes = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
    368 	    "MaxNodes", RDS_MAX_NODES);
    369 
    370 	UserBufferSize = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
    371 	    DDI_PROP_DONTPASS, "UserBufferSize", RDS_USER_DATA_BUFFER_SIZE);
    372 
    373 	MaxDataSendBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
    374 	    DDI_PROP_DONTPASS, "MaxDataSendBuffers", RDS_MAX_DATA_SEND_BUFFERS);
    375 
    376 	MaxDataRecvBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
    377 	    DDI_PROP_DONTPASS, "MaxDataRecvBuffers", RDS_MAX_DATA_RECV_BUFFERS);
    378 
    379 	MaxCtrlSendBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
    380 	    DDI_PROP_DONTPASS, "MaxCtrlSendBuffers", RDS_MAX_CTRL_SEND_BUFFERS);
    381 
    382 	MaxCtrlRecvBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
    383 	    DDI_PROP_DONTPASS, "MaxCtrlRecvBuffers", RDS_MAX_CTRL_RECV_BUFFERS);
    384 
    385 	DataRecvBufferLWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
    386 	    DDI_PROP_DONTPASS, "DataRecvBufferLWM", RDS_DATA_RECV_BUFFER_LWM);
    387 
    388 	CtrlRecvBufferLWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
    389 	    DDI_PROP_DONTPASS, "CtrlRecvBufferLWM", RDS_CTRL_RECV_BUFFER_LWM);
    390 
    391 	PendingRxPktsHWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
    392 	    DDI_PROP_DONTPASS, "PendingRxPktsHWM", RDS_PENDING_RX_PKTS_HWM);
    393 
    394 	MinRnrRetry = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
    395 	    "MinRnrRetry", RDS_IB_RNR_RETRY);
    396 
    397 	IBPathRetryCount = (uint8_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
    398 	    DDI_PROP_DONTPASS, "IBPathRetryCount", RDS_IB_PATH_RETRY);
    399 
    400 	IBPktLifeTime = (uint8_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
    401 	    DDI_PROP_DONTPASS, "IBPktLifeTime", RDS_IB_PKT_LT);
    402 
    403 	rdsdbglvl = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
    404 	    "rdsdbglvl", RDS_LOG_L2);
    405 
    406 	if (MaxNodes < 2) {
    407 		cmn_err(CE_WARN, "MaxNodes is set to less than 2");
    408 		MaxNodes = 2;
    409 	}
    410 }
    411