Home | History | Annotate | Download | only in rpc
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  * Copyright (c) 2008, The Ohio State University. All rights reserved.
     28  *
     29  * Portions of this source code is developed by the team members of
     30  * The Ohio State University's Network-Based Computing Laboratory (NBCL),
     31  * headed by Professor Dhabaleswar K. (DK) Panda.
     32  *
     33  * Acknowledgements to contributions from developors:
     34  *   Ranjit Noronha: noronha (at) cse.ohio-state.edu
     35  *   Lei Chai      : chail (at) cse.ohio-state.edu
     36  *   Weikuan Yu    : yuw (at) cse.ohio-state.edu
     37  *
     38  */
     39 
     40 #include <sys/systm.h>
     41 #include <sys/kstat.h>
     42 #include <sys/modctl.h>
     43 #include <sys/sdt.h>
     44 #include <rpc/rpc_rdma.h>
     45 
     46 #include <sys/ib/ibtl/ibti.h>
     47 
     48 uint_t rdma_minchunk = RDMA_MINCHUNK;
     49 
     50 /*
     51  * Globals
     52  */
     53 int rdma_modloaded = 0;		/* flag to load RDMA plugin modules */
     54 int rdma_dev_available = 0;	/* if any RDMA device is loaded */
     55 kmutex_t rdma_modload_lock;	/* protects rdma_modloaded flag */
     56 
     57 rdma_svc_wait_t rdma_wait;
     58 
     59 rdma_registry_t	*rdma_mod_head = NULL;	/* head for RDMA modules */
     60 krwlock_t	rdma_lock;		/* protects rdma_mod_head list */
     61 ldi_ident_t rpcmod_li = NULL;	/* identifies us with ldi_ framework */
     62 
     63 kmem_cache_t *clist_cache = NULL;
     64 
     65 /*
     66  * Statics
     67  */
     68 ldi_handle_t rpcib_handle = NULL;
     69 
     70 /*
     71  * Externs
     72  */
     73 extern	kstat_named_t	*rdmarcstat_ptr;
     74 extern	uint_t		rdmarcstat_ndata;
     75 extern	kstat_named_t	*rdmarsstat_ptr;
     76 extern	uint_t		rdmarsstat_ndata;
     77 
     78 void rdma_kstat_init();
     79 
     80 /*
     81  * RDMATF module registration routine.
     82  * This routine is expected to be called by the init routine in
     83  * the plugin modules.
     84  */
     85 rdma_stat
     86 rdma_register_mod(rdma_mod_t *mod)
     87 {
     88 	rdma_registry_t **mp, *m;
     89 
     90 	if (mod->rdma_version != RDMATF_VERS) {
     91 		return (RDMA_BADVERS);
     92 	}
     93 
     94 	rw_enter(&rdma_lock, RW_WRITER);
     95 	/*
     96 	 * Ensure not already registered
     97 	 */
     98 	mp = &rdma_mod_head;
     99 	while (*mp != NULL) {
    100 		if (strncmp((*mp)->r_mod->rdma_api, mod->rdma_api,
    101 		    KNC_STRSIZE) == 0) {
    102 			if ((*mp)->r_mod_state == RDMA_MOD_INACTIVE) {
    103 				(*mp)->r_mod_state = RDMA_MOD_ACTIVE;
    104 				(*mp)->r_mod->rdma_ops = mod->rdma_ops;
    105 				(*mp)->r_mod->rdma_count = mod->rdma_count;
    106 				goto announce_hca;
    107 			}
    108 			rw_exit(&rdma_lock);
    109 			return (RDMA_REG_EXIST);
    110 		}
    111 		mp = &((*mp)->r_next);
    112 	}
    113 
    114 	/*
    115 	 * New one, create and add to registry
    116 	 */
    117 	m = kmem_alloc(sizeof (rdma_registry_t), KM_SLEEP);
    118 	m->r_mod = kmem_alloc(sizeof (rdma_mod_t), KM_SLEEP);
    119 	*m->r_mod = *mod;
    120 	m->r_next = NULL;
    121 	m->r_mod->rdma_api = kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
    122 	(void) strncpy(m->r_mod->rdma_api, mod->rdma_api, KNC_STRSIZE);
    123 	m->r_mod->rdma_api[KNC_STRSIZE - 1] = '\0';
    124 	m->r_mod_state = RDMA_MOD_ACTIVE;
    125 	*mp = m;
    126 
    127 announce_hca:
    128 	rw_exit(&rdma_lock);
    129 	/*
    130 	 * Start the nfs service on the rdma xprts.
    131 	 * (this notification mechanism will need to change when we support
    132 	 * multiple hcas and have support for multiple rdma plugins).
    133 	 */
    134 	mutex_enter(&rdma_wait.svc_lock);
    135 	rdma_wait.svc_stat = RDMA_HCA_ATTACH;
    136 	cv_signal(&rdma_wait.svc_cv);
    137 	mutex_exit(&rdma_wait.svc_lock);
    138 
    139 	return (RDMA_SUCCESS);
    140 }
    141 
    142 /*
    143  * RDMATF module unregistration routine.
    144  * This routine is expected to be called by the fini routine in
    145  * the plugin modules.
    146  */
    147 rdma_stat
    148 rdma_unregister_mod(rdma_mod_t *mod)
    149 {
    150 	rdma_registry_t **m, *mmod = NULL;
    151 
    152 	rw_enter(&rdma_lock, RW_WRITER);
    153 
    154 	m = &rdma_mod_head;
    155 	while (*m != NULL) {
    156 		if (strncmp((*m)->r_mod->rdma_api, mod->rdma_api,
    157 		    KNC_STRSIZE) != 0) {
    158 			m = &((*m)->r_next);
    159 			continue;
    160 		}
    161 		/*
    162 		 * Check if any device attached, if so return error
    163 		 */
    164 		if (mod->rdma_count != 0) {
    165 			rw_exit(&rdma_lock);
    166 			return (RDMA_FAILED);
    167 		}
    168 		/*
    169 		 * Found entry. Mark it inactive.
    170 		 */
    171 		mmod = *m;
    172 		mmod->r_mod->rdma_count = 0;
    173 		mmod->r_mod_state = RDMA_MOD_INACTIVE;
    174 		break;
    175 	}
    176 
    177 	rdma_modloaded = 0;
    178 	rdma_dev_available = 0;
    179 	rw_exit(&rdma_lock);
    180 
    181 	/*
    182 	 * Stop the nfs service running on the rdma xprts.
    183 	 * (this notification mechanism will need to change when we support
    184 	 * multiple hcas and have support for multiple rdma plugins).
    185 	 */
    186 	mutex_enter(&rdma_wait.svc_lock);
    187 	rdma_wait.svc_stat = RDMA_HCA_DETACH;
    188 	cv_signal(&rdma_wait.svc_cv);
    189 	mutex_exit(&rdma_wait.svc_lock);
    190 
    191 	/*
    192 	 * Not found.
    193 	 */
    194 	return (RDMA_SUCCESS);
    195 }
    196 
    197 struct clist *
    198 clist_alloc(void)
    199 {
    200 	struct clist *clp;
    201 
    202 	clp = kmem_cache_alloc(clist_cache, KM_SLEEP);
    203 
    204 	bzero(clp, sizeof (*clp));
    205 
    206 	return (clp);
    207 }
    208 
    209 uint32_t
    210 clist_len(struct clist *cl)
    211 {
    212 	uint32_t len = 0;
    213 	while (cl) {
    214 		len += cl->c_len;
    215 		cl = cl->c_next;
    216 	}
    217 	return (len);
    218 }
    219 
    220 void
    221 clist_zero_len(struct clist *cl)
    222 {
    223 	while (cl != NULL) {
    224 		if (cl->c_dmemhandle.mrc_rmr == 0)
    225 			break;
    226 		cl->c_len = 0;
    227 		cl = cl->c_next;
    228 	}
    229 }
    230 
    231 /*
    232  * Creates a new chunk list entry, and
    233  * adds it to the end of a chunk list.
    234  */
    235 void
    236 clist_add(struct clist **clp, uint32_t xdroff, int len,
    237 	struct mrc *shandle, caddr_t saddr,
    238 	struct mrc *dhandle, caddr_t daddr)
    239 {
    240 	struct clist *cl;
    241 
    242 	/* Find the end of the list */
    243 
    244 	while (*clp != NULL)
    245 		clp = &((*clp)->c_next);
    246 
    247 	cl = clist_alloc();
    248 	cl->c_xdroff = xdroff;
    249 	cl->c_len = len;
    250 	cl->w.c_saddr = (uint64_t)(uintptr_t)saddr;
    251 	if (shandle)
    252 		cl->c_smemhandle = *shandle;
    253 	cl->u.c_daddr = (uint64_t)(uintptr_t)daddr;
    254 	if (dhandle)
    255 		cl->c_dmemhandle = *dhandle;
    256 	cl->c_next = NULL;
    257 
    258 	*clp = cl;
    259 }
    260 
    261 rdma_stat
    262 clist_register(CONN *conn, struct clist *cl, clist_dstsrc dstsrc)
    263 {
    264 	struct clist *c;
    265 	int status;
    266 
    267 	for (c = cl; c; c = c->c_next) {
    268 		if (c->c_len <= 0)
    269 			continue;
    270 
    271 		c->c_regtype = dstsrc;
    272 
    273 		switch (dstsrc) {
    274 		case CLIST_REG_SOURCE:
    275 			status = RDMA_REGMEMSYNC(conn,
    276 			    (caddr_t)(struct as *)c->c_adspc,
    277 			    (caddr_t)(uintptr_t)c->w.c_saddr3, c->c_len,
    278 			    &c->c_smemhandle, (void **)&c->c_ssynchandle,
    279 			    (void *)c->rb_longbuf.rb_private);
    280 			break;
    281 		case CLIST_REG_DST:
    282 			status = RDMA_REGMEMSYNC(conn,
    283 			    (caddr_t)(struct as *)c->c_adspc,
    284 			    (caddr_t)(uintptr_t)c->u.c_daddr3, c->c_len,
    285 			    &c->c_dmemhandle, (void **)&c->c_dsynchandle,
    286 			    (void *)c->rb_longbuf.rb_private);
    287 			break;
    288 		default:
    289 			return (RDMA_INVAL);
    290 		}
    291 		if (status != RDMA_SUCCESS) {
    292 			(void) clist_deregister(conn, cl);
    293 			return (status);
    294 		}
    295 	}
    296 
    297 	return (RDMA_SUCCESS);
    298 }
    299 
    300 rdma_stat
    301 clist_deregister(CONN *conn, struct clist *cl)
    302 {
    303 	struct clist *c;
    304 
    305 	for (c = cl; c; c = c->c_next) {
    306 		switch (c->c_regtype) {
    307 		case CLIST_REG_SOURCE:
    308 			if (c->c_smemhandle.mrc_rmr != 0) {
    309 				(void) RDMA_DEREGMEMSYNC(conn,
    310 				    (caddr_t)(uintptr_t)c->w.c_saddr3,
    311 				    c->c_smemhandle,
    312 				    (void *)(uintptr_t)c->c_ssynchandle,
    313 				    (void *)c->rb_longbuf.rb_private);
    314 				c->c_smemhandle.mrc_rmr = 0;
    315 				c->c_ssynchandle = NULL;
    316 			}
    317 			break;
    318 		case CLIST_REG_DST:
    319 			if (c->c_dmemhandle.mrc_rmr != 0) {
    320 				(void) RDMA_DEREGMEMSYNC(conn,
    321 				    (caddr_t)(uintptr_t)c->u.c_daddr3,
    322 				    c->c_dmemhandle,
    323 				    (void *)(uintptr_t)c->c_dsynchandle,
    324 				    (void *)c->rb_longbuf.rb_private);
    325 				c->c_dmemhandle.mrc_rmr = 0;
    326 				c->c_dsynchandle = NULL;
    327 			}
    328 			break;
    329 		default:
    330 			/* clist unregistered. continue */
    331 			break;
    332 		}
    333 	}
    334 
    335 	return (RDMA_SUCCESS);
    336 }
    337 
    338 rdma_stat
    339 clist_syncmem(CONN *conn, struct clist *cl, clist_dstsrc dstsrc)
    340 {
    341 	struct clist *c;
    342 	rdma_stat status;
    343 
    344 	c = cl;
    345 	switch (dstsrc) {
    346 	case CLIST_REG_SOURCE:
    347 		while (c != NULL) {
    348 			if (c->c_ssynchandle) {
    349 				status = RDMA_SYNCMEM(conn,
    350 				    (void *)(uintptr_t)c->c_ssynchandle,
    351 				    (caddr_t)(uintptr_t)c->w.c_saddr3,
    352 				    c->c_len, 0);
    353 				if (status != RDMA_SUCCESS)
    354 					return (status);
    355 			}
    356 			c = c->c_next;
    357 		}
    358 		break;
    359 	case CLIST_REG_DST:
    360 		while (c != NULL) {
    361 			if (c->c_ssynchandle) {
    362 				status = RDMA_SYNCMEM(conn,
    363 				    (void *)(uintptr_t)c->c_dsynchandle,
    364 				    (caddr_t)(uintptr_t)c->u.c_daddr3,
    365 				    c->c_len, 1);
    366 				if (status != RDMA_SUCCESS)
    367 					return (status);
    368 			}
    369 			c = c->c_next;
    370 		}
    371 		break;
    372 	default:
    373 		return (RDMA_INVAL);
    374 	}
    375 
    376 	return (RDMA_SUCCESS);
    377 }
    378 
    379 /*
    380  * Frees up entries in chunk list
    381  */
    382 void
    383 clist_free(struct clist *cl)
    384 {
    385 	struct clist *c = cl;
    386 
    387 	while (c != NULL) {
    388 		cl = cl->c_next;
    389 		kmem_cache_free(clist_cache, c);
    390 		c = cl;
    391 	}
    392 }
    393 
    394 rdma_stat
    395 rdma_clnt_postrecv(CONN *conn, uint32_t xid)
    396 {
    397 	struct clist *cl = NULL;
    398 	rdma_stat retval;
    399 	rdma_buf_t rbuf = {0};
    400 
    401 	rbuf.type = RECV_BUFFER;
    402 	if (RDMA_BUF_ALLOC(conn, &rbuf)) {
    403 		return (RDMA_NORESOURCE);
    404 	}
    405 
    406 	clist_add(&cl, 0, rbuf.len, &rbuf.handle, rbuf.addr,
    407 	    NULL, NULL);
    408 	retval = RDMA_CLNT_RECVBUF(conn, cl, xid);
    409 	clist_free(cl);
    410 
    411 	return (retval);
    412 }
    413 
    414 rdma_stat
    415 rdma_clnt_postrecv_remove(CONN *conn, uint32_t xid)
    416 {
    417 	return (RDMA_CLNT_RECVBUF_REMOVE(conn, xid));
    418 }
    419 
    420 rdma_stat
    421 rdma_svc_postrecv(CONN *conn)
    422 {
    423 	struct clist *cl = NULL;
    424 	rdma_stat retval;
    425 	rdma_buf_t rbuf = {0};
    426 
    427 	rbuf.type = RECV_BUFFER;
    428 	if (RDMA_BUF_ALLOC(conn, &rbuf)) {
    429 		retval = RDMA_NORESOURCE;
    430 	} else {
    431 		clist_add(&cl, 0, rbuf.len, &rbuf.handle, rbuf.addr,
    432 		    NULL, NULL);
    433 		retval = RDMA_SVC_RECVBUF(conn, cl);
    434 		clist_free(cl);
    435 	}
    436 	return (retval);
    437 }
    438 
    439 rdma_stat
    440 rdma_buf_alloc(CONN *conn, rdma_buf_t *rbuf)
    441 {
    442 	return (RDMA_BUF_ALLOC(conn, rbuf));
    443 }
    444 
    445 void
    446 rdma_buf_free(CONN *conn, rdma_buf_t *rbuf)
    447 {
    448 	if (!rbuf || rbuf->addr == NULL) {
    449 		return;
    450 	}
    451 	RDMA_BUF_FREE(conn, rbuf);
    452 	bzero(rbuf, sizeof (rdma_buf_t));
    453 }
    454 
    455 /*
    456  * Caller is holding rdma_modload_lock mutex
    457  */
    458 int
    459 rdma_modload()
    460 {
    461 	int status;
    462 	ASSERT(MUTEX_HELD(&rdma_modload_lock));
    463 	/*
    464 	 * Load all available RDMA plugins which right now is only IB plugin.
    465 	 * If no IB hardware is present, then quit right away.
    466 	 * ENODEV -- For no device on the system
    467 	 * EPROTONOSUPPORT -- For module not avilable either due to failure to
    468 	 * load or some other reason.
    469 	 */
    470 	rdma_modloaded = 1;
    471 	if (ibt_hw_is_present() == 0) {
    472 		rdma_dev_available = 0;
    473 		return (ENODEV);
    474 	}
    475 
    476 	rdma_dev_available = 1;
    477 	if (rpcmod_li == NULL)
    478 		return (EPROTONOSUPPORT);
    479 
    480 	status = ldi_open_by_name("/devices/ib/rpcib@0:rpcib",
    481 	    FREAD | FWRITE, kcred,
    482 	    &rpcib_handle, rpcmod_li);
    483 
    484 	if (status != 0)
    485 		return (EPROTONOSUPPORT);
    486 
    487 
    488 	/*
    489 	 * We will need to reload the plugin module after it was unregistered
    490 	 * but the resources below need to allocated only the first time.
    491 	 */
    492 	if (!clist_cache) {
    493 		clist_cache = kmem_cache_create("rdma_clist",
    494 		    sizeof (struct clist), _POINTER_ALIGNMENT, NULL,
    495 		    NULL, NULL, NULL, 0, 0);
    496 		rdma_kstat_init();
    497 	}
    498 
    499 	(void) ldi_close(rpcib_handle, FREAD|FWRITE, kcred);
    500 
    501 	return (0);
    502 }
    503 
    504 void
    505 rdma_kstat_init(void)
    506 {
    507 	kstat_t *ksp;
    508 
    509 	/*
    510 	 * The RDMA framework doesn't know how to deal with Zones, and is
    511 	 * only available in the global zone.
    512 	 */
    513 	ASSERT(INGLOBALZONE(curproc));
    514 	ksp = kstat_create_zone("unix", 0, "rpc_rdma_client", "rpc",
    515 	    KSTAT_TYPE_NAMED, rdmarcstat_ndata,
    516 	    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, GLOBAL_ZONEID);
    517 	if (ksp) {
    518 		ksp->ks_data = (void *) rdmarcstat_ptr;
    519 		kstat_install(ksp);
    520 	}
    521 
    522 	ksp = kstat_create_zone("unix", 0, "rpc_rdma_server", "rpc",
    523 	    KSTAT_TYPE_NAMED, rdmarsstat_ndata,
    524 	    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, GLOBAL_ZONEID);
    525 	if (ksp) {
    526 		ksp->ks_data = (void *) rdmarsstat_ptr;
    527 		kstat_install(ksp);
    528 	}
    529 }
    530 
    531 rdma_stat
    532 rdma_kwait(void)
    533 {
    534 	int ret;
    535 	rdma_stat stat;
    536 
    537 	mutex_enter(&rdma_wait.svc_lock);
    538 
    539 	ret = cv_wait_sig(&rdma_wait.svc_cv, &rdma_wait.svc_lock);
    540 
    541 	/*
    542 	 * If signalled by a hca attach/detach, pass the right
    543 	 * stat back.
    544 	 */
    545 
    546 	if (ret)
    547 		stat =  rdma_wait.svc_stat;
    548 	else
    549 		stat = RDMA_INTR;
    550 
    551 	mutex_exit(&rdma_wait.svc_lock);
    552 
    553 	return (stat);
    554 }
    555