Home | History | Annotate | Download | only in ibcm
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  * ibcm_impl.c
     28  *
     29  * contains internal functions of IB CM module.
     30  *
     31  * TBD:
     32  * 1. HCA CATASTROPHIC/RECOVERED not handled yet
     33  */
     34 
     35 #include <sys/ib/mgt/ibcm/ibcm_impl.h>
     36 #include <sys/disp.h>
     37 
     38 
     39 /* function prototypes */
     40 static ibcm_status_t	ibcm_init(void);
     41 static ibcm_status_t	ibcm_fini(void);
     42 
     43 /* Routines to initialize and destroy CM global locks and CVs */
     44 static void		ibcm_init_locks(void);
     45 static void		ibcm_fini_locks(void);
     46 
     47 /* Routines that initialize/teardown CM's global hca structures */
     48 static void		ibcm_init_hcas();
     49 static ibcm_status_t	ibcm_fini_hcas();
     50 
     51 static void		ibcm_init_classportinfo();
     52 static void		ibcm_stop_timeout_thread();
     53 
     54 /* Routines that handle HCA attach/detach asyncs */
     55 static void		ibcm_hca_attach(ib_guid_t);
     56 static ibcm_status_t	ibcm_hca_detach(ibcm_hca_info_t *);
     57 
     58 /* Routines that initialize the HCA's port related fields */
     59 static ibt_status_t	ibcm_hca_init_port(ibcm_hca_info_t *hcap,
     60 			    uint8_t port_index);
     61 static ibcm_status_t	ibcm_hca_fini_port(ibcm_hca_info_t *hcap,
     62 			    uint8_t port_index);
     63 
     64 static void ibcm_rc_flow_control_init(void);
     65 static void ibcm_rc_flow_control_fini(void);
     66 
     67 /*
     68  * Routines that check if hca's avl trees and sidr lists are free of any
     69  * active client resources ie., RC or UD state structures in certain states
     70  */
     71 static ibcm_status_t	ibcm_check_avl_clean(ibcm_hca_info_t *hcap);
     72 static ibcm_status_t	ibcm_check_sidr_clean(ibcm_hca_info_t *hcap);
     73 
     74 /* Add a new hca structure to CM's global hca list */
     75 static ibcm_hca_info_t	*ibcm_add_hca_entry(ib_guid_t hcaguid, uint_t nports);
     76 
     77 static void		ibcm_comm_est_handler(ibt_async_event_t *);
     78 void			ibcm_async_handler(void *, ibt_hca_hdl_t,
     79 			    ibt_async_code_t, ibt_async_event_t *);
     80 
/* Global variables */
char			cmlog[] = "ibcm";	/* for debug log messages */
ibt_clnt_hdl_t		ibcm_ibt_handle;	/* IBT handle */
kmutex_t		ibcm_svc_info_lock;	/* list lock */
kcondvar_t		ibcm_svc_info_cv;	/* cv for deregister */
kmutex_t		ibcm_recv_mutex;
/* service AVL tree; created in ibcm_init_locks(), ordered by ibcm_svc_compare */
avl_tree_t		ibcm_svc_avl_tree;
/* set to system_taskq by ibcm_init() */
taskq_t			*ibcm_taskq = NULL;
int			taskq_dispatch_fail_cnt;

kmutex_t		ibcm_mcglist_lock;	/* MCG list lock */
kmutex_t		ibcm_trace_mutex;	/* Trace mutex */
kmutex_t		ibcm_trace_print_mutex;	/* Trace print mutex */
int			ibcm_conn_max_trcnt = IBCM_MAX_CONN_TRCNT;

/*
 * NOTE(review): the initial value is 2, although this variable was
 * previously commented as "Trace level 4 by default" -- confirm which of
 * the two is intended.
 */
int			ibcm_enable_trace = 2;
int			ibcm_dtrace = 0; /* conditionally enable more dtrace */
     98 
     99 _NOTE(MUTEX_PROTECTS_DATA(ibcm_svc_info_lock, ibcm_svc_info_s::{svc_bind_list
    100     svc_ref_cnt svc_to_delete}))
    101 
    102 _NOTE(MUTEX_PROTECTS_DATA(ibcm_svc_info_lock, ibcm_svc_bind_s::{sbind_link}))
    103 
    104 _NOTE(MUTEX_PROTECTS_DATA(ibcm_trace_mutex, ibcm_conn_trace_s))
    105 
    106 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_conn_trace_s))
    107 
    108 _NOTE(MUTEX_PROTECTS_DATA(ibcm_trace_print_mutex, ibcm_debug_buf))
    109 
    110 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_debug_buf))
    111 
    112 /*
    113  * Initial state is INIT. All hca dr's return success immediately in this
    114  * state, without adding or deleting any hca's to CM.
    115  */
    116 ibcm_finit_state_t	ibcm_finit_state = IBCM_FINIT_INIT;
    117 
    118 /* mutex and cv to manage hca's reference and resource count(s) */
    119 kmutex_t		ibcm_global_hca_lock;
    120 kcondvar_t		ibcm_global_hca_cv;
    121 
    122 /* mutex and cv to sa session open */
    123 kmutex_t		ibcm_sa_open_lock;
    124 kcondvar_t		ibcm_sa_open_cv;
    125 int			ibcm_sa_timeout_delay = 1;		/* in ticks */
    126 _NOTE(MUTEX_PROTECTS_DATA(ibcm_sa_open_lock,
    127     ibcm_port_info_s::{port_ibmf_saa_hdl port_saa_open_in_progress}))
    128 
    129 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_port_info_s::{port_ibmf_saa_hdl}))
    130 
    131 /* serialize sm notice callbacks */
    132 kmutex_t		ibcm_sm_notice_serialize_lock;
    133 
    134 _NOTE(LOCK_ORDER(ibcm_sm_notice_serialize_lock ibcm_global_hca_lock))
    135 
    136 _NOTE(MUTEX_PROTECTS_DATA(ibcm_global_hca_lock, ibcm_hca_info_s::{hca_state
    137     hca_svc_cnt hca_acc_cnt hca_res_cnt hca_next}))
    138 
    139 _NOTE(MUTEX_PROTECTS_DATA(ibcm_global_hca_lock,
    140     ibcm_port_info_s::{port_ibmf_hdl}))
    141 
    142 _NOTE(MUTEX_PROTECTS_DATA(ibcm_sm_notice_serialize_lock,
    143     ibcm_port_info_s::{port_event_status}))
    144 
    145 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_hca_info_s::{hca_state}))
    146 _NOTE(DATA_READABLE_WITHOUT_LOCK(
    147     ibcm_hca_info_s::{hca_port_info.port_ibmf_hdl}))
    148 
    149 /* mutex for CM's qp list management */
    150 kmutex_t		ibcm_qp_list_lock;
    151 
    152 _NOTE(MUTEX_PROTECTS_DATA(ibcm_qp_list_lock, ibcm_port_info_s::{port_qplist}))
    153 _NOTE(MUTEX_PROTECTS_DATA(ibcm_qp_list_lock, ibcm_qp_list_s))
    154 _NOTE(MUTEX_PROTECTS_DATA(ibcm_qp_list_lock, ibcm_qp_list_s))
    155 
    156 kcondvar_t		ibcm_timeout_list_cv;
    157 kcondvar_t		ibcm_timeout_thread_done_cv;
    158 kt_did_t		ibcm_timeout_thread_did;
    159 ibcm_state_data_t	*ibcm_timeout_list_hdr, *ibcm_timeout_list_tail;
    160 ibcm_ud_state_data_t	*ibcm_ud_timeout_list_hdr, *ibcm_ud_timeout_list_tail;
    161 kmutex_t		ibcm_timeout_list_lock;
    162 uint8_t			ibcm_timeout_list_flags = 0;
    163 pri_t			ibcm_timeout_thread_pri = MINCLSYSPRI;
    164 
    165 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock,
    166     ibcm_state_data_s::timeout_next))
    167 
    168 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock,
    169     ibcm_ud_state_data_s::ud_timeout_next))
    170 
/*
 * Flow control logic for open_rc_channel uses the following.
 *
 * A single global instance (ibcm_open) is defined together with the type.
 * The code that maintains these fields is not in this part of the file, so
 * the per-field notes below are assumptions drawn from the names only --
 * TODO confirm against the flow control routines.
 */

struct ibcm_open_s {
	kmutex_t		mutex;
	kcondvar_t		cv;
	uint8_t			task_running;
	uint_t			queued;
	uint_t			exit_deferred;
	uint_t			in_progress;
	uint_t			in_progress_max;
	uint_t			sends;
	uint_t			sends_max;
	uint_t			sends_lowat;	/* presumably low water mark */
	uint_t			sends_hiwat;	/* presumably high water mark */
	/* head is embedded, tail is a pointer: presumably a queue of statep's */
	ibcm_state_data_t	*tail;
	ibcm_state_data_t	head;
} ibcm_open;
    190 
/*
 * Flow control logic for SA access and close_rc_channel calls follows.
 *
 * Three independent ibcm_flow_t instances are defined below: one each for
 * SA access (saa), close, and lapr requests.
 */

/* Tunable limits; presumably the simul_max seed of each flow -- confirm */
int ibcm_close_simul_max	= 12;
int ibcm_lapr_simul_max		= 12;
int ibcm_saa_simul_max		= 8;

/* One element of a flow's singly linked list (see ibcm_flow_s::list) */
typedef struct ibcm_flow1_s {
	struct ibcm_flow1_s	*link;
	kcondvar_t		cv;
	uint8_t			waiters;	/* 1 to IBCM_FLOW_SIMUL_MAX */
} ibcm_flow1_t;

/* Per-class flow control state */
typedef struct ibcm_flow_s {
	ibcm_flow1_t		*list;
	uint_t			simul;	/* #requests currently outstanding */
	uint_t			simul_max;
	uint_t			waiters_per_chunk;
	uint_t			lowat;
	uint_t			lowat_default;
	/* statistics */
	uint_t			total;
} ibcm_flow_t;

ibcm_flow_t ibcm_saa_flow;
ibcm_flow_t ibcm_close_flow;
ibcm_flow_t ibcm_lapr_flow;
    219 
/*
 * NONBLOCKING close requests are queued.
 * A single global instance (ibcm_close) is defined together with the type;
 * head is embedded and tail is a pointer, the same layout as ibcm_open.
 */
struct ibcm_close_s {
	kmutex_t		mutex;
	ibcm_state_data_t	*tail;
	ibcm_state_data_t	head;
} ibcm_close;

/*
 * Client module information handed to ibt_attach() by ibcm_init().
 * ibcm_async_handler is the async event callback registered with IBTF.
 */
static ibt_clnt_modinfo_t ibcm_ibt_modinfo = {	/* Client's modinfop */
	IBTI_V_CURR,
	IBT_CM,
	ibcm_async_handler,
	NULL,
	"IBCM"
};

/*
 * IBCM's list of HCAs registered with it.
 * Walked through hca_next; ibcm_fini_hcas() asserts that
 * ibcm_global_hca_lock is held while traversing it.
 */
static ibcm_hca_info_t	*ibcm_hca_listp = NULL;	/* CM's HCA list */
    237 
/*
 * Array of CM state call table functions, one handler per CM message type.
 * NOTE(review): the entry order presumably has to match the index that the
 * MAD receive path computes from the incoming message type -- confirm
 * before reordering or inserting entries.
 */
ibcm_state_handler_t	ibcm_sm_funcs_tbl[] = {
	ibcm_process_req_msg,
	ibcm_process_mra_msg,
	ibcm_process_rej_msg,
	ibcm_process_rep_msg,
	ibcm_process_rtu_msg,
	ibcm_process_dreq_msg,
	ibcm_process_drep_msg,
	ibcm_process_sidr_req_msg,
	ibcm_process_sidr_rep_msg,
	ibcm_process_lap_msg,
	ibcm_process_apr_msg
};
    252 
/* the following globals are CM tunables */
ibt_rnr_nak_time_t	ibcm_default_rnr_nak_time = IBT_RNR_NAK_655ms;

uint8_t		ibcm_max_retries = IBCM_MAX_RETRIES;
/* converted with ibt_usec2ib() in ibcm_init_classportinfo(), i.e. usecs */
clock_t		ibcm_local_processing_time = IBCM_LOCAL_RESPONSE_TIME;
clock_t		ibcm_remote_response_time = IBCM_REMOTE_RESPONSE_TIME;
ib_time_t	ibcm_max_sidr_rep_proctime = IBCM_MAX_SIDR_PROCESS_TIME;
ib_time_t	ibcm_max_sidr_pktlife_time = IBCM_MAX_SIDR_PKT_LIFE_TIME;

/* NOTE(review): units of 18 are not visible here (ib_time_t) -- confirm */
ib_time_t	ibcm_max_sidr_rep_store_time = 18;
uint32_t	ibcm_wait_for_acc_cnt_timeout = 2000000;	/* 2 sec */

ib_time_t	ibcm_max_ib_pkt_lt = IBCM_MAX_IB_PKT_LT;
ib_time_t	ibcm_max_ib_mad_pkt_lt = IBCM_MAX_IB_MAD_PKT_LT;

/*
 * This delay accounts for time involved in various activities as follows :
 *
 * IBMF delays for posting the MADs in non-blocking mode
 * IBMF delays for receiving the MADs and delivering to CM
 * CM delays in processing the MADs before invoking client handlers,
 * Any other delays associated with HCA driver in processing the MADs and
 * 	other subsystems that CM may invoke (ex : SA, HCA driver)
 */
uint32_t	ibcm_sw_delay	= 1000;	/* 1000us / 1ms */
uint32_t	ibcm_max_sa_retries = IBCM_MAX_SA_RETRIES + 1;

/*	approx boot time */
uint32_t	ibcm_adj_btime = 4;	/* 4 seconds */

/*
 * The information in ibcm_clpinfo is kept in wireformat and is setup at
 * init time (by ibcm_init_classportinfo()), and used read-only after that
 */
ibcm_classportinfo_msg_t	ibcm_clpinfo;
    288 
/*
 * Fixed-width (space padded) names used when printing trace events.
 * The first and last entries are sentinels that should never be printed.
 * NOTE(review): presumably indexed by the connection trace event code, so
 * the order must stay in step with that enumeration -- confirm.
 */
char	*event_str[] = {
	"NEVER SEE THIS             ",
	"SESSION_ID                 ",
	"CHAN_HDL                   ",
	"LOCAL_COMID/HCA/PORT       ",
	"LOCAL_QPN                  ",
	"REMOTE_COMID/HCA           ",
	"REMOTE_QPN                 ",
	"BASE_TIME                  ",
	"INCOMING_REQ               ",
	"INCOMING_REP               ",
	"INCOMING_RTU               ",
	"INCOMING_COMEST            ",
	"INCOMING_MRA               ",
	"INCOMING_REJ               ",
	"INCOMING_LAP               ",
	"INCOMING_APR               ",
	"INCOMING_DREQ              ",
	"INCOMING_DREP              ",
	"OUTGOING_REQ               ",
	"OUTGOING_REP               ",
	"OUTGOING_RTU               ",
	"OUTGOING_LAP               ",
	"OUTGOING_APR               ",
	"OUTGOING_MRA               ",
	"OUTGOING_REJ               ",
	"OUTGOING_DREQ              ",
	"OUTGOING_DREP              ",
	"REQ_POST_COMPLETE          ",
	"REP_POST_COMPLETE          ",
	"RTU_POST_COMPLETE          ",
	"MRA_POST_COMPLETE          ",
	"REJ_POST_COMPLETE          ",
	"LAP_POST_COMPLETE          ",
	"APR_POST_COMPLETE          ",
	"DREQ_POST_COMPLETE         ",
	"DREP_POST_COMPLETE         ",
	"TIMEOUT_REP                ",
	"CALLED_REQ_RCVD_EVENT      ",
	"RET_REQ_RCVD_EVENT         ",
	"CALLED_REP_RCVD_EVENT      ",
	"RET_REP_RCVD_EVENT         ",
	"CALLED_CONN_EST_EVENT      ",
	"RET_CONN_EST_EVENT         ",
	"CALLED_CONN_FAIL_EVENT     ",
	"RET_CONN_FAIL_EVENT        ",
	"CALLED_CONN_CLOSE_EVENT    ",
	"RET_CONN_CLOSE_EVENT       ",
	"INIT_INIT                  ",
	"INIT_INIT_FAIL             ",
	"INIT_RTR                   ",
	"INIT_RTR_FAIL              ",
	"RTR_RTS                    ",
	"RTR_RTS_FAIL               ",
	"RTS_RTS                    ",
	"RTS_RTS_FAIL               ",
	"TO_ERROR                   ",
	"ERROR_FAIL                 ",
	"SET_ALT                    ",
	"SET_ALT_FAIL               ",
	"STALE_DETECT               ",
	"OUTGOING_REQ_RETRY         ",
	"OUTGOING_REP_RETRY         ",
	"OUTGOING_LAP_RETRY         ",
	"OUTGOING_MRA_RETRY         ",
	"OUTGOING_DREQ_RETRY        ",
	"NEVER SEE THIS             "
};
    357 
    358 char	ibcm_debug_buf[IBCM_DEBUG_BUF_SIZE];
    359 
    360 _NOTE(SCHEME_PROTECTS_DATA("used in a localized function consistently",
    361     ibcm_debug_buf))
    362 _NOTE(READ_ONLY_DATA(ibcm_taskq))
    363 
    364 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock, ibcm_timeout_list_flags))
    365 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock, ibcm_timeout_list_hdr))
    366 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock, ibcm_ud_timeout_list_hdr))
    367 
    368 #ifdef DEBUG
    369 int		ibcm_test_mode = 0;	/* set to 1, if running tests */
    370 #endif
    371 
    372 
/* Module Driver Info: CM is a "misc" module (mod_miscops) */
static struct modlmisc ibcm_modlmisc = {
	&mod_miscops,
	"IB Communication Manager"
};

/*
 * Module Linkage: passed to mod_install(), mod_info() and mod_remove() by
 * _init(), _info() and _fini() respectively.
 */
static struct modlinkage ibcm_modlinkage = {
	MODREV_1,
	&ibcm_modlmisc,
	NULL
};
    385 
    386 
    387 int
    388 _init(void)
    389 {
    390 	int		rval;
    391 	ibcm_status_t	status;
    392 
    393 	status = ibcm_init();
    394 	if (status != IBCM_SUCCESS) {
    395 		IBTF_DPRINTF_L2(cmlog, "_init: ibcm failed %d", status);
    396 		return (EINVAL);
    397 	}
    398 
    399 	rval = mod_install(&ibcm_modlinkage);
    400 	if (rval != 0) {
    401 		IBTF_DPRINTF_L2(cmlog, "_init: ibcm mod_install failed %d",
    402 		    rval);
    403 		(void) ibcm_fini();
    404 	}
    405 
    406 	IBTF_DPRINTF_L5(cmlog, "_init: ibcm successful");
    407 	return (rval);
    408 
    409 }
    410 
    411 
    412 int
    413 _info(struct modinfo *modinfop)
    414 {
    415 	return (mod_info(&ibcm_modlinkage, modinfop));
    416 }
    417 
    418 
    419 int
    420 _fini(void)
    421 {
    422 	int status;
    423 
    424 	if (ibcm_fini() != IBCM_SUCCESS)
    425 		return (EBUSY);
    426 
    427 	if ((status = mod_remove(&ibcm_modlinkage)) != 0) {
    428 		IBTF_DPRINTF_L2(cmlog, "_fini: ibcm mod_remove failed %d",
    429 		    status);
    430 		return (status);
    431 	}
    432 
    433 	IBTF_DPRINTF_L5(cmlog, "_fini: ibcm successful");
    434 
    435 	return (status);
    436 }
    437 
    438 /* Initializes all global mutex and CV in cm module */
    439 static void
    440 ibcm_init_locks()
    441 {
    442 
    443 	/* Verify CM MAD sizes */
    444 #ifdef DEBUG
    445 
    446 	if (ibcm_test_mode > 1) {
    447 
    448 		IBTF_DPRINTF_L1(cmlog, "REQ MAD SIZE %d",
    449 		    sizeof (ibcm_req_msg_t));
    450 		IBTF_DPRINTF_L1(cmlog, "REP MAD SIZE %d",
    451 		    sizeof (ibcm_rep_msg_t));
    452 		IBTF_DPRINTF_L1(cmlog, "RTU MAD SIZE %d",
    453 		    sizeof (ibcm_rtu_msg_t));
    454 		IBTF_DPRINTF_L1(cmlog, "MRA MAD SIZE %d",
    455 		    sizeof (ibcm_mra_msg_t));
    456 		IBTF_DPRINTF_L1(cmlog, "REJ MAD SIZE %d",
    457 		    sizeof (ibcm_rej_msg_t));
    458 		IBTF_DPRINTF_L1(cmlog, "LAP MAD SIZE %d",
    459 		    sizeof (ibcm_lap_msg_t));
    460 		IBTF_DPRINTF_L1(cmlog, "APR MAD SIZE %d",
    461 		    sizeof (ibcm_apr_msg_t));
    462 		IBTF_DPRINTF_L1(cmlog, "DREQ MAD SIZE %d",
    463 		    sizeof (ibcm_dreq_msg_t));
    464 		IBTF_DPRINTF_L1(cmlog, "DREP MAD SIZE %d",
    465 		    sizeof (ibcm_drep_msg_t));
    466 		IBTF_DPRINTF_L1(cmlog, "SIDR REQ MAD SIZE %d",
    467 		    sizeof (ibcm_sidr_req_msg_t));
    468 		IBTF_DPRINTF_L1(cmlog, "SIDR REP MAD SIZE %d",
    469 		    sizeof (ibcm_sidr_rep_msg_t));
    470 	}
    471 
    472 #endif
    473 
    474 	/* Create all global locks within cm module */
    475 	mutex_init(&ibcm_svc_info_lock, NULL, MUTEX_DEFAULT, NULL);
    476 	mutex_init(&ibcm_mcglist_lock, NULL, MUTEX_DEFAULT, NULL);
    477 	mutex_init(&ibcm_timeout_list_lock, NULL, MUTEX_DEFAULT, NULL);
    478 	mutex_init(&ibcm_global_hca_lock, NULL, MUTEX_DEFAULT, NULL);
    479 	mutex_init(&ibcm_sa_open_lock, NULL, MUTEX_DEFAULT, NULL);
    480 	mutex_init(&ibcm_recv_mutex, NULL, MUTEX_DEFAULT, NULL);
    481 	mutex_init(&ibcm_sm_notice_serialize_lock, NULL, MUTEX_DEFAULT, NULL);
    482 	mutex_init(&ibcm_qp_list_lock, NULL, MUTEX_DEFAULT, NULL);
    483 	mutex_init(&ibcm_trace_mutex, NULL, MUTEX_DEFAULT, NULL);
    484 	mutex_init(&ibcm_trace_print_mutex, NULL, MUTEX_DEFAULT, NULL);
    485 	cv_init(&ibcm_svc_info_cv, NULL, CV_DRIVER, NULL);
    486 	cv_init(&ibcm_timeout_list_cv, NULL, CV_DRIVER, NULL);
    487 	cv_init(&ibcm_timeout_thread_done_cv, NULL, CV_DRIVER, NULL);
    488 	cv_init(&ibcm_global_hca_cv, NULL, CV_DRIVER, NULL);
    489 	cv_init(&ibcm_sa_open_cv, NULL, CV_DRIVER, NULL);
    490 	avl_create(&ibcm_svc_avl_tree, ibcm_svc_compare,
    491 	    sizeof (ibcm_svc_info_t),
    492 	    offsetof(struct ibcm_svc_info_s, svc_link));
    493 
    494 	IBTF_DPRINTF_L5(cmlog, "ibcm_init_locks: done");
    495 }
    496 
    497 /* Destroys all global mutex and CV in cm module */
    498 static void
    499 ibcm_fini_locks()
    500 {
    501 	/* Destroy all global locks within cm module */
    502 	mutex_destroy(&ibcm_svc_info_lock);
    503 	mutex_destroy(&ibcm_mcglist_lock);
    504 	mutex_destroy(&ibcm_timeout_list_lock);
    505 	mutex_destroy(&ibcm_global_hca_lock);
    506 	mutex_destroy(&ibcm_sa_open_lock);
    507 	mutex_destroy(&ibcm_recv_mutex);
    508 	mutex_destroy(&ibcm_sm_notice_serialize_lock);
    509 	mutex_destroy(&ibcm_qp_list_lock);
    510 	mutex_destroy(&ibcm_trace_mutex);
    511 	mutex_destroy(&ibcm_trace_print_mutex);
    512 	cv_destroy(&ibcm_svc_info_cv);
    513 	cv_destroy(&ibcm_timeout_list_cv);
    514 	cv_destroy(&ibcm_timeout_thread_done_cv);
    515 	cv_destroy(&ibcm_global_hca_cv);
    516 	cv_destroy(&ibcm_sa_open_cv);
    517 	avl_destroy(&ibcm_svc_avl_tree);
    518 
    519 	IBTF_DPRINTF_L5(cmlog, "ibcm_fini_locks: done");
    520 }
    521 
    522 
    523 /* Initialize CM's classport info */
    524 static void
    525 ibcm_init_classportinfo()
    526 {
    527 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_clpinfo));
    528 
    529 	ibcm_clpinfo.BaseVersion = IBCM_MAD_BASE_VERSION;
    530 	ibcm_clpinfo.ClassVersion = IBCM_MAD_CLASS_VERSION;
    531 
    532 	/* For now, CM supports same capabilities at all ports */
    533 	ibcm_clpinfo.CapabilityMask =
    534 	    h2b16(IBCM_CPINFO_CAP_RC | IBCM_CPINFO_CAP_SIDR);
    535 
    536 	/* Bits 0-7 are all 0 for Communication Mgmt Class */
    537 
    538 	/* For now, CM has the same respvalue at all ports */
    539 	ibcm_clpinfo.RespTimeValue_plus =
    540 	    h2b32(ibt_usec2ib(ibcm_local_processing_time) & 0x1f);
    541 
    542 	/* For now, redirect fields are set to 0 */
    543 	/* Trap fields are not applicable to CM, hence set to 0 */
    544 
    545 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_clpinfo));
    546 	IBTF_DPRINTF_L5(cmlog, "ibcm_init_classportinfo: done");
    547 }
    548 
/*
 * ibcm_init():
 *	Brings up CM's global state, in this order:
 *	- set up classportinfo, the id allocators, and the global locks/CVs
 *	  (ibcm_init_locks() also creates the service AVL tree)
 *	- initialize AR and RC flow control
 *	- start the timeout list processing thread
 *	- call ibt_attach()
 *	- attach the HCAs that are already present before CM got loaded
 *
 *	Each failure path undoes the steps that had completed before it.
 *
 * Arguments:	NONE
 *
 * Return values:
 *	IBCM_SUCCESS - success
 *	IBCM_FAILURE - ibcm_init_ids(), ibcm_ar_init() or ibt_attach() failed
 */
static ibcm_status_t
ibcm_init(void)
{
	ibt_status_t	status;
	kthread_t	*t;

	IBTF_DPRINTF_L3(cmlog, "ibcm_init:");

	ibcm_init_classportinfo();

	if (ibcm_init_ids() != IBCM_SUCCESS) {
		IBTF_DPRINTF_L1(cmlog, "ibcm_init: "
		    "fatal error: vmem_create() failed");
		return (IBCM_FAILURE);
	}
	ibcm_init_locks();

	if (ibcm_ar_init() != IBCM_SUCCESS) {
		IBTF_DPRINTF_L1(cmlog, "ibcm_init: "
		    "fatal error: ibcm_ar_init() failed");
		ibcm_fini_ids();
		ibcm_fini_locks();
		return (IBCM_FAILURE);
	}
	ibcm_rc_flow_control_init();

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_taskq))
	ibcm_taskq = system_taskq;
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_taskq))

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_flags))
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_timeout_thread_did))

	/* Start the timeout list processing thread */
	ibcm_timeout_list_flags = 0;
	t = thread_create(NULL, 0, ibcm_process_tlist, 0, 0, &p0, TS_RUN,
	    ibcm_timeout_thread_pri);
	/* remembered so that ibcm_stop_timeout_thread() can thread_join() it */
	ibcm_timeout_thread_did = t->t_did;

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_flags))
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_timeout_thread_did))

	/*
	 * NOTE : if ibt_attach is done after ibcm_init_hcas, then some
	 * HCA DR events may be lost. CM could call re-init hca list
	 * again, but it is more complicated. Some HCA's DR's lost may
	 * be HCA detach, which makes hca list re-syncing and locking more
	 * complex
	 */
	status = ibt_attach(&ibcm_ibt_modinfo, NULL, NULL, &ibcm_ibt_handle);
	if (status != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_init(): ibt_attach failed %d",
		    status);
		/*
		 * The timeout thread is stopped before ibcm_fini_locks(),
		 * since stopping it takes ibcm_timeout_list_lock.
		 */
		(void) ibcm_ar_fini();
		ibcm_stop_timeout_thread();
		ibcm_fini_ids();
		ibcm_fini_locks();
		ibcm_rc_flow_control_fini();
		return (IBCM_FAILURE);
	}

	/* Block all HCA attach/detach asyncs */
	mutex_enter(&ibcm_global_hca_lock);

	ibcm_init_hcas();
	ibcm_finit_state = IBCM_FINIT_IDLE;

	ibcm_path_cache_init();
	/*
	 * This callback will be used by IBTL to get the Node record for a
	 * given LID via the specified HCA and port.
	 */
	ibtl_cm_set_node_info_cb(ibcm_ibtl_node_info);

	/* Unblock any waiting HCA DR asyncs in CM */
	mutex_exit(&ibcm_global_hca_lock);

	IBTF_DPRINTF_L4(cmlog, "ibcm_init: done");
	return (IBCM_SUCCESS);
}
    641 
    642 /* Allocates and initializes the "per hca" global data in CM */
    643 static void
    644 ibcm_init_hcas()
    645 {
    646 	uint_t	num_hcas = 0;
    647 	ib_guid_t *guid_array;
    648 	int i;
    649 
    650 	IBTF_DPRINTF_L5(cmlog, "ibcm_init_hcas:");
    651 
    652 	/* Get the number of HCAs */
    653 	num_hcas = ibt_get_hca_list(&guid_array);
    654 	IBTF_DPRINTF_L4(cmlog, "ibcm_init_hcas: ibt_get_hca_list() "
    655 	    "returned %d hcas", num_hcas);
    656 
    657 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
    658 
    659 	for (i = 0; i < num_hcas; i++)
    660 		ibcm_hca_attach(guid_array[i]);
    661 
    662 	if (num_hcas)
    663 		ibt_free_hca_list(guid_array, num_hcas);
    664 
    665 	IBTF_DPRINTF_L5(cmlog, "ibcm_init_hcas: done");
    666 }
    667 
    668 
    669 /*
    670  * ibcm_fini():
    671  * 	- Deregister w/ ibt
    672  * 	- Cleanup IBCM HCA listp
    673  * 	- Destroy mutexes
    674  *
    675  * Arguments:	NONE
    676  *
    677  * Return values:
    678  *	IBCM_SUCCESS - success
    679  */
    680 static ibcm_status_t
    681 ibcm_fini(void)
    682 {
    683 	ibt_status_t	status;
    684 
    685 	IBTF_DPRINTF_L3(cmlog, "ibcm_fini:");
    686 
    687 	/*
    688 	 * CM assumes that the all general clients got rid of all the
    689 	 * established connections and service registrations, completed all
    690 	 * pending SIDR operations before a call to ibcm_fini()
    691 	 */
    692 
    693 	if (ibcm_ar_fini() != IBCM_SUCCESS) {
    694 		IBTF_DPRINTF_L2(cmlog, "ibcm_fini: ibcm_ar_fini failed");
    695 		return (IBCM_FAILURE);
    696 	}
    697 
    698 	/* cleanup the svcinfo list */
    699 	mutex_enter(&ibcm_svc_info_lock);
    700 	if (avl_first(&ibcm_svc_avl_tree) != NULL) {
    701 		IBTF_DPRINTF_L2(cmlog, "ibcm_fini: "
    702 		    "ibcm_svc_avl_tree is not empty");
    703 		mutex_exit(&ibcm_svc_info_lock);
    704 		return (IBCM_FAILURE);
    705 	}
    706 	mutex_exit(&ibcm_svc_info_lock);
    707 
    708 	/* disables any new hca attach/detaches */
    709 	mutex_enter(&ibcm_global_hca_lock);
    710 
    711 	ibcm_finit_state = IBCM_FINIT_BUSY;
    712 
    713 	if (ibcm_fini_hcas() != IBCM_SUCCESS) {
    714 		IBTF_DPRINTF_L2(cmlog, "ibcm_fini: "
    715 		    "some hca's still have client resources");
    716 
    717 		/* First, re-initialize the hcas */
    718 		ibcm_init_hcas();
    719 		/* and then enable the HCA asyncs */
    720 		ibcm_finit_state = IBCM_FINIT_IDLE;
    721 		mutex_exit(&ibcm_global_hca_lock);
    722 		if (ibcm_ar_init() != IBCM_SUCCESS) {
    723 			IBTF_DPRINTF_L1(cmlog, "ibcm_fini:ibcm_ar_init failed");
    724 		}
    725 		return (IBCM_FAILURE);
    726 	}
    727 
    728 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_hdr))
    729 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_ud_timeout_list_hdr))
    730 
    731 	ASSERT(ibcm_timeout_list_hdr == NULL);
    732 	ASSERT(ibcm_ud_timeout_list_hdr == NULL);
    733 
    734 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_hdr))
    735 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_ud_timeout_list_hdr))
    736 
    737 	/* Release any pending asyncs on ibcm_global_hca_lock */
    738 	ibcm_finit_state = IBCM_FINIT_SUCCESS;
    739 	mutex_exit(&ibcm_global_hca_lock);
    740 
    741 	ibcm_stop_timeout_thread();
    742 
    743 	ibtl_cm_set_node_info_cb(NULL);
    744 	/*
    745 	 * Detach from IBTL. Waits until all pending asyncs are complete.
    746 	 * Above cv_broadcast wakes up any waiting hca attach/detach asyncs
    747 	 */
    748 	status = ibt_detach(ibcm_ibt_handle);
    749 
    750 	/* if detach fails, CM didn't free up some resources, so assert */
    751 	if (status != IBT_SUCCESS)
    752 		IBTF_DPRINTF_L1(cmlog, "ibcm_fini: ibt_detach failed %d",
    753 		    status);
    754 
    755 	ibcm_rc_flow_control_fini();
    756 
    757 	ibcm_path_cache_fini();
    758 
    759 	ibcm_fini_ids();
    760 	ibcm_fini_locks();
    761 	IBTF_DPRINTF_L3(cmlog, "ibcm_fini: done");
    762 	return (IBCM_SUCCESS);
    763 }
    764 
    765 /* This routine exit's the ibcm timeout thread  */
    766 static void
    767 ibcm_stop_timeout_thread()
    768 {
    769 	mutex_enter(&ibcm_timeout_list_lock);
    770 
    771 	/* Stop the timeout list processing thread */
    772 	ibcm_timeout_list_flags =
    773 	    ibcm_timeout_list_flags | IBCM_TIMEOUT_THREAD_EXIT;
    774 
    775 	/* Wake up, if the timeout thread is on a cv_wait */
    776 	cv_signal(&ibcm_timeout_list_cv);
    777 
    778 	mutex_exit(&ibcm_timeout_list_lock);
    779 	thread_join(ibcm_timeout_thread_did);
    780 
    781 	IBTF_DPRINTF_L5(cmlog, "ibcm_stop_timeout_thread: done");
    782 }
    783 
    784 
    785 /* Attempts to release all the hca's associated with CM */
    786 static ibcm_status_t
    787 ibcm_fini_hcas()
    788 {
    789 	ibcm_hca_info_t *hcap, *next;
    790 
    791 	IBTF_DPRINTF_L4(cmlog, "ibcm_fini_hcas:");
    792 
    793 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
    794 
    795 	hcap = ibcm_hca_listp;
    796 	while (hcap != NULL) {
    797 		next = hcap->hca_next;
    798 		if (ibcm_hca_detach(hcap) != IBCM_SUCCESS) {
    799 			ibcm_hca_listp = hcap;
    800 			return (IBCM_FAILURE);
    801 		}
    802 		hcap = next;
    803 	}
    804 
    805 	IBTF_DPRINTF_L4(cmlog, "ibcm_fini_hcas: SUCCEEDED");
    806 	return (IBCM_SUCCESS);
    807 }
    808 
    809 
    810 /*
    811  * ibcm_hca_attach():
    812  *	Called as an asynchronous event to notify CM of an attach of HCA.
    813  *	Here ibcm_hca_info_t is initialized and all fields are
    814  *	filled in along with SA Access handles and IBMA handles.
    815  *	Also called from ibcm_init to initialize ibcm_hca_info_t's for each
    816  *	hca's
    817  *
    818  * Arguments: (WILL CHANGE BASED ON ASYNC EVENT CODE)
    819  *	hca_guid	- HCA's guid
    820  *
    821  * Return values: NONE
    822  */
    823 static void
    824 ibcm_hca_attach(ib_guid_t hcaguid)
    825 {
    826 	int			i;
    827 	ibt_status_t		status;
    828 	uint8_t			nports = 0;
    829 	ibcm_hca_info_t		*hcap;
    830 	ibt_hca_attr_t		hca_attrs;
    831 
    832 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_attach: guid = 0x%llX", hcaguid);
    833 
    834 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
    835 
    836 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*hcap))
    837 
    838 	status = ibt_query_hca_byguid(hcaguid, &hca_attrs);
    839 	if (status != IBT_SUCCESS) {
    840 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_attach: "
    841 		    "ibt_query_hca_byguid failed = %d", status);
    842 		return;
    843 	}
    844 	nports = hca_attrs.hca_nports;
    845 
    846 	IBTF_DPRINTF_L4(cmlog, "ibcm_hca_attach: num ports = %x", nports);
    847 
    848 	if ((hcap = ibcm_add_hca_entry(hcaguid, nports)) == NULL)
    849 		return;
    850 
    851 	hcap->hca_guid = hcaguid;	/* Set GUID */
    852 	hcap->hca_num_ports = nports;	/* Set number of ports */
    853 
    854 	if (ibcm_init_hca_ids(hcap) != IBCM_SUCCESS) {
    855 		ibcm_delete_hca_entry(hcap);
    856 		return;
    857 	}
    858 
    859 	/* Store the static hca attribute data */
    860 	hcap->hca_caps = hca_attrs.hca_flags;
    861 	hcap->hca_vendor_id = hca_attrs.hca_vendor_id;
    862 	hcap->hca_device_id = hca_attrs.hca_device_id;
    863 	hcap->hca_ack_delay = hca_attrs.hca_local_ack_delay;
    864 	hcap->hca_max_rdma_in_qp = hca_attrs.hca_max_rdma_in_qp;
    865 	hcap->hca_max_rdma_out_qp = hca_attrs.hca_max_rdma_out_qp;
    866 
    867 	/* loop thru nports and initialize IBMF handles */
    868 	for (i = 0; i < hcap->hca_num_ports; i++) {
    869 		status = ibt_get_port_state_byguid(hcaguid, i + 1, NULL, NULL);
    870 		if (status != IBT_SUCCESS) {
    871 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_attach: "
    872 			    "port_num %d state DOWN", i + 1);
    873 		}
    874 
    875 		hcap->hca_port_info[i].port_hcap = hcap;
    876 		hcap->hca_port_info[i].port_num = i+1;
    877 
    878 		if (ibcm_hca_init_port(hcap, i) != IBT_SUCCESS)
    879 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_attach: "
    880 			    "ibcm_hca_init_port failed %d port_num %d",
    881 			    status, i+1);
    882 	}
    883 
    884 	/* create the "active" CM AVL tree */
    885 	avl_create(&hcap->hca_active_tree, ibcm_active_node_compare,
    886 	    sizeof (ibcm_state_data_t),
    887 	    offsetof(struct ibcm_state_data_s, avl_active_link));
    888 
    889 	/* create the "passive" CM AVL tree */
    890 	avl_create(&hcap->hca_passive_tree, ibcm_passive_node_compare,
    891 	    sizeof (ibcm_state_data_t),
    892 	    offsetof(struct ibcm_state_data_s, avl_passive_link));
    893 
    894 	/* create the "passive comid" CM AVL tree */
    895 	avl_create(&hcap->hca_passive_comid_tree,
    896 	    ibcm_passive_comid_node_compare,
    897 	    sizeof (ibcm_state_data_t),
    898 	    offsetof(struct ibcm_state_data_s, avl_passive_comid_link));
    899 
    900 	/*
    901 	 * Mark the state of the HCA to "attach" only at the end
    902 	 * Now CM starts accepting incoming MADs and client API calls
    903 	 */
    904 	hcap->hca_state = IBCM_HCA_ACTIVE;
    905 
    906 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*hcap))
    907 
    908 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_attach: ATTACH Done");
    909 }
    910 
    911 /*
    912  * ibcm_hca_detach():
    913  *	Called as an asynchronous event to notify CM of a detach of HCA.
    914  *	Here ibcm_hca_info_t is freed up and all fields that
    915  *	were initialized earlier are cleaned up
    916  *
    917  * Arguments: (WILL CHANGE BASED ON ASYNC EVENT CODE)
    918  *	hca_guid    - HCA's guid
    919  *
    920  * Return values:
    921  *	IBCM_SUCCESS	- able to detach HCA
    922  *	IBCM_FAILURE	- failed to detach HCA
    923  */
    924 static ibcm_status_t
    925 ibcm_hca_detach(ibcm_hca_info_t *hcap)
    926 {
    927 	int		port_index, i;
    928 	ibcm_status_t	status = IBCM_SUCCESS;
    929 	clock_t		absolute_time;
    930 
    931 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_detach: hcap = 0x%p guid = 0x%llX",
    932 	    hcap, hcap->hca_guid);
    933 
    934 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
    935 
    936 	/*
    937 	 * Declare hca is going away to all CM clients. Wait until the
    938 	 * access count becomes zero.
    939 	 */
    940 	hcap->hca_state = IBCM_HCA_NOT_ACTIVE;
    941 
    942 	/* wait on response CV */
    943 	absolute_time = ddi_get_lbolt() +
    944 	    drv_usectohz(ibcm_wait_for_acc_cnt_timeout);
    945 
    946 	while (hcap->hca_acc_cnt > 0)
    947 		if (cv_timedwait(&ibcm_global_hca_cv, &ibcm_global_hca_lock,
    948 		    absolute_time) == -1)
    949 			break;
    950 
    951 	if (hcap->hca_acc_cnt != 0) {
    952 		/* We got a timeout */
    953 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: Aborting due"
    954 		    " to timeout on hca_acc_cnt %u, \n Some CM Clients are "
    955 		    "still active, looks like we need to wait some more time "
    956 		    "(ibcm_wait_for_acc_cnt_timeout).", hcap->hca_acc_cnt);
    957 		hcap->hca_state = IBCM_HCA_ACTIVE;
    958 		return (IBCM_FAILURE);
    959 	}
    960 
    961 	/*
    962 	 * First make sure, there are no active users of ibma handles,
    963 	 * and then de-register handles.
    964 	 */
    965 
    966 	/* make sure that there are no "Service"s registered w/ this HCA. */
    967 	if (hcap->hca_svc_cnt != 0) {
    968 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
    969 		    "Active services still there %d", hcap->hca_svc_cnt);
    970 		hcap->hca_state = IBCM_HCA_ACTIVE;
    971 		return (IBCM_FAILURE);
    972 	}
    973 
    974 	if (ibcm_check_sidr_clean(hcap) != IBCM_SUCCESS) {
    975 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach:"
    976 		    "There are active SIDR operations");
    977 		hcap->hca_state = IBCM_HCA_ACTIVE;
    978 		return (IBCM_FAILURE);
    979 	}
    980 
    981 	if (ibcm_check_avl_clean(hcap) != IBCM_SUCCESS) {
    982 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
    983 		    "There are active RC connections");
    984 		hcap->hca_state = IBCM_HCA_ACTIVE;
    985 		return (IBCM_FAILURE);
    986 	}
    987 
    988 	/*
    989 	 * Now, wait until all rc and sidr stateps go away
    990 	 * All these stateps must be short lived ones, waiting to be cleaned
    991 	 * up after some timeout value, based on the current state.
    992 	 */
    993 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_detach:hca_guid = 0x%llX res_cnt = %d",
    994 	    hcap->hca_guid, hcap->hca_res_cnt);
    995 
    996 	while (hcap->hca_res_cnt > 0)
    997 		cv_wait(&ibcm_global_hca_cv, &ibcm_global_hca_lock);
    998 
    999 	/* Re-assert the while loop step above */
   1000 	ASSERT(hcap->hca_sidr_list == NULL);
   1001 	avl_destroy(&hcap->hca_active_tree);
   1002 	avl_destroy(&hcap->hca_passive_tree);
   1003 	avl_destroy(&hcap->hca_passive_comid_tree);
   1004 
   1005 	/*
   1006 	 * Unregister all ports from IBMA
   1007 	 * If there is a failure, re-initialize any free'd ibma handles. This
   1008 	 * is required to receive the incoming mads
   1009 	 */
   1010 	status = IBCM_SUCCESS;
   1011 	for (port_index = 0; port_index < hcap->hca_num_ports; port_index++) {
   1012 		if ((status = ibcm_hca_fini_port(hcap, port_index)) !=
   1013 		    IBCM_SUCCESS) {
   1014 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
   1015 			    "Failed to free IBMA Handle for port_num %d",
   1016 			    port_index + 1);
   1017 			break;
   1018 		}
   1019 	}
   1020 
   1021 	/* If detach fails, re-initialize ibma handles for incoming mads */
   1022 	if (status != IBCM_SUCCESS)  {
   1023 		for (i = 0; i < port_index; i++) {
   1024 			if (ibcm_hca_init_port(hcap, i) != IBT_SUCCESS)
   1025 				IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
   1026 				    "Failed to re-allocate IBMA Handles for"
   1027 				    " port_num %d", port_index + 1);
   1028 		}
   1029 		hcap->hca_state = IBCM_HCA_ACTIVE;
   1030 		return (IBCM_FAILURE);
   1031 	}
   1032 
   1033 	ibcm_fini_hca_ids(hcap);
   1034 	ibcm_delete_hca_entry(hcap);
   1035 
   1036 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_detach: DETACH succeeded");
   1037 	return (IBCM_SUCCESS);
   1038 }
   1039 
   1040 /* Checks, if there are any active sidr state entries in the specified hca */
   1041 static ibcm_status_t
   1042 ibcm_check_sidr_clean(ibcm_hca_info_t *hcap)
   1043 {
   1044 	ibcm_ud_state_data_t	*usp;
   1045 	uint32_t		transient_cnt = 0;
   1046 
   1047 	IBTF_DPRINTF_L5(cmlog, "ibcm_check_sidr_clean:");
   1048 
   1049 	rw_enter(&hcap->hca_sidr_list_lock, RW_WRITER);
   1050 	usp = hcap->hca_sidr_list;	/* Point to the list */
   1051 	while (usp != NULL) {
   1052 		mutex_enter(&usp->ud_state_mutex);
   1053 		if ((usp->ud_state != IBCM_STATE_SIDR_REP_SENT) &&
   1054 		    (usp->ud_state != IBCM_STATE_TIMED_OUT) &&
   1055 		    (usp->ud_state != IBCM_STATE_DELETE)) {
   1056 
   1057 			IBTF_DPRINTF_L3(cmlog, "ibcm_check_sidr_clean:"
   1058 			    "usp = %p not in transient state = %d", usp,
   1059 			    usp->ud_state);
   1060 
   1061 			mutex_exit(&usp->ud_state_mutex);
   1062 			rw_exit(&hcap->hca_sidr_list_lock);
   1063 			return (IBCM_FAILURE);
   1064 		} else {
   1065 			mutex_exit(&usp->ud_state_mutex);
   1066 			++transient_cnt;
   1067 		}
   1068 
   1069 		usp = usp->ud_nextp;
   1070 	}
   1071 	rw_exit(&hcap->hca_sidr_list_lock);
   1072 
   1073 	IBTF_DPRINTF_L4(cmlog, "ibcm_check_sidr_clean: transient_cnt %d",
   1074 	    transient_cnt);
   1075 
   1076 	return (IBCM_SUCCESS);
   1077 }
   1078 
   1079 /* Checks, if there are any active rc state entries, in the specified hca */
   1080 static ibcm_status_t
   1081 ibcm_check_avl_clean(ibcm_hca_info_t *hcap)
   1082 
   1083 {
   1084 	ibcm_state_data_t	*sp;
   1085 	avl_tree_t		*avl_tree;
   1086 	uint32_t		transient_cnt = 0;
   1087 
   1088 	IBTF_DPRINTF_L5(cmlog, "ibcm_check_avl_clean:");
   1089 	/*
   1090 	 * Both the trees ie., active and passive must reference to all
   1091 	 * statep's, so let's use one
   1092 	 */
   1093 	avl_tree = &hcap->hca_active_tree;
   1094 
   1095 	rw_enter(&hcap->hca_state_rwlock, RW_WRITER);
   1096 
   1097 	for (sp = avl_first(avl_tree); sp != NULL;
   1098 	    sp = avl_walk(avl_tree, sp, AVL_AFTER)) {
   1099 		mutex_enter(&sp->state_mutex);
   1100 		if ((sp->state != IBCM_STATE_TIMEWAIT) &&
   1101 		    (sp->state != IBCM_STATE_REJ_SENT) &&
   1102 		    (sp->state != IBCM_STATE_DELETE)) {
   1103 			IBTF_DPRINTF_L3(cmlog, "ibcm_check_avl_clean: "
   1104 			    "sp = %p not in transient state = %d", sp,
   1105 			    sp->state);
   1106 			mutex_exit(&sp->state_mutex);
   1107 			rw_exit(&hcap->hca_state_rwlock);
   1108 			return (IBCM_FAILURE);
   1109 		} else {
   1110 			mutex_exit(&sp->state_mutex);
   1111 			++transient_cnt;
   1112 		}
   1113 	}
   1114 
   1115 	rw_exit(&hcap->hca_state_rwlock);
   1116 
   1117 	IBTF_DPRINTF_L4(cmlog, "ibcm_check_avl_clean: transient_cnt %d",
   1118 	    transient_cnt);
   1119 
   1120 	return (IBCM_SUCCESS);
   1121 }
   1122 
   1123 /* Adds a new entry into CM's global hca list, if hca_guid is not there yet */
   1124 static ibcm_hca_info_t *
   1125 ibcm_add_hca_entry(ib_guid_t hcaguid, uint_t nports)
   1126 {
   1127 	ibcm_hca_info_t	*hcap;
   1128 
   1129 	IBTF_DPRINTF_L5(cmlog, "ibcm_add_hca_entry: guid = 0x%llX",
   1130 	    hcaguid);
   1131 
   1132 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
   1133 
   1134 	/*
   1135 	 * Check if this hca_guid already in the list
   1136 	 * If yes, then ignore this and return NULL
   1137 	 */
   1138 
   1139 	hcap = ibcm_hca_listp;
   1140 
   1141 	/* search for this HCA */
   1142 	while (hcap != NULL) {
   1143 		if (hcap->hca_guid == hcaguid) {
   1144 			/* already exists */
   1145 			IBTF_DPRINTF_L2(cmlog, "ibcm_add_hca_entry: "
   1146 			    "hcap %p guid 0x%llX, entry already exists !!",
   1147 			    hcap, hcap->hca_guid);
   1148 			return (NULL);
   1149 		}
   1150 		hcap = hcap->hca_next;
   1151 	}
   1152 
   1153 	/* Allocate storage for the new HCA entry found */
   1154 	hcap = kmem_zalloc(sizeof (ibcm_hca_info_t) +
   1155 	    (nports - 1) * sizeof (ibcm_port_info_t), KM_SLEEP);
   1156 
   1157 	/* initialize RW lock */
   1158 	rw_init(&hcap->hca_state_rwlock, NULL, RW_DRIVER, NULL);
   1159 	/* initialize SIDR list lock */
   1160 	rw_init(&hcap->hca_sidr_list_lock, NULL, RW_DRIVER, NULL);
   1161 	/* Insert "hcap" into the global HCA list maintained by CM */
   1162 	hcap->hca_next = ibcm_hca_listp;
   1163 	ibcm_hca_listp = hcap;
   1164 
   1165 	IBTF_DPRINTF_L5(cmlog, "ibcm_add_hca_entry: done hcap = 0x%p", hcap);
   1166 
   1167 	return (hcap);
   1168 
   1169 }
   1170 
   1171 /* deletes the given ibcm_hca_info_t from CM's global hca list */
   1172 void
   1173 ibcm_delete_hca_entry(ibcm_hca_info_t *hcap)
   1174 {
   1175 	ibcm_hca_info_t	*headp, *prevp = NULL;
   1176 
   1177 	/* ibcm_hca_global_lock is held */
   1178 	IBTF_DPRINTF_L5(cmlog, "ibcm_delete_hca_entry: guid = 0x%llX "
   1179 	    "hcap = 0x%p", hcap->hca_guid, hcap);
   1180 
   1181 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
   1182 
   1183 	headp = ibcm_hca_listp;
   1184 	while (headp != NULL) {
   1185 		if (headp == hcap) {
   1186 			IBTF_DPRINTF_L3(cmlog, "ibcm_delete_hca_entry: "
   1187 			    "deleting hcap %p hcaguid %llX", hcap,
   1188 			    hcap->hca_guid);
   1189 			if (prevp) {
   1190 				prevp->hca_next = headp->hca_next;
   1191 			} else {
   1192 				prevp = headp->hca_next;
   1193 				ibcm_hca_listp = prevp;
   1194 			}
   1195 			rw_destroy(&hcap->hca_state_rwlock);
   1196 			rw_destroy(&hcap->hca_sidr_list_lock);
   1197 			kmem_free(hcap, sizeof (ibcm_hca_info_t) +
   1198 			    (hcap->hca_num_ports - 1) *
   1199 			    sizeof (ibcm_port_info_t));
   1200 			return;
   1201 		}
   1202 
   1203 		prevp = headp;
   1204 		headp = headp->hca_next;
   1205 	}
   1206 }
   1207 
   1208 /*
   1209  * ibcm_find_hca_entry:
   1210  *	Given a HCA's GUID find out ibcm_hca_info_t entry for that HCA
   1211  *	This entry can be then used to access AVL tree/SIDR list etc.
   1212  *	If entry exists and in HCA ATTACH state, then hca's ref cnt is
   1213  *	incremented and entry returned. Else NULL returned.
   1214  *
   1215  *	All functions that use ibcm_find_hca_entry and get a non-NULL
   1216  *	return values must call ibcm_dec_hca_acc_cnt to decrement the
   1217  *	respective hca ref cnt. There shouldn't be any usage of
   1218  *	ibcm_hca_info_t * returned from ibcm_find_hca_entry,
   1219  *	after decrementing the hca_acc_cnt
   1220  *
   1221  * INPUTS:
   1222  *	hca_guid	- HCA's guid
   1223  *
   1224  * RETURN VALUE:
   1225  *	hcap		- if a match is found, else NULL
   1226  */
   1227 ibcm_hca_info_t *
   1228 ibcm_find_hca_entry(ib_guid_t hca_guid)
   1229 {
   1230 	ibcm_hca_info_t *hcap;
   1231 
   1232 	IBTF_DPRINTF_L5(cmlog, "ibcm_find_hca_entry: guid = 0x%llX", hca_guid);
   1233 
   1234 	mutex_enter(&ibcm_global_hca_lock);
   1235 
   1236 	hcap = ibcm_hca_listp;
   1237 	/* search for this HCA */
   1238 	while (hcap != NULL) {
   1239 		if (hcap->hca_guid == hca_guid)
   1240 			break;
   1241 		hcap = hcap->hca_next;
   1242 	}
   1243 
   1244 	/* if no hcap for the hca_guid, return NULL */
   1245 	if (hcap == NULL) {
   1246 		mutex_exit(&ibcm_global_hca_lock);
   1247 		return (NULL);
   1248 	}
   1249 
   1250 	/* return hcap, only if it valid to use */
   1251 	if (hcap->hca_state == IBCM_HCA_ACTIVE) {
   1252 		++(hcap->hca_acc_cnt);
   1253 
   1254 		IBTF_DPRINTF_L5(cmlog, "ibcm_find_hca_entry: "
   1255 		    "found hcap = 0x%p hca_acc_cnt %u", hcap,
   1256 		    hcap->hca_acc_cnt);
   1257 
   1258 		mutex_exit(&ibcm_global_hca_lock);
   1259 		return (hcap);
   1260 	} else {
   1261 		mutex_exit(&ibcm_global_hca_lock);
   1262 
   1263 		IBTF_DPRINTF_L2(cmlog, "ibcm_find_hca_entry: "
   1264 		    "found hcap = 0x%p not in active state", hcap);
   1265 		return (NULL);
   1266 	}
   1267 }
   1268 
   1269 /*
   1270  * Searches for ibcm_hca_info_t entry based on hca_guid, but doesn't increment
   1271  * the hca's reference count. This function is used, where the calling context
   1272  * is attempting to delete hcap itself and hence acc_cnt cannot be incremented
   1273  * OR assumes that valid hcap must be available in ibcm's global hca list.
   1274  */
   1275 ibcm_hca_info_t *
   1276 ibcm_find_hcap_entry(ib_guid_t hca_guid)
   1277 {
   1278 	ibcm_hca_info_t *hcap;
   1279 
   1280 	IBTF_DPRINTF_L5(cmlog, "ibcm_find_hcap_entry: guid = 0x%llX", hca_guid);
   1281 
   1282 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
   1283 
   1284 	hcap = ibcm_hca_listp;
   1285 	/* search for this HCA */
   1286 	while (hcap != NULL) {
   1287 		if (hcap->hca_guid == hca_guid)
   1288 			break;
   1289 		hcap = hcap->hca_next;
   1290 	}
   1291 
   1292 	if (hcap == NULL)
   1293 		IBTF_DPRINTF_L2(cmlog, "ibcm_find_hcap_entry: No hcap found for"
   1294 		    " hca_guid 0x%llX", hca_guid);
   1295 	else
   1296 		IBTF_DPRINTF_L5(cmlog, "ibcm_find_hcap_entry: hcap found for"
   1297 		    " hca_guid 0x%llX", hca_guid);
   1298 
   1299 	return (hcap);
   1300 }
   1301 
   1302 /* increment the hca's temporary reference count */
   1303 ibcm_status_t
   1304 ibcm_inc_hca_acc_cnt(ibcm_hca_info_t *hcap)
   1305 {
   1306 	mutex_enter(&ibcm_global_hca_lock);
   1307 	if (hcap->hca_state == IBCM_HCA_ACTIVE) {
   1308 		++(hcap->hca_acc_cnt);
   1309 		IBTF_DPRINTF_L5(cmlog, "ibcm_inc_hca_acc_cnt: "
   1310 		    "hcap = 0x%p  acc_cnt = %d ", hcap, hcap->hca_acc_cnt);
   1311 		mutex_exit(&ibcm_global_hca_lock);
   1312 		return (IBCM_SUCCESS);
   1313 	} else {
   1314 		IBTF_DPRINTF_L2(cmlog, "ibcm_inc_hca_acc_cnt: "
   1315 		    "hcap INACTIVE 0x%p  acc_cnt = %d ", hcap,
   1316 		    hcap->hca_acc_cnt);
   1317 		mutex_exit(&ibcm_global_hca_lock);
   1318 		return (IBCM_FAILURE);
   1319 	}
   1320 }
   1321 
   1322 /* decrement the hca's ref count, and wake up any waiting threads */
   1323 void
   1324 ibcm_dec_hca_acc_cnt(ibcm_hca_info_t *hcap)
   1325 {
   1326 	mutex_enter(&ibcm_global_hca_lock);
   1327 	ASSERT(hcap->hca_acc_cnt > 0);
   1328 	--(hcap->hca_acc_cnt);
   1329 	IBTF_DPRINTF_L5(cmlog, "ibcm_dec_hca_acc_cnt: hcap = 0x%p "
   1330 	    "acc_cnt = %d", hcap, hcap->hca_acc_cnt);
   1331 	if ((hcap->hca_state == IBCM_HCA_NOT_ACTIVE) &&
   1332 	    (hcap->hca_acc_cnt == 0)) {
   1333 		IBTF_DPRINTF_L3(cmlog, "ibcm_dec_hca_acc_cnt: "
   1334 		    "cv_broadcast for hcap = 0x%p", hcap);
   1335 		cv_broadcast(&ibcm_global_hca_cv);
   1336 	}
   1337 	mutex_exit(&ibcm_global_hca_lock);
   1338 }
   1339 
   1340 /* increment the hca's resource count */
   1341 void
   1342 ibcm_inc_hca_res_cnt(ibcm_hca_info_t *hcap)
   1343 
   1344 {
   1345 	mutex_enter(&ibcm_global_hca_lock);
   1346 	++(hcap->hca_res_cnt);
   1347 	IBTF_DPRINTF_L5(cmlog, "ibcm_inc_hca_res_cnt: hcap = 0x%p "
   1348 	    "ref_cnt = %d", hcap, hcap->hca_res_cnt);
   1349 	mutex_exit(&ibcm_global_hca_lock);
   1350 }
   1351 
   1352 /* decrement the hca's resource count, and wake up any waiting threads */
   1353 void
   1354 ibcm_dec_hca_res_cnt(ibcm_hca_info_t *hcap)
   1355 {
   1356 	mutex_enter(&ibcm_global_hca_lock);
   1357 	ASSERT(hcap->hca_res_cnt > 0);
   1358 	--(hcap->hca_res_cnt);
   1359 	IBTF_DPRINTF_L5(cmlog, "ibcm_dec_hca_res_cnt: hcap = 0x%p "
   1360 	    "ref_cnt = %d", hcap, hcap->hca_res_cnt);
   1361 	if ((hcap->hca_state == IBCM_HCA_NOT_ACTIVE) &&
   1362 	    (hcap->hca_res_cnt == 0)) {
   1363 		IBTF_DPRINTF_L3(cmlog, "ibcm_dec_hca_res_cnt: "
   1364 		    "cv_broadcast for hcap = 0x%p", hcap);
   1365 		cv_broadcast(&ibcm_global_hca_cv);
   1366 	}
   1367 	mutex_exit(&ibcm_global_hca_lock);
   1368 }
   1369 
   1370 /* increment the hca's service count */
   1371 void
   1372 ibcm_inc_hca_svc_cnt(ibcm_hca_info_t *hcap)
   1373 
   1374 {
   1375 	mutex_enter(&ibcm_global_hca_lock);
   1376 	++(hcap->hca_svc_cnt);
   1377 	IBTF_DPRINTF_L5(cmlog, "ibcm_inc_hca_svc_cnt: hcap = 0x%p "
   1378 	    "svc_cnt = %d", hcap, hcap->hca_svc_cnt);
   1379 	mutex_exit(&ibcm_global_hca_lock);
   1380 }
   1381 
   1382 /* decrement the hca's service count */
   1383 void
   1384 ibcm_dec_hca_svc_cnt(ibcm_hca_info_t *hcap)
   1385 {
   1386 	mutex_enter(&ibcm_global_hca_lock);
   1387 	ASSERT(hcap->hca_svc_cnt > 0);
   1388 	--(hcap->hca_svc_cnt);
   1389 	IBTF_DPRINTF_L5(cmlog, "ibcm_dec_hca_svc_cnt: hcap = 0x%p "
   1390 	    "svc_cnt = %d", hcap, hcap->hca_svc_cnt);
   1391 	mutex_exit(&ibcm_global_hca_lock);
   1392 }
   1393 
   /*
    * The following code manages three classes of requests that CM makes to
    * the fabric.  Those three classes are SA_ACCESS, REQ/REP/RTU, and DREQ/DREP.
    * The main issue is that the fabric can become very busy, and the CM
    * protocols rely on responses being made within a predefined timeout
    * value.  Limiting the number of simultaneous sessions has been observed
    * to make the CM protocols succeed, when they should, with extremely
    * high reliability.
    *
    * SA_ACCESS and DREQ/DREP are managed at the thread level, whereby the
    * thread blocks until there are fewer than some number of threads doing
    * similar requests.
    *
    * REQ/REP/RTU requests beyond a given limit are added to a list,
    * allowing the thread to return immediately to its caller in the
    * case where the "mode" is IBT_NONBLOCKING.  This is the mode used
    * by uDAPL and seems to be an important feature/behavior.
    */
   1412 
   1413 static int
   1414 ibcm_ok_to_start(struct ibcm_open_s *openp)
   1415 {
   1416 	return (openp->sends < openp->sends_hiwat &&
   1417 	    openp->in_progress < openp->in_progress_max);
   1418 }
   1419 
   1420 void
   1421 ibcm_open_done(ibcm_state_data_t *statep)
   1422 {
   1423 	int run;
   1424 	ibcm_state_data_t **linkp, *tmp;
   1425 
   1426 	ASSERT(MUTEX_HELD(&statep->state_mutex));
   1427 	if (statep->open_flow == 1) {
   1428 		statep->open_flow = 0;
   1429 		mutex_enter(&ibcm_open.mutex);
   1430 		if (statep->open_link == NULL) {
   1431 			ibcm_open.in_progress--;
   1432 			run = ibcm_ok_to_start(&ibcm_open);
   1433 		} else {
   1434 			ibcm_open.queued--;
   1435 			linkp = &ibcm_open.head.open_link;
   1436 			while (*linkp != statep)
   1437 				linkp = &((*linkp)->open_link);
   1438 			*linkp = statep->open_link;
   1439 			statep->open_link = NULL;
   1440 			/*
   1441 			 * If we remove what tail pointed to, we need
   1442 			 * to reassign tail (it is never NULL).
   1443 			 * tail points to head for the empty list.
   1444 			 */
   1445 			if (ibcm_open.tail == statep) {
   1446 				tmp = &ibcm_open.head;
   1447 				while (tmp->open_link != &ibcm_open.head)
   1448 					tmp = tmp->open_link;
   1449 				ibcm_open.tail = tmp;
   1450 			}
   1451 			run = 0;
   1452 		}
   1453 		mutex_exit(&ibcm_open.mutex);
   1454 		if (run)
   1455 			ibcm_run_tlist_thread();
   1456 	}
   1457 }
   1458 
   1459 /* dtrace */
   1460 void
   1461 ibcm_open_wait(hrtime_t delta)
   1462 {
   1463 	if (delta > 1000000)
   1464 		IBTF_DPRINTF_L2(cmlog, "ibcm_open_wait: flow more %lld", delta);
   1465 }
   1466 
   1467 void
   1468 ibcm_open_start(ibcm_state_data_t *statep)
   1469 {
   1470 	ibcm_insert_trace(statep, IBCM_TRACE_OUTGOING_REQ);
   1471 
   1472 	mutex_enter(&statep->state_mutex);
   1473 	ibcm_open_wait(gethrtime() - statep->post_time);
   1474 	mutex_exit(&statep->state_mutex);
   1475 
   1476 	ibcm_post_rc_mad(statep, statep->stored_msg, ibcm_post_req_complete,
   1477 	    statep);
   1478 
   1479 	mutex_enter(&statep->state_mutex);
   1480 	IBCM_REF_CNT_DECR(statep);
   1481 	mutex_exit(&statep->state_mutex);
   1482 }
   1483 
   1484 void
   1485 ibcm_open_enqueue(ibcm_state_data_t *statep)
   1486 {
   1487 	int run;
   1488 
   1489 	mutex_enter(&statep->state_mutex);
   1490 	statep->post_time = gethrtime();
   1491 	mutex_exit(&statep->state_mutex);
   1492 	mutex_enter(&ibcm_open.mutex);
   1493 	if (ibcm_open.queued == 0 && ibcm_ok_to_start(&ibcm_open)) {
   1494 		ibcm_open.in_progress++;
   1495 		mutex_exit(&ibcm_open.mutex);
   1496 		ibcm_open_start(statep);
   1497 	} else {
   1498 		ibcm_open.queued++;
   1499 		statep->open_link = &ibcm_open.head;
   1500 		ibcm_open.tail->open_link = statep;
   1501 		ibcm_open.tail = statep;
   1502 		run = ibcm_ok_to_start(&ibcm_open);
   1503 		mutex_exit(&ibcm_open.mutex);
   1504 		if (run)
   1505 			ibcm_run_tlist_thread();
   1506 	}
   1507 }
   1508 
   1509 ibcm_state_data_t *
   1510 ibcm_open_dequeue(void)
   1511 {
   1512 	ibcm_state_data_t *statep;
   1513 
   1514 	ASSERT(MUTEX_HELD(&ibcm_open.mutex));
   1515 	ibcm_open.queued--;
   1516 	ibcm_open.in_progress++;
   1517 	statep = ibcm_open.head.open_link;
   1518 	ibcm_open.head.open_link = statep->open_link;
   1519 	statep->open_link = NULL;
   1520 	/*
   1521 	 * If we remove what tail pointed to, we need
   1522 	 * to reassign tail (it is never NULL).
   1523 	 * tail points to head for the empty list.
   1524 	 */
   1525 	if (ibcm_open.tail == statep)
   1526 		ibcm_open.tail = &ibcm_open.head;
   1527 	return (statep);
   1528 }
   1529 
   1530 void
   1531 ibcm_check_for_opens(void)
   1532 {
   1533 	ibcm_state_data_t 	*statep;
   1534 
   1535 	mutex_enter(&ibcm_open.mutex);
   1536 
   1537 	while (ibcm_open.queued > 0) {
   1538 		if (ibcm_ok_to_start(&ibcm_open)) {
   1539 			statep = ibcm_open_dequeue();
   1540 			mutex_exit(&ibcm_open.mutex);
   1541 
   1542 			ibcm_open_start(statep);
   1543 
   1544 			mutex_enter(&ibcm_open.mutex);
   1545 		} else {
   1546 			break;
   1547 		}
   1548 	}
   1549 	mutex_exit(&ibcm_open.mutex);
   1550 }
   1551 
   1552 
   1553 static void
   1554 ibcm_flow_init(ibcm_flow_t *flow, uint_t simul_max)
   1555 {
   1556 	flow->list			= NULL;
   1557 	flow->simul			= 0;
   1558 	flow->waiters_per_chunk		= 4;
   1559 	flow->simul_max			= simul_max;
   1560 	flow->lowat			= simul_max - flow->waiters_per_chunk;
   1561 	flow->lowat_default		= flow->lowat;
   1562 	/* stats */
   1563 	flow->total			= 0;
   1564 }
   1565 
   1566 static void
   1567 ibcm_rc_flow_control_init(void)
   1568 {
   1569 	mutex_init(&ibcm_open.mutex, NULL, MUTEX_DEFAULT, NULL);
   1570 	mutex_enter(&ibcm_open.mutex);
   1571 	ibcm_flow_init(&ibcm_close_flow, ibcm_close_simul_max);
   1572 	ibcm_flow_init(&ibcm_lapr_flow, ibcm_lapr_simul_max);
   1573 	ibcm_flow_init(&ibcm_saa_flow, ibcm_saa_simul_max);
   1574 
   1575 	ibcm_open.queued 		= 0;
   1576 	ibcm_open.exit_deferred 	= 0;
   1577 	ibcm_open.in_progress 		= 0;
   1578 	ibcm_open.in_progress_max 	= 16;
   1579 	ibcm_open.sends 		= 0;
   1580 	ibcm_open.sends_max 		= 0;
   1581 	ibcm_open.sends_lowat 		= 8;
   1582 	ibcm_open.sends_hiwat 		= 16;
   1583 	ibcm_open.tail 			= &ibcm_open.head;
   1584 	ibcm_open.head.open_link 	= NULL;
   1585 	mutex_exit(&ibcm_open.mutex);
   1586 
   1587 	mutex_init(&ibcm_close.mutex, NULL, MUTEX_DEFAULT, NULL);
   1588 	mutex_enter(&ibcm_close.mutex);
   1589 	ibcm_close.tail			= &ibcm_close.head;
   1590 	ibcm_close.head.close_link 	= NULL;
   1591 	mutex_exit(&ibcm_close.mutex);
   1592 }
   1593 
   1594 static void
   1595 ibcm_rc_flow_control_fini(void)
   1596 {
   1597 	mutex_destroy(&ibcm_open.mutex);
   1598 	mutex_destroy(&ibcm_close.mutex);
   1599 }
   1600 
   1601 static ibcm_flow1_t *
   1602 ibcm_flow_find(ibcm_flow_t *flow)
   1603 {
   1604 	ibcm_flow1_t *flow1;
   1605 	ibcm_flow1_t *f;
   1606 
   1607 	f = flow->list;
   1608 	if (f) {	/* most likely code path */
   1609 		while (f->link != NULL)
   1610 			f = f->link;
   1611 		if (f->waiters < flow->waiters_per_chunk)
   1612 			return (f);
   1613 	}
   1614 
   1615 	/* There was no flow1 list element ready for another waiter */
   1616 	mutex_exit(&ibcm_open.mutex);
   1617 	flow1 = kmem_alloc(sizeof (*flow1), KM_SLEEP);
   1618 	mutex_enter(&ibcm_open.mutex);
   1619 
   1620 	f = flow->list;
   1621 	if (f) {
   1622 		while (f->link != NULL)
   1623 			f = f->link;
   1624 		if (f->waiters < flow->waiters_per_chunk) {
   1625 			kmem_free(flow1, sizeof (*flow1));
   1626 			return (f);
   1627 		}
   1628 		f->link = flow1;
   1629 	} else {
   1630 		flow->list = flow1;
   1631 	}
   1632 	cv_init(&flow1->cv, NULL, CV_DRIVER, NULL);
   1633 	flow1->waiters = 0;
   1634 	flow1->link = NULL;
   1635 	return (flow1);
   1636 }
   1637 
   1638 static void
   1639 ibcm_flow_enter(ibcm_flow_t *flow)
   1640 {
   1641 	mutex_enter(&ibcm_open.mutex);
   1642 	if (flow->list == NULL && flow->simul < flow->simul_max) {
   1643 		flow->simul++;
   1644 		flow->total++;
   1645 		mutex_exit(&ibcm_open.mutex);
   1646 	} else {
   1647 		ibcm_flow1_t *flow1;
   1648 
   1649 		flow1 = ibcm_flow_find(flow);
   1650 		flow1->waiters++;
   1651 		cv_wait(&flow1->cv, &ibcm_open.mutex);
   1652 		if (--flow1->waiters == 0) {
   1653 			cv_destroy(&flow1->cv);
   1654 			mutex_exit(&ibcm_open.mutex);
   1655 			kmem_free(flow1, sizeof (*flow1));
   1656 		} else
   1657 			mutex_exit(&ibcm_open.mutex);
   1658 	}
   1659 }
   1660 
   1661 static void
   1662 ibcm_flow_exit(ibcm_flow_t *flow)
   1663 {
   1664 	mutex_enter(&ibcm_open.mutex);
   1665 	if (--flow->simul < flow->lowat) {
   1666 		if (flow->lowat < flow->lowat_default)
   1667 			flow->lowat++;
   1668 		if (flow->list) {
   1669 			ibcm_flow1_t *flow1;
   1670 
   1671 			flow1 = flow->list;
   1672 			flow->list = flow1->link;	/* unlink */
   1673 			flow1->link = NULL;		/* be clean */
   1674 			flow->total += flow1->waiters;
   1675 			flow->simul += flow1->waiters;
   1676 			cv_broadcast(&flow1->cv);
   1677 		}
   1678 	}
   1679 	mutex_exit(&ibcm_open.mutex);
   1680 }
   1681 
   1682 void
   1683 ibcm_flow_inc(void)
   1684 {
   1685 	mutex_enter(&ibcm_open.mutex);
   1686 	if (++ibcm_open.sends > ibcm_open.sends_max) {
   1687 		ibcm_open.sends_max = ibcm_open.sends;
   1688 		IBTF_DPRINTF_L2(cmlog, "ibcm_flow_inc: sends max = %d",
   1689 		    ibcm_open.sends_max);
   1690 	}
   1691 	mutex_exit(&ibcm_open.mutex);
   1692 }
   1693 
   1694 static void
   1695 ibcm_check_send_cmpltn_time(hrtime_t delta, char *event_msg)
   1696 {
   1697 	if (delta > 4000000LL) {
   1698 		IBTF_DPRINTF_L2(cmlog, "ibcm_check_send_cmpltn_time: "
   1699 		    "%s: %lldns", event_msg, delta);
   1700 	}
   1701 }
   1702 
   1703 void
   1704 ibcm_flow_dec(hrtime_t time, char *mad_type)
   1705 {
   1706 	int flow_exit = 0;
   1707 	int run = 0;
   1708 
   1709 	if (ibcm_dtrace)
   1710 		ibcm_check_send_cmpltn_time(gethrtime() - time, mad_type);
   1711 	mutex_enter(&ibcm_open.mutex);
   1712 	ibcm_open.sends--;
   1713 	if (ibcm_open.sends < ibcm_open.sends_lowat) {
   1714 		run = ibcm_ok_to_start(&ibcm_open);
   1715 		if (ibcm_open.exit_deferred) {
   1716 			ibcm_open.exit_deferred--;
   1717 			flow_exit = 1;
   1718 		}
   1719 	}
   1720 	mutex_exit(&ibcm_open.mutex);
   1721 	if (flow_exit)
   1722 		ibcm_flow_exit(&ibcm_close_flow);
   1723 	if (run)
   1724 		ibcm_run_tlist_thread();
   1725 }
   1726 
   1727 void
   1728 ibcm_close_enqueue(ibcm_state_data_t *statep)
   1729 {
   1730 	mutex_enter(&ibcm_close.mutex);
   1731 	statep->close_link = NULL;
   1732 	ibcm_close.tail->close_link = statep;
   1733 	ibcm_close.tail = statep;
   1734 	mutex_exit(&ibcm_close.mutex);
   1735 	ibcm_run_tlist_thread();
   1736 }
   1737 
   1738 void
   1739 ibcm_check_for_async_close()
   1740 {
   1741 	ibcm_state_data_t 	*statep;
   1742 
   1743 	mutex_enter(&ibcm_close.mutex);
   1744 
   1745 	while (ibcm_close.head.close_link) {
   1746 		statep = ibcm_close.head.close_link;
   1747 		ibcm_close.head.close_link = statep->close_link;
   1748 		statep->close_link = NULL;
   1749 		if (ibcm_close.tail == statep)
   1750 			ibcm_close.tail = &ibcm_close.head;
   1751 		mutex_exit(&ibcm_close.mutex);
   1752 		ibcm_close_start(statep);
   1753 		mutex_enter(&ibcm_close.mutex);
   1754 	}
   1755 	mutex_exit(&ibcm_close.mutex);
   1756 }
   1757 
   1758 void
   1759 ibcm_close_enter(void)
   1760 {
   1761 	ibcm_flow_enter(&ibcm_close_flow);
   1762 }
   1763 
   1764 void
   1765 ibcm_close_exit(void)
   1766 {
   1767 	int flow_exit;
   1768 
   1769 	mutex_enter(&ibcm_open.mutex);
   1770 	if (ibcm_open.sends < ibcm_open.sends_lowat ||
   1771 	    ibcm_open.exit_deferred >= 4)
   1772 		flow_exit = 1;
   1773 	else {
   1774 		flow_exit = 0;
   1775 		ibcm_open.exit_deferred++;
   1776 	}
   1777 	mutex_exit(&ibcm_open.mutex);
   1778 	if (flow_exit)
   1779 		ibcm_flow_exit(&ibcm_close_flow);
   1780 }
   1781 
   1782 /*
   1783  * This function needs to be called twice to finish our flow
   1784  * control accounting when closing down a connection.  One
   1785  * call has send_done set to 1, while the other has it set to 0.
   1786  * Because of retries, this could get called more than once
   1787  * with either 0 or 1, but additional calls have no effect.
   1788  */
   1789 void
   1790 ibcm_close_done(ibcm_state_data_t *statep, int send_done)
   1791 {
   1792 	int flow_exit;
   1793 
   1794 	ASSERT(MUTEX_HELD(&statep->state_mutex));
   1795 	if (statep->close_flow == 1) {
   1796 		if (send_done)
   1797 			statep->close_flow = 3;
   1798 		else
   1799 			statep->close_flow = 2;
   1800 	} else if ((send_done && statep->close_flow == 2) ||
   1801 	    (!send_done && statep->close_flow == 3)) {
   1802 		statep->close_flow = 0;
   1803 		mutex_enter(&ibcm_open.mutex);
   1804 		if (ibcm_open.sends < ibcm_open.sends_lowat ||
   1805 		    ibcm_open.exit_deferred >= 4)
   1806 			flow_exit = 1;
   1807 		else {
   1808 			flow_exit = 0;
   1809 			ibcm_open.exit_deferred++;
   1810 		}
   1811 		mutex_exit(&ibcm_open.mutex);
   1812 		if (flow_exit)
   1813 			ibcm_flow_exit(&ibcm_close_flow);
   1814 	}
   1815 }
   1816 
/*
 * ibcm_lapr_enter():
 *	Enter the ibcm_lapr_flow flow control gate by calling
 *	ibcm_flow_enter() on it.  Paired with ibcm_lapr_exit().
 */
void
ibcm_lapr_enter(void)
{
	ibcm_flow_enter(&ibcm_lapr_flow);
}
   1822 
/*
 * ibcm_lapr_exit():
 *	Leave the ibcm_lapr_flow flow control gate by calling
 *	ibcm_flow_exit() on it.  Paired with ibcm_lapr_enter().
 */
void
ibcm_lapr_exit(void)
{
	ibcm_flow_exit(&ibcm_lapr_flow);
}
   1828 
   1829 void
   1830 ibcm_sa_access_enter()
   1831 {
   1832 	ibcm_flow_enter(&ibcm_saa_flow);
   1833 }
   1834 
   1835 void
   1836 ibcm_sa_access_exit()
   1837 {
   1838 	ibcm_flow_exit(&ibcm_saa_flow);
   1839 }
   1840 
   1841 static void
   1842 ibcm_sm_notice_handler(ibmf_saa_handle_t saa_handle,
   1843     ibmf_saa_subnet_event_t saa_event_code,
   1844     ibmf_saa_event_details_t *saa_event_details,
   1845     void *callback_arg)
   1846 {
   1847 	ibcm_port_info_t	*portp = (ibcm_port_info_t *)callback_arg;
   1848 	ibt_subnet_event_code_t code;
   1849 	ibt_subnet_event_t	event;
   1850 	uint8_t			event_status;
   1851 
   1852 	IBTF_DPRINTF_L3(cmlog, "ibcm_sm_notice_handler: saa_hdl %p, code = %d",
   1853 	    saa_handle, saa_event_code);
   1854 
   1855 	mutex_enter(&ibcm_sm_notice_serialize_lock);
   1856 
   1857 	switch (saa_event_code) {
   1858 	case IBMF_SAA_EVENT_MCG_CREATED:
   1859 		code = IBT_SM_EVENT_MCG_CREATED;
   1860 		break;
   1861 	case IBMF_SAA_EVENT_MCG_DELETED:
   1862 		code = IBT_SM_EVENT_MCG_DELETED;
   1863 		break;
   1864 	case IBMF_SAA_EVENT_GID_AVAILABLE:
   1865 		code = IBT_SM_EVENT_GID_AVAIL;
   1866 		ibcm_path_cache_purge();
   1867 		break;
   1868 	case IBMF_SAA_EVENT_GID_UNAVAILABLE:
   1869 		code = IBT_SM_EVENT_GID_UNAVAIL;
   1870 		ibcm_path_cache_purge();
   1871 		break;
   1872 	case IBMF_SAA_EVENT_SUBSCRIBER_STATUS_CHG:
   1873 		event_status =
   1874 		    saa_event_details->ie_producer_event_status_mask &
   1875 		    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM;
   1876 		if (event_status == (portp->port_event_status &
   1877 		    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM)) {
   1878 			mutex_exit(&ibcm_sm_notice_serialize_lock);
   1879 			return;	/* no change */
   1880 		}
   1881 		portp->port_event_status = event_status;
   1882 		if (event_status == IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM)
   1883 			code = IBT_SM_EVENT_AVAILABLE;
   1884 		else
   1885 			code = IBT_SM_EVENT_UNAVAILABLE;
   1886 		break;
   1887 	default:
   1888 		mutex_exit(&ibcm_sm_notice_serialize_lock);
   1889 		return;
   1890 	}
   1891 
   1892 	mutex_enter(&ibcm_global_hca_lock);
   1893 
   1894 	/* don't send the event if we're tearing down */
   1895 	if (!IBCM_ACCESS_HCA_OK(portp->port_hcap)) {
   1896 		mutex_exit(&ibcm_global_hca_lock);
   1897 		mutex_exit(&ibcm_sm_notice_serialize_lock);
   1898 		return;
   1899 	}
   1900 
   1901 	++(portp->port_hcap->hca_acc_cnt);
   1902 	mutex_exit(&ibcm_global_hca_lock);
   1903 
   1904 	event.sm_notice_gid = saa_event_details->ie_gid;
   1905 	ibtl_cm_sm_notice_handler(portp->port_sgid0, code, &event);
   1906 
   1907 	mutex_exit(&ibcm_sm_notice_serialize_lock);
   1908 
   1909 	ibcm_dec_hca_acc_cnt(portp->port_hcap);
   1910 }
   1911 
   1912 void
   1913 ibt_register_subnet_notices(ibt_clnt_hdl_t ibt_hdl,
   1914     ibt_sm_notice_handler_t sm_notice_handler, void *private)
   1915 {
   1916 	ibcm_port_info_t	*portp;
   1917 	ibcm_hca_info_t		*hcap;
   1918 	uint8_t			port;
   1919 	int			num_failed_sgids;
   1920 	ibtl_cm_sm_init_fail_t	*ifail;
   1921 	ib_gid_t		*sgidp;
   1922 
   1923 	IBTF_DPRINTF_L3(cmlog, "ibt_register_subnet_notices(%p, %s)",
   1924 	    ibt_hdl, ibtl_cm_get_clnt_name(ibt_hdl));
   1925 
   1926 	mutex_enter(&ibcm_sm_notice_serialize_lock);
   1927 
   1928 	ibtl_cm_set_sm_notice_handler(ibt_hdl, sm_notice_handler, private);
   1929 	if (sm_notice_handler == NULL) {
   1930 		mutex_exit(&ibcm_sm_notice_serialize_lock);
   1931 		return;
   1932 	}
   1933 
   1934 	/* for each port, if service is not available, make a call */
   1935 	mutex_enter(&ibcm_global_hca_lock);
   1936 	num_failed_sgids = 0;
   1937 	hcap = ibcm_hca_listp;
   1938 	while (hcap != NULL) {
   1939 		portp = hcap->hca_port_info;
   1940 		for (port = 0; port < hcap->hca_num_ports; port++) {
   1941 			if (!(portp->port_event_status &
   1942 			    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM))
   1943 				num_failed_sgids++;
   1944 			portp++;
   1945 		}
   1946 		hcap = hcap->hca_next;
   1947 	}
   1948 	if (num_failed_sgids != 0) {
   1949 		ifail = kmem_alloc(sizeof (*ifail) +
   1950 		    (num_failed_sgids - 1) * sizeof (ib_gid_t), KM_SLEEP);
   1951 		ifail->smf_num_sgids = num_failed_sgids;
   1952 		ifail->smf_ibt_hdl = ibt_hdl;
   1953 		sgidp = &ifail->smf_sgid[0];
   1954 		hcap = ibcm_hca_listp;
   1955 		while (hcap != NULL) {
   1956 			portp = hcap->hca_port_info;
   1957 			for (port = 0; port < hcap->hca_num_ports; port++) {
   1958 				if (!(portp->port_event_status &
   1959 				    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM))
   1960 					*sgidp++ = portp->port_sgid0;
   1961 				portp++;
   1962 			}
   1963 			hcap = hcap->hca_next;
   1964 		}
   1965 	}
   1966 	mutex_exit(&ibcm_global_hca_lock);
   1967 
   1968 	if (num_failed_sgids != 0) {
   1969 		ibtl_cm_sm_notice_init_failure(ifail);
   1970 		kmem_free(ifail, sizeof (*ifail) +
   1971 		    (num_failed_sgids - 1) * sizeof (ib_gid_t));
   1972 	}
   1973 	mutex_exit(&ibcm_sm_notice_serialize_lock);
   1974 }
   1975 
   1976 /* The following is run from a taskq because we've seen the stack overflow. */
   1977 static void
   1978 ibcm_init_saa(void *arg)
   1979 {
   1980 	ibcm_port_info_t		*portp = (ibcm_port_info_t *)arg;
   1981 	int				status;
   1982 	ib_guid_t			port_guid;
   1983 	ibmf_saa_subnet_event_args_t	event_args;
   1984 
   1985 	port_guid = portp->port_sgid0.gid_guid;
   1986 
   1987 	IBTF_DPRINTF_L3(cmlog, "ibcm_init_saa: port guid %llX", port_guid);
   1988 
   1989 	event_args.is_event_callback_arg = portp;
   1990 	event_args.is_event_callback = ibcm_sm_notice_handler;
   1991 
   1992 	if ((status = ibmf_sa_session_open(port_guid, 0, &event_args,
   1993 	    IBMF_VERSION, 0, &portp->port_ibmf_saa_hdl)) != IBMF_SUCCESS) {
   1994 		IBTF_DPRINTF_L2(cmlog, "ibcm_init_saa: "
   1995 		    "ibmf_sa_session_open failed for port guid %llX "
   1996 		    "status = %d", port_guid, status);
   1997 	} else {
   1998 		IBTF_DPRINTF_L2(cmlog, "ibcm_init_saa: "
   1999 		    "registered sa_hdl 0x%p for port guid %llX",
   2000 		    portp->port_ibmf_saa_hdl, port_guid);
   2001 	}
   2002 
   2003 	mutex_enter(&ibcm_sa_open_lock);
   2004 	portp->port_saa_open_in_progress = 0;
   2005 	cv_broadcast(&ibcm_sa_open_cv);
   2006 	mutex_exit(&ibcm_sa_open_lock);
   2007 }
   2008 
   2009 void
   2010 ibcm_init_saa_handle(ibcm_hca_info_t *hcap, uint8_t port)
   2011 {
   2012 	ibmf_saa_handle_t	saa_handle;
   2013 	uint8_t			port_index = port - 1;
   2014 	ibcm_port_info_t	*portp = &hcap->hca_port_info[port_index];
   2015 	ibt_status_t		ibt_status;
   2016 
   2017 	if (port_index >= hcap->hca_num_ports)
   2018 		return;
   2019 
   2020 	mutex_enter(&ibcm_sa_open_lock);
   2021 	if (portp->port_saa_open_in_progress) {
   2022 		mutex_exit(&ibcm_sa_open_lock);
   2023 		return;
   2024 	}
   2025 
   2026 	saa_handle = portp->port_ibmf_saa_hdl;
   2027 	if (saa_handle != NULL) {
   2028 		mutex_exit(&ibcm_sa_open_lock);
   2029 		return;
   2030 	}
   2031 
   2032 	portp->port_saa_open_in_progress = 1;
   2033 	mutex_exit(&ibcm_sa_open_lock);
   2034 
   2035 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(portp->port_event_status))
   2036 
   2037 	/* The assumption is that we're getting event notifications */
   2038 	portp->port_event_status = IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM;
   2039 
   2040 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(portp->port_event_status))
   2041 
   2042 	ibt_status = ibt_get_port_state_byguid(portp->port_hcap->hca_guid,
   2043 	    portp->port_num, &portp->port_sgid0, NULL);
   2044 	if (ibt_status != IBT_SUCCESS) {
   2045 		IBTF_DPRINTF_L2(cmlog, "ibcm_init_saa_handle: "
   2046 		    "ibt_get_port_state_byguid failed for guid %llX "
   2047 		    "with status %d", portp->port_hcap->hca_guid, ibt_status);
   2048 		mutex_enter(&ibcm_sa_open_lock);
   2049 		portp->port_saa_open_in_progress = 0;
   2050 		cv_broadcast(&ibcm_sa_open_cv);
   2051 		mutex_exit(&ibcm_sa_open_lock);
   2052 		return;
   2053 	}
   2054 	/* if the port is UP, try sa_session_open */
   2055 	(void) taskq_dispatch(ibcm_taskq, ibcm_init_saa, portp, TQ_SLEEP);
   2056 }
   2057 
   2058 
   2059 ibmf_saa_handle_t
   2060 ibcm_get_saa_handle(ibcm_hca_info_t *hcap, uint8_t port)
   2061 {
   2062 	ibmf_saa_handle_t	saa_handle;
   2063 	uint8_t			port_index = port - 1;
   2064 	ibcm_port_info_t	*portp = &hcap->hca_port_info[port_index];
   2065 	ibt_status_t		ibt_status;
   2066 
   2067 	if (port_index >= hcap->hca_num_ports)
   2068 		return (NULL);
   2069 
   2070 	mutex_enter(&ibcm_sa_open_lock);
   2071 	while (portp->port_saa_open_in_progress) {
   2072 		cv_wait(&ibcm_sa_open_cv, &ibcm_sa_open_lock);
   2073 	}
   2074 
   2075 	saa_handle = portp->port_ibmf_saa_hdl;
   2076 	if (saa_handle != NULL) {
   2077 		mutex_exit(&ibcm_sa_open_lock);
   2078 		return (saa_handle);
   2079 	}
   2080 
   2081 	portp->port_saa_open_in_progress = 1;
   2082 	mutex_exit(&ibcm_sa_open_lock);
   2083 
   2084 	ibt_status = ibt_get_port_state_byguid(portp->port_hcap->hca_guid,
   2085 	    portp->port_num, &portp->port_sgid0, NULL);
   2086 	if (ibt_status != IBT_SUCCESS) {
   2087 		IBTF_DPRINTF_L2(cmlog, "ibcm_get_saa_handle: "
   2088 		    "ibt_get_port_state_byguid failed for guid %llX "
   2089 		    "with status %d", portp->port_hcap->hca_guid, ibt_status);
   2090 		mutex_enter(&ibcm_sa_open_lock);
   2091 		portp->port_saa_open_in_progress = 0;
   2092 		cv_broadcast(&ibcm_sa_open_cv);
   2093 		mutex_exit(&ibcm_sa_open_lock);
   2094 		return (NULL);
   2095 	}
   2096 	/* if the port is UP, try sa_session_open */
   2097 	(void) taskq_dispatch(ibcm_taskq, ibcm_init_saa, portp, TQ_SLEEP);
   2098 
   2099 	mutex_enter(&ibcm_sa_open_lock);
   2100 	while (portp->port_saa_open_in_progress) {
   2101 		cv_wait(&ibcm_sa_open_cv, &ibcm_sa_open_lock);
   2102 	}
   2103 	saa_handle = portp->port_ibmf_saa_hdl;
   2104 	mutex_exit(&ibcm_sa_open_lock);
   2105 	return (saa_handle);
   2106 }
   2107 
   2108 
   2109 /*
   2110  * ibcm_hca_init_port():
   2111  * 	- Register port with IBMA
   2112  *
   2113  * Arguments:
   2114  *	hcap		- HCA's guid
   2115  *	port_index	- port number minus 1
   2116  *
   2117  * Return values:
   2118  *	IBCM_SUCCESS - success
   2119  */
   2120 ibt_status_t
   2121 ibcm_hca_init_port(ibcm_hca_info_t *hcap, uint8_t port_index)
   2122 {
   2123 	int			status;
   2124 	ibmf_register_info_t	*ibmf_reg;
   2125 
   2126 	IBTF_DPRINTF_L4(cmlog, "ibcm_hca_init_port: hcap = 0x%p port_num %d",
   2127 	    hcap, port_index + 1);
   2128 
   2129 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
   2130 
   2131 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(hcap->hca_port_info))
   2132 
   2133 	if (hcap->hca_port_info[port_index].port_ibmf_hdl == NULL) {
   2134 		/* Register with IBMF */
   2135 		ibmf_reg = &hcap->hca_port_info[port_index].port_ibmf_reg;
   2136 		ibmf_reg->ir_ci_guid = hcap->hca_guid;
   2137 		ibmf_reg->ir_port_num = port_index + 1;
   2138 		ibmf_reg->ir_client_class = COMM_MGT_MANAGER_AGENT;
   2139 
   2140 		/*
   2141 		 * register with management framework
   2142 		 */
   2143 		status = ibmf_register(ibmf_reg, IBMF_VERSION,
   2144 		    IBMF_REG_FLAG_NO_OFFLOAD, NULL, NULL,
   2145 		    &(hcap->hca_port_info[port_index].port_ibmf_hdl),
   2146 		    &(hcap->hca_port_info[port_index].port_ibmf_caps));
   2147 
   2148 		if (status != IBMF_SUCCESS) {
   2149 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_init_port: "
   2150 			    "ibmf_register failed for port_num %x, "
   2151 			    "status = %x", port_index + 1, status);
   2152 			return (ibcm_ibmf_analyze_error(status));
   2153 		}
   2154 
   2155 		hcap->hca_port_info[port_index].port_qp1.qp_cm =
   2156 		    IBMF_QP_HANDLE_DEFAULT;
   2157 		hcap->hca_port_info[port_index].port_qp1.qp_port =
   2158 		    &(hcap->hca_port_info[port_index]);
   2159 
   2160 		/*
   2161 		 * Register the read callback with IBMF.
   2162 		 * Since we just did an ibmf_register, handle is
   2163 		 * valid and ibcm_recv_cb() is valid so we can
   2164 		 * safely assert for success of ibmf_setup_recv_cb()
   2165 		 *
   2166 		 * Depending on the "state" of the HCA,
   2167 		 * CM may drop incoming packets
   2168 		 */
   2169 		status = ibmf_setup_async_cb(
   2170 		    hcap->hca_port_info[port_index].port_ibmf_hdl,
   2171 		    IBMF_QP_HANDLE_DEFAULT, ibcm_recv_cb,
   2172 		    &(hcap->hca_port_info[port_index].port_qp1), 0);
   2173 		ASSERT(status == IBMF_SUCCESS);
   2174 
   2175 		IBTF_DPRINTF_L5(cmlog, "ibcm_hca_init_port: "
   2176 		    "IBMF hdl[%x] = 0x%p", port_index,
   2177 		    hcap->hca_port_info[port_index].port_ibmf_hdl);
   2178 
   2179 		/* Attempt to get the saa_handle for this port */
   2180 		ibcm_init_saa_handle(hcap, port_index + 1);
   2181 	}
   2182 
   2183 	return (IBT_SUCCESS);
   2184 }
   2185 
   2186 /*
   2187  * useful, to re attempt to initialize port ibma handles from elsewhere in
   2188  * cm code
   2189  */
   2190 ibt_status_t
   2191 ibcm_hca_reinit_port(ibcm_hca_info_t *hcap, uint8_t port_index)
   2192 {
   2193 	ibt_status_t	status;
   2194 
   2195 	IBTF_DPRINTF_L5(cmlog, "ibcm_hca_reinit_port: hcap 0x%p port_num %d",
   2196 	    hcap, port_index + 1);
   2197 
   2198 	mutex_enter(&ibcm_global_hca_lock);
   2199 	status = ibcm_hca_init_port(hcap, port_index);
   2200 	mutex_exit(&ibcm_global_hca_lock);
   2201 	return (status);
   2202 }
   2203 
   2204 
   2205 /*
   2206  * ibcm_hca_fini_port():
   2207  * 	- Deregister port with IBMA
   2208  *
   2209  * Arguments:
   2210  *	hcap		- HCA's guid
   2211  *	port_index	- port number minus 1
   2212  *
   2213  * Return values:
   2214  *	IBCM_SUCCESS - success
   2215  */
   2216 static ibcm_status_t
   2217 ibcm_hca_fini_port(ibcm_hca_info_t *hcap, uint8_t port_index)
   2218 {
   2219 	int			ibmf_status;
   2220 	ibcm_status_t		ibcm_status;
   2221 
   2222 	IBTF_DPRINTF_L4(cmlog, "ibcm_hca_fini_port: hcap = 0x%p port_num %d ",
   2223 	    hcap, port_index + 1);
   2224 
   2225 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
   2226 
   2227 	if (hcap->hca_port_info[port_index].port_ibmf_saa_hdl != NULL) {
   2228 		IBTF_DPRINTF_L5(cmlog, "ibcm_hca_fini_port: "
   2229 		    "ibmf_sa_session_close IBMF SAA hdl %p",
   2230 		    hcap->hca_port_info[port_index].port_ibmf_saa_hdl);
   2231 
   2232 		ibmf_status = ibmf_sa_session_close(
   2233 		    &hcap->hca_port_info[port_index].port_ibmf_saa_hdl, 0);
   2234 		if (ibmf_status != IBMF_SUCCESS) {
   2235 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
   2236 			    "ibmf_sa_session_close of port %d returned %x",
   2237 			    port_index + 1, ibmf_status);
   2238 			return (IBCM_FAILURE);
   2239 		}
   2240 	}
   2241 
   2242 	if (hcap->hca_port_info[port_index].port_ibmf_hdl != NULL) {
   2243 		IBTF_DPRINTF_L5(cmlog, "ibcm_hca_fini_port: "
   2244 		    "ibmf_unregister IBMF Hdl %p",
   2245 		    hcap->hca_port_info[port_index].port_ibmf_hdl);
   2246 
   2247 		/* clean-up all the ibmf qp's allocated on this port */
   2248 		ibcm_status = ibcm_free_allqps(hcap, port_index + 1);
   2249 
   2250 		if (ibcm_status != IBCM_SUCCESS) {
   2251 
   2252 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
   2253 			    "ibcm_free_allqps failed for port_num %d",
   2254 			    port_index + 1);
   2255 			return (IBCM_FAILURE);
   2256 		}
   2257 
   2258 		/* Tear down the receive callback */
   2259 		ibmf_status = ibmf_tear_down_async_cb(
   2260 		    hcap->hca_port_info[port_index].port_ibmf_hdl,
   2261 		    IBMF_QP_HANDLE_DEFAULT, 0);
   2262 
   2263 		if (ibmf_status != IBMF_SUCCESS) {
   2264 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
   2265 			    "ibmf_tear_down_async_cb failed %d port_num %d",
   2266 			    ibmf_status, port_index + 1);
   2267 			return (IBCM_FAILURE);
   2268 		}
   2269 
   2270 		/* Now, unregister with IBMF */
   2271 		ibmf_status = ibmf_unregister(
   2272 		    &hcap->hca_port_info[port_index].port_ibmf_hdl, 0);
   2273 		IBTF_DPRINTF_L4(cmlog, "ibcm_hca_fini_port: "
   2274 		    "ibmf_unregister of port_num %x returned %x",
   2275 		    port_index + 1, ibmf_status);
   2276 
   2277 		if (ibmf_status == IBMF_SUCCESS)
   2278 			hcap->hca_port_info[port_index].port_ibmf_hdl = NULL;
   2279 		else {
   2280 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
   2281 			    "ibmf_unregister failed %d port_num %d",
   2282 			    ibmf_status, port_index + 1);
   2283 			return (IBCM_FAILURE);
   2284 		}
   2285 	}
   2286 	return (IBCM_SUCCESS);
   2287 }
   2288 
   2289 /*
   2290  * ibcm_comm_est_handler():
   2291  *	Check if the given channel is in ESTABLISHED state or not
   2292  *
   2293  * Arguments:
   2294  *	eventp	- A pointer to an ibt_async_event_t struct
   2295  *
   2296  * Return values: NONE
   2297  */
   2298 static void
   2299 ibcm_comm_est_handler(ibt_async_event_t *eventp)
   2300 {
   2301 	ibcm_state_data_t	*statep;
   2302 
   2303 	IBTF_DPRINTF_L4(cmlog, "ibcm_comm_est_handler:");
   2304 
   2305 	/* Both QP and EEC handles can't be NULL */
   2306 	if (eventp->ev_chan_hdl == NULL) {
   2307 		IBTF_DPRINTF_L2(cmlog, "ibcm_comm_est_handler: "
   2308 		    "both QP and EEC handles are NULL");
   2309 		return;
   2310 	}
   2311 
   2312 	/* get the "statep" from qp/eec handles */
   2313 	IBCM_GET_CHAN_PRIVATE(eventp->ev_chan_hdl, statep);
   2314 	if (statep == NULL) {
   2315 		IBTF_DPRINTF_L2(cmlog, "ibcm_comm_est_handler: statep is NULL");
   2316 		return;
   2317 	}
   2318 
   2319 	mutex_enter(&statep->state_mutex);
   2320 
   2321 	IBCM_RELEASE_CHAN_PRIVATE(eventp->ev_chan_hdl);
   2322 
   2323 	IBTF_DPRINTF_L4(cmlog, "ibcm_comm_est_handler: statep = %p", statep);
   2324 
   2325 	IBCM_REF_CNT_INCR(statep);
   2326 
   2327 	if ((statep->state == IBCM_STATE_REP_SENT) ||
   2328 	    (statep->state == IBCM_STATE_MRA_REP_RCVD)) {
   2329 		timeout_id_t	timer_val = statep->timerid;
   2330 
   2331 		statep->state = IBCM_STATE_TRANSIENT_ESTABLISHED;
   2332 
   2333 		if (timer_val) {
   2334 			statep->timerid = 0;
   2335 			mutex_exit(&statep->state_mutex);
   2336 			(void) untimeout(timer_val);
   2337 		} else
   2338 			mutex_exit(&statep->state_mutex);
   2339 
   2340 		/* CM doesn't have RTU message here */
   2341 		ibcm_cep_state_rtu(statep, NULL);
   2342 
   2343 	} else {
   2344 		if (statep->state == IBCM_STATE_ESTABLISHED ||
   2345 		    statep->state == IBCM_STATE_TRANSIENT_ESTABLISHED) {
   2346 			IBTF_DPRINTF_L4(cmlog, "ibcm_comm_est_handler: "
   2347 			    "Channel already in ESTABLISHED state");
   2348 		} else {
   2349 			/* An unexpected behavior from remote */
   2350 			IBTF_DPRINTF_L2(cmlog, "ibcm_comm_est_handler: "
   2351 			    "Unexpected in state = %d", statep->state);
   2352 		}
   2353 		mutex_exit(&statep->state_mutex);
   2354 
   2355 		ibcm_insert_trace(statep, IBCM_TRACE_INCOMING_COMEST);
   2356 	}
   2357 
   2358 	mutex_enter(&statep->state_mutex);
   2359 	IBCM_REF_CNT_DECR(statep);
   2360 	mutex_exit(&statep->state_mutex);
   2361 }
   2362 
   2363 
   2364 /*
   2365  * ibcm_async_handler():
   2366  *	CM's Async Handler
   2367  *	(Handles ATTACH, DETACH, COM_EST events)
   2368  *
   2369  * Arguments:
   2370  *	eventp	- A pointer to an ibt_async_event_t struct
   2371  *
   2372  * Return values: None
   2373  *
   2374  * NOTE : CM assumes that all HCA DR events are delivered sequentially
   2375  * i.e., until ibcm_async_handler  completes for a given HCA DR, framework
   2376  * shall not invoke ibcm_async_handler with another DR event for the same
   2377  * HCA
   2378  */
   2379 /* ARGSUSED */
   2380 void
   2381 ibcm_async_handler(void *clnt_hdl, ibt_hca_hdl_t hca_hdl,
   2382     ibt_async_code_t code, ibt_async_event_t *eventp)
   2383 {
   2384 	ibcm_hca_info_t		*hcap;
   2385 	ibcm_port_up_t		*pup;
   2386 
   2387 	IBTF_DPRINTF_L3(cmlog, "ibcm_async_handler: "
   2388 	    "clnt_hdl = %p, code = 0x%x, eventp = 0x%p",
   2389 	    clnt_hdl, code, eventp);
   2390 
   2391 	mutex_enter(&ibcm_global_hca_lock);
   2392 
   2393 	/* If fini is going to complete successfully, then return */
   2394 	if (ibcm_finit_state != IBCM_FINIT_IDLE) {
   2395 
   2396 		/*
   2397 		 * This finit state implies one of the following:
   2398 		 * Init either didn't start or didn't complete OR
   2399 		 * Fini is about to return SUCCESS and release the global lock.
   2400 		 * In all these cases, it is safe to ignore the async.
   2401 		 */
   2402 
   2403 		IBTF_DPRINTF_L2(cmlog, "ibcm_async_handler: ignoring event %x, "
   2404 		    "as either init didn't complete or fini about to succeed",
   2405 		    code);
   2406 		mutex_exit(&ibcm_global_hca_lock);
   2407 		return;
   2408 	}
   2409 
   2410 	switch (code) {
   2411 	case IBT_PORT_CHANGE_EVENT:
   2412 		if ((eventp->ev_port_flags & IBT_PORT_CHANGE_SM_LID) == 0)
   2413 			break;
   2414 	/* FALLTHROUGH */
   2415 	case IBT_CLNT_REREG_EVENT:
   2416 	case IBT_EVENT_PORT_UP:
   2417 		mutex_exit(&ibcm_global_hca_lock);
   2418 		pup = kmem_alloc(sizeof (ibcm_port_up_t), KM_SLEEP);
   2419 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pup))
   2420 		pup->pup_hca_guid = eventp->ev_hca_guid;
   2421 		pup->pup_port = eventp->ev_port;
   2422 		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*pup))
   2423 		(void) taskq_dispatch(ibcm_taskq,
   2424 		    ibcm_service_record_rewrite_task, pup, TQ_SLEEP);
   2425 		ibcm_path_cache_purge();
   2426 		return;
   2427 
   2428 	case IBT_HCA_ATTACH_EVENT:
   2429 
   2430 		/* eventp->ev_hcaguid is the HCA GUID of interest */
   2431 		ibcm_hca_attach(eventp->ev_hca_guid);
   2432 		break;
   2433 
   2434 	case IBT_HCA_DETACH_EVENT:
   2435 
   2436 		/* eventp->ev_hca_guid is the HCA GUID of interest */
   2437 		if ((hcap = ibcm_find_hcap_entry(eventp->ev_hca_guid)) ==
   2438 		    NULL) {
   2439 			IBTF_DPRINTF_L2(cmlog, "ibcm_async_handler:"
   2440 			    " hca %llX doesn't exist", eventp->ev_hca_guid);
   2441 			break;
   2442 		}
   2443 
   2444 		(void) ibcm_hca_detach(hcap);
   2445 		break;
   2446 
   2447 	case IBT_EVENT_COM_EST_QP:
   2448 		/* eventp->ev_qp_hdl is the ibt_qp_hdl_t of interest */
   2449 	case IBT_EVENT_COM_EST_EEC:
   2450 		/* eventp->ev_eec_hdl is the ibt_eec_hdl_t of interest */
   2451 		ibcm_comm_est_handler(eventp);
   2452 		break;
   2453 	default:
   2454 		break;
   2455 	}
   2456 
   2457 	/* Unblock, any blocked fini/init operations */
   2458 	mutex_exit(&ibcm_global_hca_lock);
   2459 }
   2460