Home | History | Annotate | Download | only in ibd
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #ifndef _SYS_IB_CLIENTS_IBD_H
     28 #define	_SYS_IB_CLIENTS_IBD_H
     29 
     30 #ifdef __cplusplus
     31 extern "C" {
     32 #endif
     33 
     34 /* The following macros are used in both ibd.c and ibd_cm.c */
     35 
     36 /*
     37  * Completion queue polling control
     38  */
     39 #define	IBD_CQ_POLLING			0x1
     40 #define	IBD_REDO_CQ_POLLING		0x2
     /*
      * NOTE(review): the two values above are distinct bits, so they can be
      * combined in a single flag word; presumably they are OR-ed into the
      * per-CQ "poll busy" fields - confirm against ibd.c.
      */
     41 
     42 /*
     43  * Maximum length for returning chained mps back to crossbow.
     44  * Also used as the maximum number of rx wc's polled at a time.
     45  */
     46 #define	IBD_MAX_RX_MP_LEN		16
     47 
     48 /*
     49  * When doing multiple-send-wr, this value determines how many to do at
     50  * a time (in a single ibt_post_send).
     51  */
     52 #define	IBD_MAX_TX_POST_MULTIPLE	4
     53 
     54 /*
     55  * Flag bits for resources to reap
         * (each value is a separate bit, so several resources can be named
         * in one mask).
     56  */
     57 #define	IBD_RSRC_SWQE			0x1
     58 #define	IBD_RSRC_LSOBUF			0x2
     59 #define	IBD_RSRC_RC_SWQE		0x4
     60 #define	IBD_RSRC_RC_TX_LARGEBUF		0x8
     61 
     62 /*
     63  * Async operation types
         * (small consecutive integers, not bit flags; presumably the values
         * stored in ibd_req_t.rq_op - TODO confirm against ibd.c).
     64  */
     65 #define	IBD_ASYNC_GETAH			1
     66 #define	IBD_ASYNC_JOIN			2
     67 #define	IBD_ASYNC_LEAVE			3
     68 #define	IBD_ASYNC_PROMON		4
     69 #define	IBD_ASYNC_PROMOFF		5
     70 #define	IBD_ASYNC_REAP			6
     71 #define	IBD_ASYNC_TRAP			7
     72 #define	IBD_ASYNC_SCHED			8
     73 #define	IBD_ASYNC_LINK			9
     74 #define	IBD_ASYNC_EXIT			10
     75 #define	IBD_ASYNC_RC_TOO_BIG		11
     76 #define	IBD_ASYNC_RC_CLOSE_ACT_CHAN		12
     77 #define	IBD_ASYNC_RC_RECYCLE_ACE		13
     78 
     79 /*
     80  * Miscellaneous constants
         *
         * IBD_PAD_NSNA() compares its "type" argument against IBD_SEND; any
         * other value (e.g. IBD_RECV) takes the receive-side branch.
     81  */
     82 #define	IBD_SEND			0
     83 #define	IBD_RECV			1
     84 
     85 /*
     86  * Thresholds
     87  *
     88  * When waiting for resources (swqes or lso buffers) to become available,
     89  * the first two thresholds below determine how long to wait before informing
     90  * the network layer to start sending packets again. The IBD_TX_POLL_THRESH
     91  * determines how low the available swqes should go before we start polling
     92  * the completion queue.
     93  */
     94 #define	IBD_FREE_LSOS_THRESH		8
     95 #define	IBD_FREE_SWQES_THRESH		20
     96 #define	IBD_TX_POLL_THRESH		80
     97 
     /*
      * Debug tracing hook.
      *
      * DEBUG builds: DPRINT is an alias for debug_print(level, fmt, ...).
      * Other builds: "DPRINT(args)" expands to "0 && (args)"; the && operator
      * short-circuits, so the argument expressions are never evaluated.
      */
     98 #ifdef DEBUG
     99 void debug_print(int l, char *fmt, ...);
    100 #define	DPRINT		debug_print
    101 #else
    102 #define	DPRINT		0 &&
    103 #endif
    104 
    105 /*
    106  * AH and MCE active list manipulation:
    107  *
    108  * Multicast disable requests and MCG delete traps are two cases
    109  * where the active AH entry for the mcg (if any unreferenced one exists)
    110  * will be moved to the free list (to force the next Tx to the mcg to
    111  * join the MCG in SendOnly mode). Port up handling will also move AHs
    112  * from active to free list.
    113  *
    114  * In the case when some transmits are still pending on an entry
    115  * for an mcg, but a multicast disable has already been issued on the
    116  * mcg, there are some options to consider to preserve the join state
    117  * to ensure the emitted packet is properly routed on the IBA fabric.
    118  * For the AH, we can
    119  * 1. take out of active list at multicast disable time.
    120  * 2. take out of active list only when last pending Tx completes.
    121  * For the MCE, we can
    122  * 3. take out of active list at multicast disable time.
    123  * 4. take out of active list only when last pending Tx completes.
    124  * 5. move from active list to stale list at multicast disable time.
    125  * We choose to use 2,4. We use option 4 so that if a multicast enable
    126  * is tried before the pending Tx completes, the enable code finds the
    127  * mce in the active list and just has to make sure it will not be reaped
    128  * (ie the mcg leave done) when the pending Tx does complete. Alternatively,
    129  * a stale list (#5) that would be checked in the enable code would need
    130  * to be implemented. Option 2 is used, because otherwise, a Tx attempt
    131  * after the multicast disable would try to put an AH in the active list,
    132  * and associate the mce it finds in the active list to this new AH,
    133  * whereas the mce is already associated with the previous AH (taken off
    134  * the active list), and will be removed once the pending Tx's complete
    135  * (unless a reference count on mce's is implemented). One implication of
    136  * using 2,4 is that new Tx's posted before the pending Tx's complete will
    137  * grab new references on the AH, further delaying the leave.
    138  *
    139  * In the case of mcg delete (or create) trap when the port is sendonly
    140  * joined, the AH and MCE handling is different: the AH and MCE have to be
    141  * immediately taken off the active lists (forcing a join and path lookup
    142  * at the next Tx is the only guaranteed means of ensuring a proper Tx
    143  * to an mcg as it is repeatedly created and deleted and goes thru
    144  * reincarnations).
    145  *
    146  * When a port is already sendonly joined, and a multicast enable is
    147  * attempted, the same mce structure is promoted; this ensures only a
    148  * single mce on the active list tracks the most powerful join state.
    149  *
    150  * In the case of port up event handling, the MCE for sendonly membership
    151  * is freed up, and the ACE is put into the free list as soon as possible
    152  * (depending on whether posted Tx's have completed). For fullmembership
    153  * MCE's though, the ACE is similarly handled; but the MCE is kept around
    154  * (a re-JOIN is attempted) only if the DLPI leave has not already been
    155  * done; else the mce is deconstructed (mc_fullreap case).
    156  *
    157  * MCG creation and deletion trap handling:
    158  *
    159  * These traps are unreliable (meaning sometimes the trap might never
    160  * be delivered to the subscribed nodes) and may arrive out-of-order
    161  * since they use UD transport. An alternative to relying on these
    162  * unreliable traps is to poll for mcg presence every so often, but
    163  * instead of doing that, we try to be as conservative as possible
    164  * while handling the traps, and hope that the traps do arrive at
    165  * the subscribed nodes soon. Note that if a node is fullmember
    166  * joined to an mcg, it can not possibly receive a mcg create/delete
    167  * trap for that mcg (by fullmember definition); if it does, it is
    168  * an old trap from a previous incarnation of the mcg.
    169  *
    170  * Whenever a trap is received, the driver cleans up its sendonly
    171  * membership to the group; we choose to do a sendonly leave even
    172  * on a creation trap to handle the case of a prior deletion of the mcg
    173  * having gone unnoticed. Consider an example scenario:
    174  * T1: MCG M is deleted, and fires off deletion trap D1.
    175  * T2: MCG M is recreated, fires off creation trap C1, which is lost.
    176  * T3: Node N tries to transmit to M, joining in sendonly mode.
    177  * T4: MCG M is deleted, and fires off deletion trap D2.
    178  * T5: N receives a deletion trap, but can not distinguish D1 from D2.
    179  *     If the trap is D2, then a LEAVE is not required, since the mcg
    180  *     is already deleted; but if it is D1, a LEAVE is required. A safe
    181  *     approach is to always LEAVE, but the SM may be confused if it
    182  *     receives a LEAVE without a prior JOIN.
    183  *
    184  * Management of the non-membership to an mcg is similar to the above,
    185  * except that if the interface is in promiscuous mode, it is required
    186  * to attempt to re-join the mcg after receiving a trap. Unfortunately,
    187  * if the re-join attempt fails (in which case a warning message needs
    188  * to be printed), it is not clear whether it failed due to the mcg not
    189  * existing, or some fabric/hca issues, due to the delayed nature of
    190  * trap delivery. Querying the SA to establish presence/absence of the
    191  * mcg is also racy at best. Thus, the driver just prints a warning
    192  * message when it can not rejoin after receiving a create trap, although
    193  * this might be (on rare occasions) a mis-warning if the create trap is
    194  * received after the mcg was deleted.
    195  */
    196 
    197 /*
    198  * Implementation of atomic "recycle" bits and reference count
    199  * on address handles. This utilizes the fact that max reference
    200  * count on any handle is limited by number of send wqes, thus
    201  * high bits in the ac_ref field can be used as the recycle bits,
    202  * and only the low bits hold the number of pending Tx requests.
    203  * This atomic AH reference counting allows the Tx completion
    204  * handler not to acquire the id_ac_mutex to process every completion,
    205  * thus reducing lock contention problems between completion and
    206  * the Tx path.
    207  */
    208 #define	CYCLEVAL		0x80000
    209 #define	CLEAR_REFCYCLE(ace)	(ace)->ac_ref = 0
    210 #define	CYCLE_SET(ace)		(((ace)->ac_ref & CYCLEVAL) == CYCLEVAL)
    211 #define	GET_REF(ace)		((ace)->ac_ref)
    212 #define	GET_REF_CYCLE(ace) (				\
    213 	/*						\
    214 	 * Make sure "cycle" bit is set.		\
    215 	 */						\
    216 	ASSERT(CYCLE_SET(ace)),				\
    217 	((ace)->ac_ref & ~(CYCLEVAL))			\
    218 )
    219 #define	INC_REF(ace, num) {				\
    220 	atomic_add_32(&(ace)->ac_ref, num);		\
    221 }
    222 #define	SET_CYCLE_IF_REF(ace) (				\
    223 	CYCLE_SET(ace) ? B_TRUE :			\
    224 	    atomic_add_32_nv(&ace->ac_ref, CYCLEVAL) ==	\
    225 		CYCLEVAL ?				\
    226 		/*					\
    227 		 * Clear the "cycle" bit we just set;	\
    228 		 * ref count known to be 0 from above.	\
    229 		 */					\
    230 		CLEAR_REFCYCLE(ace), B_FALSE :		\
    231 		/*					\
    232 		 * We set "cycle" bit; let caller know.	\
    233 		 */					\
    234 		B_TRUE					\
    235 )
    236 #define	DEC_REF_DO_CYCLE(ace) (				\
    237 	atomic_dec_32_nv(&ace->ac_ref) == CYCLEVAL ?	\
    238 		/*					\
    239 		 * Ref count known to be 0 from above.	\
    240 		 */					\
    241 		B_TRUE :				\
    242 		B_FALSE					\
    243 )
    244 
    245 /*
    246  * Address handle entries maintained by the driver are kept in the
    247  * free and active lists. Each entry starts out in the free list;
    248  * it migrates to the active list when primed using ibt_get_paths()
    249  * and ibt_modify_ud_dest() for transmission to a specific destination.
    250  * In the active list, the entry has a reference count indicating the
    251  * number of ongoing/uncompleted transmits that reference it. The
    252  * entry is left in the active list even after the reference count
    253  * goes to 0, since successive transmits can find it there and do
    254  * not need to set up another entry (ie the path information is
    255  * cached using the active list). Entries on the active list are
    256  * also hashed using the destination link address as a key for faster
    257  * lookups during transmits.
    258  *
    259  * For any destination address (unicast or multicast, whatever the
    260  * join states), there will be at most one entry in the active list.
    261  * Entries with a 0 reference count on the active list can be reused
    262  * for a transmit to a new destination, if the free list is empty.
    263  *
    264  * The AH free list insertion/deletion is protected with the id_ac_mutex,
    265  * since the async thread and Tx callback handlers insert/delete. The
    266  * active list does not need a lock (all operations are done by the
    267  * async thread) but updates to the reference count are atomically
    268  * done (increments done by Tx path, decrements by the Tx callback handler).
    269  */
    270 #define	IBD_ACACHE_INSERT_FREE(state, ce) \
    271 	list_insert_head(&state->id_ah_free, ce)
    272 #define	IBD_ACACHE_GET_FREE(state) \
    273 	list_get_head(&state->id_ah_free)
    274 #define	IBD_ACACHE_INSERT_ACTIVE(state, ce) {			\
    275 	int _ret_;						\
    276 	list_insert_head(&state->id_ah_active, ce);		\
    277 	_ret_ = mod_hash_insert(state->id_ah_active_hash,	\
    278 	    (mod_hash_key_t)&ce->ac_mac, (mod_hash_val_t)ce);	\
    279 	ASSERT(_ret_ == 0);					\
    280 	state->id_ac_hot_ace = ce;				\
    281 }
    282 #define	IBD_ACACHE_PULLOUT_ACTIVE(state, ce) {			\
    283 	list_remove(&state->id_ah_active, ce);			\
    284 	if (state->id_ac_hot_ace == ce)				\
    285 		state->id_ac_hot_ace = NULL;			\
    286 	(void) mod_hash_remove(state->id_ah_active_hash,	\
    287 	    (mod_hash_key_t)&ce->ac_mac, (mod_hash_val_t)ce);	\
    288 }
    289 #define	IBD_ACACHE_GET_ACTIVE(state) \
    290 	list_get_head(&state->id_ah_active)
    291 
    292 /*
    293  * Padding for nd6 Neighbor Solicitation and Advertisement needs to be at
    294  * front of optional src/tgt link layer address. Right now Solaris inserts
    295  * padding by default at the end. The routine that does this is nce_xmit()
    296  * in ip_ndp.c. It copies the nd_lla_addr after the nd_opt_hdr_t. So when
    297  * the packet comes down from IP layer to the IBD driver, it is in the
    298  * following format: [IPoIB_PTXHDR_T][INET6 packet][ICMP6][OPT_ND_HDR_T]
    299  * This size is 2 bytes followed by [22 bytes of ipoib_machdr]. As a result
    300  * machdr is not 4 byte aligned and had 2 bytes of padding at the end.
    301  *
    302  * The send routine at IBD driver changes this packet as follows:
    303  * [IPoIB_HDR_T][INET6 packet][ICMP6][OPT_ND_HDR_T + 2 bytes of padding]
    304  * followed by [22 bytes of ipoib_machdr] resulting in machdr 4 byte
    305  * aligned.
    306  *
    307  * At the receiving side again ibd_process_rx takes the above packet and
    308  * removes the two bytes of front padding and inserts it at the end. This
    309  * is since the IP layer does not understand padding at the front.
    310  */
    /*
     * IBD_PAD_NSNA(ip6h, len, type)
     *
     *   ip6h  pointer to the IPv6 header; the ICMPv6 header is taken to start
     *         at &ip6h[1].
     *   len   an lvalue: it is reduced in place by
     *         sizeof (nd_neighbor_advert_t), and the caller sees the change.
     *   type  IBD_SEND selects the transmit shuffle; any other value selects
     *         the receive shuffle.
     *
     * Nothing else happens unless the ICMPv6 type is ND_NEIGHBOR_SOLICIT or
     * ND_NEIGHBOR_ADVERT and the reduced len is non-zero. In that case, with
     * nd_lla_ptr pointing just past the option header:
     *   - send: the IPOIB_ADDRL bytes at nd_lla_ptr are moved up by two bytes
     *     (copied from the highest byte down, so the overlap is safe) and
     *     the loop ends with i == 0, so bytes 0 and 1 are zeroed;
     *   - receive: the IPOIB_ADDRL bytes at nd_lla_ptr + 2 are moved down by
     *     two bytes and the loop ends with i == IPOIB_ADDRL, so the two
     *     bytes after the address are zeroed.
     *
     * The macro expands to a brace block (not an expression) that declares
     * locals named nd_lla_ptr, icmp6, opt and i; argument expressions must
     * not rely on caller variables with those names.
     */
    311 #define	IBD_PAD_NSNA(ip6h, len, type) {					\
    312 	uchar_t 	*nd_lla_ptr;					\
    313 	icmp6_t 	*icmp6;						\
    314 	nd_opt_hdr_t	*opt;						\
    315 	int 		i;						\
    316 									\
    317 	icmp6 = (icmp6_t *)&ip6h[1];					\
    318 	len -= sizeof (nd_neighbor_advert_t);				\
    319 	if (((icmp6->icmp6_type == ND_NEIGHBOR_SOLICIT) ||		\
    320 	    (icmp6->icmp6_type == ND_NEIGHBOR_ADVERT)) &&		\
    321 	    (len != 0)) {						\
    322 		opt = (nd_opt_hdr_t *)((uint8_t *)ip6h			\
    323 		    + IPV6_HDR_LEN + sizeof (nd_neighbor_advert_t));	\
    324 		ASSERT(opt != NULL);					\
    325 		nd_lla_ptr = (uchar_t *)&opt[1];			\
    326 		if (type == IBD_SEND) {					\
    327 			for (i = IPOIB_ADDRL; i > 0; i--)		\
    328 				*(nd_lla_ptr + i + 1) =			\
    329 				    *(nd_lla_ptr + i - 1);		\
    330 		} else {						\
    331 			for (i = 0; i < IPOIB_ADDRL; i++)		\
    332 				*(nd_lla_ptr + i) =			\
    333 				    *(nd_lla_ptr + i + 2);		\
    334 		}							\
    335 		*(nd_lla_ptr + i) = 0;					\
    336 		*(nd_lla_ptr + i + 1) = 0;				\
    337 	}								\
    338 }
    339 
    340 
    341 /*
    342  * IETF defined IPoIB encapsulation header, with 2 bytes of ethertype
    343  * followed by 2 reserved bytes. This is at the start of the
    344  * datagram sent to and received over the wire by the driver.
    345  */
    346 typedef struct ipoib_header {
    347 	ushort_t	ipoib_type;	/* ethertype */
    348 	ushort_t	ipoib_mbz;	/* reserved bytes */
    349 } ipoib_hdr_t;
    350 
    /* Size in bytes of the encapsulation header above. */
    351 #define	IPOIB_HDRSIZE	sizeof (struct ipoib_header)
    352 
    353 /*
    354  * IETF defined IPoIB link address; IBA QPN, followed by GID,
    355  * which has a prefix and suffix, as reported via ARP.
    356  */
    357 typedef struct ipoib_mac {
    358 	uint32_t	ipoib_qpn;		/* IBA QPN */
    359 	uint32_t	ipoib_gidpref[2];	/* GID prefix */
    360 	uint32_t	ipoib_gidsuff[2];	/* GID suffix */
    361 } ipoib_mac_t;
    362 
    /* Length in bytes of an IPoIB link address (five 32-bit words). */
    363 #define	IPOIB_ADDRL	sizeof (struct ipoib_mac)
    364 
    365 /*
    366  * Pseudo header prepended to datagram in DLIOCRAW transmit path
    367  * and when GLD hands the datagram to the gldm_send entry point.
    368  */
    369 typedef struct ipoib_ptxhdr {
    370 	ipoib_mac_t	ipoib_dest;	/* link address; presumably the destination - confirm */
    371 	ipoib_hdr_t	ipoib_rhdr;	/* IPoIB encapsulation header */
    372 } ipoib_ptxhdr_t;
    373 
    374 #define	IPOIBDLSAP(p, offset)	((ipoib_ptxhdr_t *)((caddr_t)(p)+offset))
    375 
    376 /*
    377  * The pseudo-GRH structure that sits before the data in the
    378  * receive buffer, and is overlaid on top of the real GRH.
    379  * The driver sets the ipoib_vertcflow to 0 if the pseudo-GRH
    380  * does not hold valid information. If it is indicated valid,
    381  * the driver must additionally provide the sender's qpn in
    382  * network byte order in ipoib_sqpn, and not touch the
    383  * remaining parts which were DMA'ed in by the IBA hardware.
    384  */
    385 typedef struct ipoib_pgrh {
    386 	uint32_t	ipoib_vertcflow;	/* 0 => pseudo-GRH holds no valid info */
    387 	uint32_t	ipoib_sqpn;		/* sender's qpn, network byte order */
    /* The four GID fields below are left as DMA'ed in by the hardware. */
    388 	uint32_t	ipoib_sgid_pref[2];
    389 	uint32_t	ipoib_sgid_suff[2];
    390 	uint32_t	ipoib_dgid_pref[2];
    391 	uint32_t	ipoib_dgid_suff[2];
    392 } ipoib_pgrh_t;
    393 
    394 /*
    395  * The GRH is also dma'ed into recv buffers, thus space needs
    396  * to be allocated for them.
    397  */
    398 #define	IPOIB_GRH_SIZE	sizeof (ipoib_pgrh_t)
    399 
    400 /* support the RC (reliable connected) mode */
    401 #define	IBD_MAC_ADDR_RC		0x80000000
    402 /* support the UC (unreliable connected) mode */
    403 #define	IBD_MAC_ADDR_UC		0x40000000
    404 
    /* Service ID for RC mode: a 1 followed by 14 zero hex digits. */
    405 #define	IBD_RC_SERVICE_ID 0x100000000000000ULL
    406 
    407 /*
    408  * Legacy OFED had used a wrong service ID (one additional zero digit) for
    409  * many years. To interop with legacy OFED, we support this wrong service ID
    410  * here.
        * (This value is a 1 followed by 15 zero hex digits.)
    411  */
    412 #define	IBD_RC_SERVICE_ID_OFED_INTEROP 0x1000000000000000ULL
    413 
    /* 0x7f == 127 */
    414 #define	IBD_RC_MIN_CQ_SIZE	0x7f
    415 
    416 /* Number of ibt_wc_t provided for each RC channel */
    /* 0x3f == 63 */
    417 #define	IBD_RC_MAX_CQ_WC	0x3f
    418 
    419 #if defined(_KERNEL) && !defined(_BOOT)
    420 
    421 #include <sys/ib/ibtl/ibti.h>
    422 #include <sys/ib/ib_pkt_hdrs.h>
    423 #include <sys/list.h>
    424 #include <sys/mac_provider.h>
    425 #include <sys/mac_ib.h>
    426 #include <sys/modhash.h>
    427 
    428 /* State of a reliable connected channel (ibd_rc_chan_t->chan_state) */
    429 typedef enum {
    430 	IBD_RC_STATE_INIT = 0,	/* explicit 0; later values follow in order */
    431 
    432 	/* Active side */
    433 	IBD_RC_STATE_ACT_REP_RECV,	/* reply received */
    434 	IBD_RC_STATE_ACT_ESTAB,		/* established, ready to send */
    435 	IBD_RC_STATE_ACT_REJECT,	/* rejected */
    436 	/* Someone else is closing this channel, please don't re-close it */
    437 	IBD_RC_STATE_ACT_CLOSING,
    438 	IBD_RC_STATE_ACT_CLOSED,
    439 	IBD_RC_STATE_ACT_ERROR,
    440 
    441 	/* Passive side */
    442 	IBD_RC_STATE_PAS_REQ_RECV,	/* request received */
    443 	IBD_RC_STATE_PAS_ESTAB,		/* established, ready to receive */
    444 	IBD_RC_STATE_PAS_REJECT,	/* rejected */
    445 
    446 	IBD_RC_STATE_PAS_CLOSED
    447 } ibd_rc_chan_state_t;
    448 
    449 /*
    450  * Structure to encapsulate various types of async requests.
    451  */
    452 typedef struct ibd_acache_rq {
    453 	struct list_node 	rq_list; 	/* list of pending work */
    454 	int			rq_op;		/* what operation */
    /* NOTE(review): rq_op presumably holds an IBD_ASYNC_* value - confirm. */
    /* Operation arguments; which of the fields below are used presumably depends on rq_op. */
    455 	ipoib_mac_t		rq_mac;
    456 	ib_gid_t		rq_gid;
    457 	void			*rq_ptr;
    458 	void			*rq_ptr2;
    459 } ibd_req_t;
    460 
    /* Multicast group entry; linked on a "full" or "non" member list. */
    461 typedef struct ibd_mcache {
    462 	struct list_node	mc_list;	/* full/non list */
    463 	uint8_t			mc_jstate;	/* presumably the join state - confirm */
    464 	boolean_t		mc_fullreap;	/* see "mc_fullreap case" in the list-handling notes above */
    465 	ibt_mcg_info_t		mc_info;
    466 	ibd_req_t		mc_req;		/* to queue LEAVE req */
    467 } ibd_mce_t;
    468 
    /* Address handle cache entry, kept on the free or the active list. */
    469 typedef struct ibd_acache_s {
    470 	struct list_node	ac_list;	/* free/active list */
    471 	ibt_ud_dest_hdl_t	ac_dest;
    472 	ipoib_mac_t		ac_mac;		/* its address is the active-hash key */
    473 	uint32_t		ac_ref;		/* ref count + recycle bit (CYCLEVAL) */
    474 	ibd_mce_t		*ac_mce;	/* for MCG AHs */
    475 
    476 	/* For Reliable Connected mode */
    477 	struct ibd_rc_chan_s	*ac_chan;
    478 	/* protect tx_too_big_ongoing */
    479 	kmutex_t		tx_too_big_mutex;
    480 	/* Deal with too big packet */
    481 	boolean_t		tx_too_big_ongoing;
    482 } ibd_ace_t;
    483 
    484 #define	IBD_MAX_SQSEG	59	/* also the length of ibd_swqe_t.w_sgl[] */
    485 #define	IBD_MAX_RQSEG	1
    486 
    /* Kind of work queue entry: send or receive. */
    487 typedef enum {
    488 	IBD_WQE_SEND,
    489 	IBD_WQE_RECV
    490 } ibd_wqe_type_t;
    491 
    /* Buffer type recorded in ibd_swqe_t.w_buftype; values start at 1. */
    492 typedef enum {
    493 	IBD_WQE_TXBUF = 1,
    494 	IBD_WQE_LSOBUF = 2,
    495 	IBD_WQE_MAPPED = 3,
    496 	IBD_WQE_RC_COPYBUF = 4
    497 } ibd_wqe_buftype_t;
    498 
    /*
     * Named kstat counters for Reliable Connected mode. The whole structure
     * exists only in DEBUG builds.
     */
    499 #ifdef DEBUG
    500 typedef struct ibd_rc_stat_s {
    /* Receive side */
    501 	kstat_named_t		rc_rcv_trans_byte;
    502 	kstat_named_t		rc_rcv_trans_pkt;
    503 	kstat_named_t		rc_rcv_copy_byte;
    504 	kstat_named_t		rc_rcv_copy_pkt;
    505 	kstat_named_t		rc_rcv_alloc_fail;
    506 
    /* Completion queue handler invocations */
    507 	kstat_named_t		rc_rcq_invoke;
    508 	kstat_named_t		rc_rcq_err;	/* fail in rcq handler */
    509 	kstat_named_t		rc_scq_invoke;
    510 
    511 	kstat_named_t		rc_rwqe_short;	/* short rwqe */
    512 
    /* Transmit side */
    513 	kstat_named_t		rc_xmt_bytes;
    514 	/* pkt size <= ibd_rc_tx_copy_thresh */
    515 	kstat_named_t		rc_xmt_small_pkt;
    516 	kstat_named_t		rc_xmt_fragmented_pkt;
    517 	/* fail in ibt_map_mem_iov() */
    518 	kstat_named_t		rc_xmt_map_fail_pkt;
    519 	/* succ in ibt_map_mem_iov() */
    520 	kstat_named_t		rc_xmt_map_succ_pkt;
    521 
    522 	kstat_named_t		rc_ace_not_found;	/* ace not found */
    523 	/* no swqe even after recycle */
    524 	kstat_named_t		rc_scq_no_swqe;
    525 	/* no tx large buf even after recycle */
    526 	kstat_named_t		rc_scq_no_largebuf;
    527 
    528 	/* short swqe in ibd_send() */
    529 	kstat_named_t		rc_swqe_short;
    530 	/* call mac_tx_update() when there is enough swqe */
    531 	kstat_named_t		rc_swqe_mac_update;
    532 	/* short large buf in ibd_send() */
    533 	kstat_named_t		rc_xmt_buf_short;
    534 	/* call mac_tx_update() when there is enough Tx large buffers */
    535 	kstat_named_t rc_xmt_buf_mac_update;
    536 
    /* Connection management */
    537 	kstat_named_t		rc_conn_succ;	/* # of success connect */
    538 	kstat_named_t		rc_conn_fail;	/* # of fail connect */
    539 	/* ace->ac_chan == NULL for unicast packet */
    540 	kstat_named_t		rc_null_conn;
    541 	/* not in active established state */
    542 	kstat_named_t		rc_no_estab_conn;
    543 
    544 	kstat_named_t		rc_act_close;	/* call ibd_rc_act_close() */
    545 	kstat_named_t		rc_pas_close;	/* call ibd_rc_pas_close() */
    546 	kstat_named_t		rc_delay_ace_recycle;
    547 	kstat_named_t		rc_act_close_simultaneous;
    548 
    549 	kstat_named_t		rc_reset_cnt;	/* # of Reset RC channel */
    550 } ibd_rc_stat_t;
    551 #endif
    552 
    /* Head of a singly linked list of RC channels, with its own lock. */
    553 typedef struct ibd_rc_chan_list_s {
    554 	/* This mutex protects chan_list and ibd_rc_chan_t.next */
    555 	kmutex_t		chan_list_mutex;
    556 	struct ibd_rc_chan_s	*chan_list;	/* first channel on the list */
    557 } ibd_rc_chan_list_t;
    558 
    /* Descriptor for one RC Tx large buffer; descriptors chain via lb_next. */
    559 typedef struct ibd_rc_tx_largebuf_s {
    560 	struct ibd_rc_tx_largebuf_s	*lb_next;	/* next descriptor */
    561 	uint8_t				*lb_buf;	/* the buffer memory */
    562 } ibd_rc_tx_largebuf_t;
    563 
    564 /*
    565  * Pre-registered copybuf used for send and receive
    566  */
    567 typedef struct ibd_copybuf_s {
    568 	ibt_wr_ds_t		ic_sgl;		/* single data segment entry */
    569 	uint8_t			*ic_bufaddr;	/* presumably the buffer's kernel address - confirm */
    570 } ibd_copybuf_t;
    571 
    /*
     * Part common to send and receive WQEs; embedded as the first member of
     * both ibd_swqe_t and ibd_rwqe_t.
     */
    572 typedef struct ibd_wqe_s {
    573 	struct ibd_wqe_s	*w_next;	/* next WQE in a chain */
    574 	ibd_copybuf_t		w_copybuf;
    575 	mblk_t			*im_mblk;
    576 } ibd_wqe_t;
    577 
    578 /*
    579  * Send WQE
    580  */
    581 typedef struct ibd_swqe_s {
    /* Must stay first: WQE_TO_SWQE() casts an ibd_wqe_t pointer directly. */
    582 	ibd_wqe_t		w_ibd_swqe;
    583 	ibd_wqe_buftype_t	w_buftype;
    584 	ibt_send_wr_t		w_swr;
    585 	ibd_ace_t		*w_ahandle;
    586 	ibt_mi_hdl_t		w_mi_hdl;
    587 	ibt_wr_ds_t		w_sgl[IBD_MAX_SQSEG];
    588 	ibd_rc_tx_largebuf_t	*w_rc_tx_largebuf;
    589 } ibd_swqe_t;
    590 
    591 #define	swqe_next		w_ibd_swqe.w_next
    592 #define	swqe_copybuf		w_ibd_swqe.w_copybuf
    593 #define	swqe_im_mblk		w_ibd_swqe.im_mblk
    594 #define	SWQE_TO_WQE(swqe)	(ibd_wqe_t *)&((swqe)->w_ibd_swqe)
    595 #define	WQE_TO_SWQE(wqe)	(ibd_swqe_t *)wqe
    596 
    597 /*
    598  * Receive WQE
    599  */
    600 typedef struct ibd_rwqe_s {
    /* Must stay first: WQE_TO_RWQE() casts an ibd_wqe_t pointer directly. */
    601 	ibd_wqe_t		w_ibd_rwqe;
    602 	struct ibd_state_s	*w_state;	/* back pointer to the per-port state */
    603 	ibt_recv_wr_t		w_rwr;
    604 	frtn_t			w_freemsg_cb;
    605 	boolean_t		w_freeing_wqe;
    606 	struct ibd_rc_chan_s	*w_chan;
    607 } ibd_rwqe_t;
    608 
    609 #define	rwqe_next		w_ibd_rwqe.w_next
    610 #define	rwqe_copybuf		w_ibd_rwqe.w_copybuf
    611 #define	rwqe_im_mblk		w_ibd_rwqe.im_mblk
    612 #define	RWQE_TO_WQE(rwqe)	(ibd_wqe_t *)&((rwqe)->w_ibd_rwqe)
    613 #define	WQE_TO_RWQE(wqe)	(ibd_rwqe_t *)wqe
    614 
    /* A list of WQEs with its own mutex and element count. */
    615 typedef struct ibd_list_s {
    616 	kmutex_t		dl_mutex;
    617 	ibd_wqe_t		*dl_head;
    /* The two members of ustat share storage; a given list uses only one. */
    618 	union {
    619 		boolean_t	pending_sends;
    620 		uint32_t	bufs_outstanding;
    621 	} ustat;
    622 	uint32_t		dl_cnt;
    623 } ibd_list_t;
    624 
    /* Shorthands for the union members above. */
    625 #define	dl_pending_sends	ustat.pending_sends
    626 #define	dl_bufs_outstanding	ustat.bufs_outstanding
    627 
    628 /*
    629  * LSO buffers
    630  *
    631  * Under normal circumstances we should never need to use any buffer
    632  * that's larger than MTU.  Unfortunately, IB HCA has limitations
    633  * on the length of SGL that are much smaller than those for regular
    634  * ethernet NICs.  Since the network layer doesn't care to limit the
    635  * number of mblk fragments in any send mp chain, we end up having to
    636  * use these larger-than-MTU sized (larger than id_tx_buf_sz actually)
    637  * buffers occasionally.
    638  */
    /* Descriptor for one LSO buffer. */
    639 typedef struct ibd_lsobuf_s {
    640 	struct ibd_lsobuf_s *lb_next;	/* next descriptor in a chain */
    641 	uint8_t		*lb_buf;	/* the buffer memory */
    642 	int		lb_isfree;	/* presumably non-zero while unused - confirm */
    643 } ibd_lsobuf_t;
    644 
    /* Bookkeeping for the set ("bucket") of LSO buffers. */
    645 typedef struct ibd_lsobkt_s {
    646 	uint8_t		*bkt_mem;
    647 	ibd_lsobuf_t	*bkt_bufl;
    648 	ibd_lsobuf_t	*bkt_free_head;	/* head of the free descriptors */
    649 	ibt_mr_hdl_t	bkt_mr_hdl;
    650 	ibt_mr_desc_t	bkt_mr_desc;
    651 	uint_t		bkt_nelem;	/* presumably total buffers - confirm */
    652 	uint_t		bkt_nfree;	/* presumably buffers currently free - confirm */
    653 } ibd_lsobkt_t;
    654 
    655 /*
    656  * Posting to a single software rx post queue is contentious,
    657  * so break it out into an array of (multiple) queues.
    658  *
    659  * Try to ensure rx_queue structs fall in different cache lines using a filler.
    660  * Note: the RX_QUEUE_CACHE_LINE needs to change if the struct changes.
    661  */
    /*
     * RX_QUEUE_CACHE_LINE is 64 minus the combined size of the three leading
     * members of ibd_rx_queue_t, so rx_pad brings the structure to 64 bytes,
     * provided the compiler inserts no padding of its own. This assumes a
     * 64-byte cache line.
     */
    662 #define	RX_QUEUE_CACHE_LINE \
    663 	(64 - (sizeof (kmutex_t) + sizeof (ibd_wqe_t *) + sizeof (uint_t)))
    664 typedef struct ibd_rx_queue_s {
    665 	kmutex_t		rx_post_lock;
    666 	ibd_wqe_t		*rx_head;
    667 	uint_t			rx_cnt;
    668 	uint8_t			rx_pad[RX_QUEUE_CACHE_LINE];	/* filler only */
    669 } ibd_rx_queue_t;
    670 
    671 /*
    672  * This structure maintains information per port per HCA
    673  * (per network interface).
    674  */
    675 typedef struct ibd_state_s {
    676 	dev_info_t		*id_dip;
    677 	ibt_clnt_hdl_t		id_ibt_hdl;
    678 	ibt_hca_hdl_t		id_hca_hdl;
    679 	ibt_pd_hdl_t		id_pd_hdl;
    680 	kmem_cache_t		*id_req_kmc;
    681 
    682 	ibd_list_t		id_tx_rel_list;
    683 
    684 	uint32_t		id_running;
    685 
    686 	uint32_t		id_max_sqseg;
    687 	uint32_t		id_max_sqseg_hiwm;
    688 	ibd_list_t		id_tx_list;
    689 	ddi_softintr_t		id_tx;
    690 	uint32_t		id_tx_sends;
    691 
    692 	kmutex_t		id_txpost_lock;
    693 	ibd_swqe_t		*id_tx_head;
    694 	ibd_swqe_t		*id_tx_tail;
    695 	int			id_tx_busy;
    696 
    697 	uint_t			id_tx_buf_sz;
    698 	uint8_t			*id_tx_bufs;
    699 	ibd_swqe_t		*id_tx_wqes;
    700 	ibt_mr_hdl_t		id_tx_mr_hdl;
    701 	ibt_mr_desc_t		id_tx_mr_desc;
    702 
    703 	kmutex_t		id_lso_lock;
    704 	ibd_lsobkt_t		*id_lso;
    705 
    706 	kmutex_t		id_scq_poll_lock;
    707 	int			id_scq_poll_busy;
    708 
    709 	ibt_cq_hdl_t		id_scq_hdl;
    710 	ibt_wc_t		*id_txwcs;
    711 	uint32_t		id_txwcs_size;
    712 
    713 	int			id_rx_nqueues;
    714 	ibd_rx_queue_t		*id_rx_queues;
    715 	int			id_rx_post_queue_index;
    716 	uint32_t		id_rx_post_active;
    717 
    718 	ibd_rwqe_t		*id_rx_wqes;
    719 	uint8_t			*id_rx_bufs;
    720 	ibt_mr_hdl_t		id_rx_mr_hdl;
    721 	ibt_mr_desc_t		id_rx_mr_desc;
    722 	uint_t			id_rx_buf_sz;
    723 	uint32_t		id_num_rwqe;
    724 	ibd_list_t		id_rx_list;
    725 	ddi_softintr_t		id_rx;
    726 	uint32_t		id_rx_bufs_outstanding_limit;
    727 	uint32_t		id_rx_allocb;
    728 	uint32_t		id_rx_allocb_failed;
    729 	ibd_list_t		id_rx_free_list;
    730 
    731 	kmutex_t		id_rcq_poll_lock;
    732 	int			id_rcq_poll_busy;
    733 	uint32_t		id_rxwcs_size;
    734 	ibt_wc_t		*id_rxwcs;
    735 	ibt_cq_hdl_t		id_rcq_hdl;
    736 
    737 	ibt_channel_hdl_t	id_chnl_hdl;
    738 	ib_pkey_t		id_pkey;
    739 	uint16_t		id_pkix;
    740 	uint8_t			id_port;
    741 	ibt_mcg_info_t		*id_mcinfo;
    742 
    743 	mac_handle_t		id_mh;
    744 	mac_resource_handle_t	id_rh;
    745 	ib_gid_t		id_sgid;
    746 	ib_qpn_t		id_qpnum;
    747 	ipoib_mac_t		id_macaddr;
    748 	ib_gid_t		id_mgid;
    749 	ipoib_mac_t		id_bcaddr;
    750 
    751 	int			id_mtu;
    752 	uchar_t			id_scope;
    753 
    754 	kmutex_t		id_acache_req_lock;
    755 	kcondvar_t		id_acache_req_cv;
    756 	struct list		id_req_list;
    757 	kt_did_t		id_async_thrid;
    758 
    759 	kmutex_t		id_ac_mutex;
    760 	ibd_ace_t		*id_ac_hot_ace;
    761 	struct list		id_ah_active;
    762 	struct list		id_ah_free;
    763 	ipoib_mac_t		id_ah_addr;
    764 	ibd_req_t		id_ah_req;
    765 	char			id_ah_op;
    766 	uint64_t		id_ah_error;
    767 	ibd_ace_t		*id_ac_list;
    768 	mod_hash_t		*id_ah_active_hash;
    769 
    770 	kmutex_t		id_mc_mutex;
    771 	struct list		id_mc_full;
    772 	struct list		id_mc_non;
    773 
    774 	kmutex_t		id_trap_lock;
    775 	kcondvar_t		id_trap_cv;
    776 	boolean_t		id_trap_stop;
    777 	uint32_t		id_trap_inprog;
    778 
    779 	char			id_prom_op;
    780 
    781 	kmutex_t		id_sched_lock;
    782 	int			id_sched_needed;
    783 	int			id_sched_cnt;
    784 	int			id_sched_lso_cnt;
    785 
    786 	kmutex_t		id_link_mutex;
    787 	link_state_t		id_link_state;
    788 	uint64_t		id_link_speed;
    789 
    790 	uint64_t		id_num_intrs;
    791 	uint64_t		id_tx_short;
    792 	uint32_t		id_num_swqe;
    793 
    794 	uint64_t		id_xmt_bytes;
    795 	uint64_t		id_rcv_bytes;
    796 	uint64_t		id_multi_xmt;
    797 	uint64_t		id_brd_xmt;
    798 	uint64_t		id_multi_rcv;
    799 	uint64_t		id_brd_rcv;
    800 	uint64_t		id_xmt_pkt;
    801 	uint64_t		id_rcv_pkt;
    802 
    803 	uint32_t		id_hwcksum_capab;
    804 	boolean_t		id_lso_policy;
    805 	boolean_t		id_lso_capable;
    806 	uint_t			id_lso_maxlen;
    807 	int			id_hca_res_lkey_capab;
    808 	ibt_lkey_t		id_res_lkey;
    809 
    810 	boolean_t		id_bgroup_created;
    811 	kmutex_t		id_macst_lock;
    812 	kcondvar_t		id_macst_cv;
    813 	uint32_t		id_mac_state;
    814 
    815 	/* For Reliable Connected Mode */
    816 	boolean_t		id_enable_rc;
    817 	boolean_t		rc_enable_srq;
    818 
    819 	int			rc_mtu;
    820 	uint32_t		rc_tx_max_sqseg;
    821 
    822 	ibt_srv_hdl_t		rc_listen_hdl;
    823 	ibt_sbind_hdl_t		rc_listen_bind;
    824 	ibt_srv_hdl_t		rc_listen_hdl_OFED_interop;
    825 	ibt_sbind_hdl_t		rc_listen_bind_OFED_interop;
    826 
    827 	ibd_rc_chan_list_t	rc_pass_chan_list;
    828 	/* obsolete active channel list */
    829 	ibd_rc_chan_list_t	rc_obs_act_chan_list;
    830 
    831 	kmutex_t		rc_ace_recycle_lock;
    832 	ibd_ace_t		*rc_ace_recycle;
    833 
    834 	/* Send */
    835 	/*
    836 	 * This mutex protects rc_tx_largebuf_free_head, rc_tx_largebuf_nfree
    837 	 * and ibd_rc_tx_largebuf_t->lb_next
    838 	 */
    839 	kmutex_t		rc_tx_large_bufs_lock;
    840 	ibd_rc_tx_largebuf_t	*rc_tx_largebuf_free_head;
    841 	uint_t			rc_tx_largebuf_nfree;
    842 	/* The chunk of whole Tx large buffers */
    843 	uint8_t			*rc_tx_mr_bufs;
    844 	ibt_mr_hdl_t		rc_tx_mr_hdl;
    845 	ibt_mr_desc_t		rc_tx_mr_desc;
    846 	ibd_rc_tx_largebuf_t	*rc_tx_largebuf_desc_base;	/* base addr */
    847 
    848 	boolean_t		rc_enable_iov_map;
    849 	uint_t			rc_max_sqseg_hiwm;
    850 
    851 	/* For SRQ */
    852 	uint32_t 		rc_srq_size;
    853 	ibt_srq_hdl_t		rc_srq_hdl;
    854 	ibd_list_t		rc_srq_rwqe_list;
    855 	ibd_list_t		rc_srq_free_list;
    856 	ibd_rwqe_t		*rc_srq_rwqes;
    857 	uint8_t			*rc_srq_rx_bufs;
    858 	ibt_mr_hdl_t		rc_srq_rx_mr_hdl;
    859 	ibt_mr_desc_t		rc_srq_rx_mr_desc;
    860 
    861 	/* For chained receive */
    862 	kmutex_t		rc_rx_lock;
    863 	mblk_t			*rc_rx_mp;
    864 	mblk_t			*rc_rx_mp_tail;
    865 	uint32_t		rc_rx_mp_len;
    866 
    867 	/* Counters for RC mode */
    868 	/* RX */
    869 	/*
    870 	 * # of received packets. These packets are transferred directly to GLD
    871 	 * without being copied.
    872 	 */
    873 	uint64_t		rc_rcv_trans_byte;
    874 	uint64_t		rc_rcv_trans_pkt;
    875 	/*
    876 	 * # of received packets. We allocate new buffers for these packets,
    877 	 * copy their contents into the new buffers, then transfer them to GLD.
    878 	 */
    879 	uint64_t		rc_rcv_copy_byte;
    880 	uint64_t		rc_rcv_copy_pkt;
    881 	uint64_t		rc_rcv_alloc_fail;
    882 
    883 #ifdef DEBUG
    884 	uint64_t		rc_rwqe_short;	/* short rwqe */
    885 #endif
    886 
    887 	/* # of times the Receive CQ handler is invoked */
    888 	uint64_t		rc_rcq_invoke;
    889 	/* wc->wc_status != IBT_WC_SUCCESS */
    890 	uint64_t		rc_rcq_err;
    891 
    892 	/* Tx */
    893 	uint64_t		rc_xmt_bytes;
    894 
    895 	/* pkt size <= ibd_rc_tx_copy_thresh */
    896 	uint64_t		rc_xmt_small_pkt;
    897 	uint64_t		rc_xmt_fragmented_pkt;
    898 	/* fail in ibt_map_mem_iov() */
    899 	uint64_t		rc_xmt_map_fail_pkt;
    900 	/* succ in ibt_map_mem_iov() */
    901 	uint64_t		rc_xmt_map_succ_pkt;
    902 
    903 	uint64_t		rc_ace_not_found;
    904 
    905 	uint64_t		rc_xmt_drop_too_long_pkt;
    906 	uint64_t		rc_xmt_icmp_too_long_pkt;
    907 	uint64_t		rc_xmt_reenter_too_long_pkt;
    908 
    909 	/* short swqe in ibd_send() */
    910 	uint64_t		rc_swqe_short;
    911 	/* call mac_tx_update when there is enough swqe */
    912 	uint64_t		rc_swqe_mac_update;
    913 	/* short tx large copy buf in ibd_send() */
    914 	uint64_t		rc_xmt_buf_short;
    915 	/* call mac_tx_update when there is enough Tx copy buf */
    916 	uint64_t		rc_xmt_buf_mac_update;
    917 
    918 	/* No swqe even after calling the swqe recycle function */
    919 	uint64_t		rc_scq_no_swqe;
    920 	/* No large Tx buf even after calling the swqe recycle function */
    921 	uint64_t		rc_scq_no_largebuf;
    922 	/* # of times the Send CQ handler is invoked */
    923 	uint64_t		rc_scq_invoke;
    924 
    925 	/* Connection setup and close */
    926 	uint64_t		rc_conn_succ;	/* time of succ connect */
    927 	uint64_t		rc_conn_fail;	/* time of fail connect */
    928 	/* ace->ac_chan == NULL for unicast packet */
    929 	uint64_t		rc_null_conn;
    930 	/* not in active established state */
    931 	uint64_t		rc_no_estab_conn;
    932 
    933 	uint64_t		rc_act_close;	/* call ibd_rc_act_close() */
    934 	uint64_t		rc_pas_close;	/* call ibd_rc_pas_close() */
    935 	uint64_t		rc_delay_ace_recycle;
    936 	uint64_t		rc_act_close_simultaneous;
    937 
    938 	/* # of times an RC channel has been reset */
    939 	uint64_t		rc_reset_cnt;
    940 
    941 #ifdef DEBUG
    942 	kstat_t 		*rc_ksp;
    943 #endif
    944 } ibd_state_t;
    945 
    946 typedef struct ibd_rc_msg_hello_s {	/* RC "hello" message: two uint32_t fields */
    947 	uint32_t reserved_qpn;	/* NOTE(review): presumably a QP number - confirm */
    948 	uint32_t rx_mtu;	/* NOTE(review): presumably sender's Rx MTU - confirm */
    949 } ibd_rc_msg_hello_t;
    950 
    951 typedef struct ibd_rc_chan_s {	/* state of one Reliable Connected channel */
    952 	struct ibd_rc_chan_s	*next;	/* link to another ibd_rc_chan_s */
    953 	/* channel hdl that we'll be using for Reliable Connected Mode */
    954 	ibt_channel_hdl_t	chan_hdl;
    955 	struct ibd_state_s	*state;	/* NOTE(review): presumably owner; confirm */
    956 	ibd_ace_t		*ace;
    957 	ibd_rc_chan_state_t	chan_state;
    958 
    959 	/* used to detect a duplicate connection */
    960 	ib_gid_t		requester_gid;
    961 	ib_pkey_t		requester_pkey;
    962 
    963 	ibd_list_t		tx_wqe_list;	/* free wqe list */
    964 	ibd_list_t		tx_rel_list;	/* for swqe recycle */
    965 
    966 	ibd_swqe_t		*tx_wqes;
    967 
    968 	/* start address of Tx Buffers */
    969 	uint8_t			*tx_mr_bufs;
    970 	ibt_mr_hdl_t		tx_mr_hdl;	/* presumably for tx_mr_bufs; confirm */
    971 	ibt_mr_desc_t		tx_mr_desc;	/* presumably for tx_mr_bufs; confirm */
    972 
    973 	ibt_cq_hdl_t		scq_hdl;	/* Tx completion queue */
    974 	ibt_wc_t		tx_wc[IBD_RC_MAX_CQ_WC];	/* ibt_wc_t array */
    975 	ddi_softintr_t		scq_softintr;
    976 
    977 	uint32_t		tx_trans_error_cnt;
    978 
    979 	/* For chained send */
    980 	kmutex_t		tx_post_lock;
    981 	ibd_swqe_t		*tx_head;
    982 	ibd_swqe_t		*tx_tail;
    983 	int			tx_busy;
    984 
    985 	/* For tx buffer recycle */
    986 	kmutex_t		tx_poll_lock;
    987 	int			tx_poll_busy;
    988 
    989 	/* Rx */
    990 	ibd_list_t		rx_wqe_list;	/* used by ibt_post_recv */
    991 	ibd_list_t		rx_free_list;	/* free rwqe list */
    992 
    993 	ibt_cq_hdl_t		rcq_hdl;	/* Rx completion queue */
    994 	ibt_wc_t		rx_wc[IBD_RC_MAX_CQ_WC];	/* ibt_wc_t array */
    995 
    996 	ibd_rwqe_t		*rx_rwqes;	/* the chunk holding all rwqes */
    997 	uint8_t			*rx_bufs;	/* the chunk holding all Rx bufs */
    998 	ibt_mr_hdl_t		rx_mr_hdl;	/* ibt_mr_hdl_t for rx_bufs */
    999 	ibt_mr_desc_t		rx_mr_desc;	/* ibt_mr_desc_t for rx_bufs */
   1000 
   1001 	/* For chained receive */
   1002 	kmutex_t		rx_lock;
   1003 	mblk_t			*rx_mp;
   1004 	mblk_t			*rx_mp_tail;
   1005 	uint32_t		rx_mp_len;
   1006 
   1007 	uint32_t 		rcq_size;	/* presumably size of rcq_hdl; confirm */
   1008 	uint32_t 		scq_size;	/* presumably size of scq_hdl; confirm */
   1009 	/*
   1010 	 * We need two channels for each connection:
   1011 	 * one channel for Tx and another channel for Rx.
   1012 	 * If "is_tx_chan == B_TRUE", this is a Tx channel.
   1013 	 */
   1014 	boolean_t		is_tx_chan;
   1015 } ibd_rc_chan_t;
   1016 
   1017 /*
   1018  * The following functions are defined in "ibd.c".
   1019  * They are also used by "ibd_cm.c", so they are declared here.
   1020  */
   1021 void ibd_print_warn(ibd_state_t *, char *, ...);	/* variadic */
   1022 void ibd_unmap_mem(ibd_state_t *, ibd_swqe_t *);
   1023 void ibd_queue_work_slot(ibd_state_t *, ibd_req_t *, int);
   1024 boolean_t ibd_acache_recycle(ibd_state_t *, ipoib_mac_t *, boolean_t);
   1025 void ibd_dec_ref_ace(ibd_state_t *, ibd_ace_t *);
   1026 ibd_ace_t *ibd_acache_find(ibd_state_t *, ipoib_mac_t *, boolean_t, int);
   1027 
   1028 /*
   1029  * The following functions are defined in "ibd_cm.c".
   1030  * They are also used in "ibd.c", so they are declared here.
   1031  */
   1032 void ibd_async_rc_process_too_big(ibd_state_t *, ibd_req_t *);
   1033 void ibd_async_rc_close_act_chan(ibd_state_t *, ibd_req_t *);
   1034 void ibd_async_rc_recycle_ace(ibd_state_t *, ibd_req_t *);
   1035 
   1036 /* Connection setup/close functions */
   1037 ibt_status_t ibd_rc_listen(ibd_state_t *);
   1038 void ibd_rc_stop_listen(ibd_state_t *);
   1039 ibt_status_t ibd_rc_connect(ibd_state_t *, ibd_ace_t *, ibt_path_info_t *,
   1040     uint64_t);
   1041 void ibd_rc_try_connect(ibd_state_t *, ibd_ace_t *,  ibt_path_info_t *);
   1042 void ibd_rc_signal_act_close(ibd_state_t *, ibd_ace_t *);
   1043 void ibd_rc_signal_ace_recycle(ibd_state_t *, ibd_ace_t *);
   1044 int ibd_rc_close_all_chan(ibd_state_t *);	/* NOTE(review): int meaning? */
   1045 
   1046 /* Receive functions (SRQ list setup/teardown) */
   1047 int ibd_rc_init_srq_list(ibd_state_t *);	/* NOTE(review): int meaning? */
   1048 void ibd_rc_fini_srq_list(ibd_state_t *);
   1049 
   1050 /* Send functions */
   1051 int ibd_rc_init_tx_largebuf_list(ibd_state_t *);
   1052 void ibd_rc_fini_tx_largebuf_list(ibd_state_t *);
   1053 ibd_swqe_t *ibd_rc_acquire_swqes(ibd_rc_chan_t *);
   1054 void ibd_rc_post_send(ibd_rc_chan_t *, ibd_swqe_t *);
   1055 void ibd_rc_drain_scq(ibd_rc_chan_t *, ibt_cq_hdl_t);
   1056 void ibd_rc_tx_cleanup(ibd_swqe_t *);
   1057 
   1058 /* Others */
   1059 void ibd_rc_get_conf(ibd_state_t *);
   1060 int ibd_rc_init_stats(ibd_state_t *);	/* NOTE(review): int meaning? */
   1061 
   1062 #endif /* _KERNEL && !_BOOT */
   1063 
   1064 #ifdef __cplusplus
   1065 }
   1066 #endif
   1067 
   1068 #endif	/* _SYS_IB_CLIENTS_IBD_H */
   1069