Home | History | Annotate | Download | only in rds
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 /*
     26  * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved.
     27  *
     28  * This software is available to you under a choice of one of two
     29  * licenses.  You may choose to be licensed under the terms of the GNU
     30  * General Public License (GPL) Version 2, available from the file
     31  * COPYING in the main directory of this source tree, or the
     32  * OpenIB.org BSD license below:
     33  *
     34  *     Redistribution and use in source and binary forms, with or
     35  *     without modification, are permitted provided that the following
     36  *     conditions are met:
     37  *
     38  *	- Redistributions of source code must retain the above
     39  *	  copyright notice, this list of conditions and the following
     40  *	  disclaimer.
     41  *
     42  *	- Redistributions in binary form must reproduce the above
     43  *	  copyright notice, this list of conditions and the following
     44  *	  disclaimer in the documentation and/or other materials
     45  *	  provided with the distribution.
     46  *
     47  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     48  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     49  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
     50  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
     51  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
     52  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
     53  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     54  * SOFTWARE.
     55  *
     56  */
     57 /*
     58  * Sun elects to include this software in Sun product
     59  * under the OpenIB BSD license.
     60  *
     61  *
     62  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     63  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     64  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     65  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     66  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     67  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     68  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     69  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     70  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     71  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     72  * POSSIBILITY OF SUCH DAMAGE.
     73  */
     74 
     75 #ifndef _RDSIB_EP_H
     76 #define	_RDSIB_EP_H
     77 
     78 #ifdef __cplusplus
     79 extern "C" {
     80 #endif
     81 
     82 #include <netinet/in.h>
     83 
     84 /*
     85  * Endpoint type: Control channel or Data channel
        *
        * Stored in rds_ep_t:ep_type. A session (rds_session_t) embeds one
        * endpoint of each kind (session_ctrlep and session_dataep).
        * No enumerator has the value 0, so a zero-filled endpoint does not
        * carry a valid type.
     86  */
     87 typedef enum rds_ep_type_s {
     88 	RDS_EP_TYPE_CTRL		= 1,
     89 	RDS_EP_TYPE_DATA		= 2
     90 } rds_ep_type_t;
     91 
     92 /*
     93  * Channel States (stored in rds_ep_t:ep_state)
     94  *
     95  * RDS_EP_STATE_UNCONNECTED - Initial state when rds_ep_t is created
     96  * RDS_EP_STATE_ACTIVE_PENDING - Active side connection in progress
     97  * RDS_EP_STATE_PASSIVE_PENDING - Passive side connection in progress
     98  * RDS_EP_STATE_CONNECTED - Channel is connected
        * RDS_EP_STATE_CLOSING - NOTE(review): presumably a channel close is in
        *                        progress; confirm against rds_ep.c
     99  * RDS_EP_STATE_CLOSED - Channel is closed
        * RDS_EP_STATE_ERROR - NOTE(review): presumably an error was seen on the
        *                      channel; confirm against rds_ep.c
        *
        * An earlier version of this comment listed RDS_EP_STATE_DESTROY_TIMEWAIT
        * as the "closed" state; no such enumerator exists in this enum.
    100  */
    101 typedef enum rds_ep_state_s {
    102 	RDS_EP_STATE_UNCONNECTED		= 0,
    103 	RDS_EP_STATE_ACTIVE_PENDING		= 1,
    104 	RDS_EP_STATE_PASSIVE_PENDING		= 2,
    105 	RDS_EP_STATE_CONNECTED			= 3,
    106 	RDS_EP_STATE_CLOSING			= 4,
    107 	RDS_EP_STATE_CLOSED			= 5,
    108 	RDS_EP_STATE_ERROR			= 6
    109 } rds_ep_state_t;
    110 
    111 /*
    112  * Session State Machine Diagram
    113  *
    114  *                     -----------------
    115  *                    |       (6)       |
    116  *                    |                 |
    117  *                    v                 |
    118  *             --> (Created)-------->(Failed)
    119  *            |     |         (5)       ^
    120  *            |     |(1)                |
    121  *            |     |                   |(9)
    122  *            |     v                   |
    123  *            |    (Init)<--------------|
    124  *            |     | |       (8)       |
    125  *            |     | |                 |
    126  *            |  (2)|  --------------   |
    127  *        (11)|     |         (7)    |  |
    128  *            |     v                v  |
    129  *            |    (Connected)------>(Error)
    130  *            |     |         (10)
    131  *            |     |(3)
    132  *            |     |
    133  *            |     v
    134  *            |    (Closed)
    135  *            |     |
    136  *            |     |(4)
    137  *            |     |
    138  *            |     v
    139  *             --- (Fini) ------->(Destroy)
    140  *                         (12)
    141  *
    142  *	(1) rds_session_init()
    143  *	(2) rds_session_open()
    144  *	(3) rds_session_close()
    145  *	(4) rds_session_fini()
    146  *	(4) rds_passive_session_fini()
    147  *	(5) Failure in rds_session_init()
    148  *	(6) rds_sendmsg(3SOCKET)/Incoming CM REQ
    149  *	(7) Failure in rds_session_open()
    150  *	(8) rds_session_close(), rds_get_ibaddr() and rds_session_reinit()
    151  *	(9) rds_session_close() and rds_session_fini()
    152  *	(9) rds_cleanup_passive_session() and rds_passive_session_fini()
    153  *	(10) Connection Error/Incoming REQ
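    154  *	(11) rds_sendmsg(3SOCKET)/Incoming REQ
        *	(12) Fini -> Destroy in the diagram; NOTE(review): the routine that
        *	     drives this transition is not listed here - add it.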
    155  *
    156  *
    157  * Created   - Session is allocated and inserted into the sessionlist but
    158  *             not all members are initialized.
    159  * Init      - All members are initialized, send buffer pool is allocated.
    160  * Connected - Data and ctrl RC channels are opened.
    161  * Closed    - Data and ctrl RC channels are closed.
    162  * Fini      - Send buffer pool and buffers in the receive pool are freed.
    163  * Destroy   - Session is removed from the session list and is ready to be
    164  *             freed.
    165  * Failed    - Session initialization has failed (send buffer pool allocation).
    166  * Error     - (1) Failed to open the RC channels.
    167  *             (2) An error occurred on the RC channels while sending.
    168  *             (3) Received a new CM REQ message on the existing connection.
    169  */
    170 typedef enum rds_session_state_s {
        	/*
        	 * Session states. The value is stored in
        	 * rds_session_t:session_state, which is declared uint8_t, so
        	 * every enumerator must fit in 8 bits.
        	 *
        	 * HCA_CLOSING, ACTIVE_CLOSING and PASSIVE_CLOSING do not appear
        	 * in the state diagram or the state descriptions that precede
        	 * this enum. NOTE(review): document when a session enters and
        	 * leaves these three states.
        	 */
    171 	RDS_SESSION_STATE_CREATED		= 0,
    172 	RDS_SESSION_STATE_FAILED		= 1,
    173 	RDS_SESSION_STATE_INIT			= 2,
    174 	RDS_SESSION_STATE_CONNECTED		= 3,
    175 	RDS_SESSION_STATE_HCA_CLOSING		= 4,
    176 	RDS_SESSION_STATE_ERROR			= 5,
    177 	RDS_SESSION_STATE_ACTIVE_CLOSING	= 6,
    178 	RDS_SESSION_STATE_PASSIVE_CLOSING	= 7,
    179 	RDS_SESSION_STATE_CLOSED		= 8,
    180 	RDS_SESSION_STATE_FINI			= 9,
    181 	RDS_SESSION_STATE_DESTROY		= 10
    182 } rds_session_state_t;
    183 
    184 #define	RDS_SESSION_TRANSITION(sp, state)			\
    185 		rw_enter(&sp->session_lock, RW_WRITER);		\
    186 		sp->session_state = state;			\
    187 		rw_exit(&sp->session_lock)
    188 
    188 /*
        * Session type: Active or Passive end of the session.
        * These are the values held in rds_session_t:session_type (uint8_t).
        * NOTE(review): presumably also the 'type' argument of
        * rds_session_create() - confirm against rds_ep.c.
        */
    189 #define	RDS_SESSION_ACTIVE	1
    190 #define	RDS_SESSION_PASSIVE	2
    192 
    193 /*
    194  * RDS QP Information
    195  *
        * Embedded in rds_ep_t as ep_recvqp. Each field described below is
        * named qp_<name> in the struct.
        *
    196  * lock  - Synchronize access
    197  * depth - Max number of WRs that can be posted.
    198  * level - Number of outstanding WRs in the QP
    199  * lwm   - Water mark at which to post more receive WRs.
        *         NOTE(review): presumably a low-water mark compared against
        *         'level' - confirm in the code that posts receive WRs.
    200  * taskqpending - Indicates if a taskq thread is dispatched to post receive
    201  *		WRs in the RQ
    202  */
    203 typedef struct rds_qp_s {
    204 	kmutex_t		qp_lock;
    205 	uint32_t		qp_depth;
    206 	uint32_t		qp_level;
    207 	uint32_t		qp_lwm;
    208 	boolean_t		qp_taskqpending;
    209 } rds_qp_t;
    210 
    211 /*
    212  * RDS EndPoint(One end of RC connection)
    213  *
        * A session embeds two of these by value (session_dataep and
        * session_ctrlep in rds_session_t). Each field described below is
        * named ep_<name> in the struct.
        *
    214  * sp        - Parent Session
    215  * type      - Control or Data Channel
    216  * remip     - Same as session_remip
    217  * myip      - Same as session_myip
    218  * snd_lkey  - LKey for the send buffer pool
    219  * hca_guid  - HCA guid
    220  * snd_mrhdl - Memory handle for the send buffer pool
    221  * lock      - Protects the members
    222  * state     - See rds_ep_state_t
    223  * chanhdl   - RC channel handle
    224  * sendcq    - Send CQ handle
    225  * recvcq    - Recv CQ handle
    226  * sndpool   - Send buffer Pool
    227  * rcvpool   - Recv buffer Pool
        * recvqp    - QP information for the receive side (see rds_qp_t)
        * rdmacnt   - NOTE(review): not described by the original comment;
        *             confirm its meaning against rds_ep.c/rds_ib.c.
    228  * segfbp    - First packet of a segmented message.
    229  * seglbp    - Last packet of a segmented message.
    230  * lbufid    - Last successful buffer that was received by the remote.
    231  *             Valid only during session failover/reconnect.
    232  * rbufid    - Last buffer (remote buffer) that was received successfully
    233  *             from the remote node.
    234  * ackds     - SGL used for send acknowledgement.
    235  * ackwr     - WR to send acknowledgement.
    236  * ackhdl    - Memory handle for 'ack_addr'.
    237  * ack_rkey  - RKey for 'ack_addr'.
    238  * ack_addr  - Memory region to receive RDMA acknowledgement from remote.
    239  */
    240 typedef struct rds_ep_s {
    241 	struct rds_session_s	*ep_sp;
    242 	rds_ep_type_t		ep_type;
    243 	ipaddr_t		ep_remip;
    244 	ipaddr_t		ep_myip;
    245 	ibt_lkey_t		ep_snd_lkey;
    246 	ib_guid_t		ep_hca_guid;
    247 	ibt_mr_hdl_t		ep_snd_mrhdl;
    248 	kmutex_t		ep_lock;
    249 	rds_ep_state_t		ep_state;
    250 	ibt_channel_hdl_t	ep_chanhdl;
    251 	ibt_cq_hdl_t		ep_sendcq;
    252 	ibt_cq_hdl_t		ep_recvcq;
    253 	rds_bufpool_t		ep_sndpool;
    254 	rds_bufpool_t		ep_rcvpool;
    255 	rds_qp_t		ep_recvqp;
    256 	uint_t			ep_rdmacnt;
    257 	rds_buf_t		*ep_segfbp;
    258 	rds_buf_t		*ep_seglbp;
    259 	uintptr_t		ep_lbufid;
    260 	uintptr_t		ep_rbufid;
    261 	ibt_wr_ds_t		ep_ackds;
    262 	ibt_send_wr_t		ep_ackwr;
    263 	ibt_mr_hdl_t		ep_ackhdl;
    264 	ibt_rkey_t		ep_ack_rkey;
    265 	uintptr_t		ep_ack_addr;
    266 } rds_ep_t;
    267 
    268 /*
    269  * One end of an RDS session
    270  *
        * Each field described below is named session_<name> in the struct.
        *
    271  * nextp   - Pointer to the next session in the session list.
    272  *           This is protected by rds_state_t:rds_sessionlock.
    273  * remip   - IP address of the node having the remote end of the session.
    274  * myip    - IP address of this end of the session.
        * hca_guid - HCA guid.
    275  * lgid    - IB local (source) gid, hosting "myip".
    276  * rgid    - IB remote (destination) gid, hosting "remip".
    277  * lock    - Provides read/write access to members of the session.
    278  * type    - Identifies which end of session (active or passive),
        *           RDS_SESSION_ACTIVE or RDS_SESSION_PASSIVE.
    279  * state   - State of session (rds_session_state_t), stored as uint8_t.
    280  * dataep  - Data endpoint (embedded by value)
    281  * ctrlep  - Control endpoint (embedded by value)
    282  * failover- Flag to indicate that an error occurred and the session is
    283  *           re-connecting.
    284  * local_portmap_lock, remote_portmap_lock
        *         - To serialize access to the corresponding portmap.
    285  * local_portmap, remote_portmap
        *         - Bitmaps of sockets, RDS_PORT_MAP_SIZE bytes each.
    286  *           The maximum number of sockets seems to be 65536, a portmap has
    287  *           1 bit for each socket. A set bit indicates that the
    288  *           corresponding socket is stalled and vice versa.
        *           NOTE(review): the original text described a single map of
        *           remote sockets; confirm which sockets each of the two maps
        *           tracks.
        * pinfo   - IB path information (ibt_path_info_t).
        *           NOTE(review): presumably the path handed to
        *           rds_open_rc_channel() - confirm against rds_ep.c.
    289  */
    290 typedef struct rds_session_s {
    291 	struct rds_session_s	*session_nextp;
    292 	ipaddr_t		session_remip;
    293 	ipaddr_t		session_myip;
    294 	ib_guid_t		session_hca_guid;
    295 	ib_gid_t		session_lgid;
    296 	ib_gid_t		session_rgid;
    297 	krwlock_t		session_lock;
    298 	uint8_t			session_type;
    299 	uint8_t			session_state;
    300 	struct rds_ep_s		session_dataep;
    301 	struct rds_ep_s		session_ctrlep;
    302 	uint_t			session_failover;
    303 	krwlock_t		session_local_portmap_lock;
    304 	krwlock_t		session_remote_portmap_lock;
    305 	uint8_t			session_local_portmap[RDS_PORT_MAP_SIZE];
    306 	uint8_t			session_remote_portmap[RDS_PORT_MAP_SIZE];
    307 	ibt_path_info_t		session_pinfo;
    308 } rds_session_t;
    309 
    310 /*
        * Session and endpoint routines defined in rds_ep.c
        *
        * The session state diagram earlier in this header names several of
        * these as transitions: rds_session_init() (1), rds_session_open() (2),
        * rds_session_close() (3), rds_session_fini() and
        * rds_passive_session_fini() (4); rds_session_reinit() and
        * rds_cleanup_passive_session() appear on the recovery paths (8), (9).
        *
        * rds_close_sessions() and rds_cleanup_passive_session() take an
        * untyped 'void *arg'. NOTE(review): presumably taskq/callback entry
        * points - confirm the expected argument type in rds_ep.c.
        */
    311 int rds_ep_init(rds_ep_t *ep, ib_guid_t hca_guid);
    312 rds_session_t *rds_session_create(rds_state_t *statep, ipaddr_t destip,
    313     ipaddr_t srcip, ibt_cm_req_rcv_t *reqp, uint8_t type);
    314 int rds_session_init(rds_session_t *sp);
    315 int rds_session_reinit(rds_session_t *sp, ib_gid_t lgid);
    316 void rds_session_open(rds_session_t *sp);
    317 void rds_session_close(rds_session_t *sp, ibt_execution_mode_t mode,
    318     uint_t wait);
    319 rds_session_t *rds_session_lkup(rds_state_t *statep, ipaddr_t destip,
    320     ib_guid_t node_guid);
    321 void rds_recycle_session(rds_session_t *sp);
    322 void rds_session_active(rds_session_t *sp);
    323 void rds_close_sessions(void *arg);
    324 void rds_received_msg(rds_ep_t *ep, rds_buf_t *bp);
    325 void rds_handle_control_message(rds_session_t *sp, rds_ctrl_pkt_t *cp);
    326 void rds_handle_send_error(rds_ep_t *ep);
    327 void rds_session_fini(rds_session_t *sp);
    328 void rds_passive_session_fini(rds_session_t *sp);
    329 void rds_cleanup_passive_session(void *arg);
    330 
    331 /*
        * RC channel and completion-queue routines defined in rds_ib.c
        *
        * rds_post_recv_buf() takes an untyped 'void *arg'. NOTE(review): the
        * rds_qp_t comment mentions a taskq thread dispatched to post receive
        * WRs; presumably this is that entry point - confirm in rds_ib.c.
        */
    332 ibt_channel_hdl_t rds_ep_alloc_rc_channel(rds_ep_t *ep, uint8_t hca_port);
    333 void rds_ep_free_rc_channel(rds_ep_t *ep);
    334 void rds_post_recv_buf(void *arg);
    335 void rds_poll_send_completions(ibt_cq_hdl_t cq, struct rds_ep_s *ep,
    336     boolean_t lock);
    337 
    338 /*
        * RC channel open/close routines defined in rds_cm.c
        * rds_open_rc_channel() returns the opened channel through 'chanhdl'
        * (an ibt_channel_hdl_t pointer) in addition to its int result.
        */
    339 int rds_open_rc_channel(rds_ep_t *ep, ibt_path_info_t *pinfo,
    340     ibt_execution_mode_t mode, ibt_channel_hdl_t *chanhdl);
    341 int rds_close_rc_channel(ibt_channel_hdl_t chanhdl, ibt_execution_mode_t mode);
    342 
        /*
         * NOTE(review): no "defined in" banner covers this prototype;
         * confirm which source file defines rds_deliver_new_msg().
         */
    343 int rds_deliver_new_msg(mblk_t *mp, ipaddr_t local_addr, ipaddr_t rem_addr,
    344     in_port_t local_port, in_port_t rem_port, zoneid_t zoneid);
    345 
    346 /*
        * Defined in rds_sc.c
        * Both addresses are passed by pointer rather than by value.
        * NOTE(review): presumably the lookup may update *localip and/or
        * *remip - confirm in rds_sc.c.
        */
    347 int rds_sc_path_lookup(ipaddr_t *localip, ipaddr_t *remip);
    348 
    349 #ifdef __cplusplus
    350 }
    351 #endif
    352 
    353 #endif	/* _RDSIB_EP_H */
    354