1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * LDoms virtual disk client (vdc) device driver 29 * 30 * This driver runs on a guest logical domain and communicates with the virtual 31 * disk server (vds) driver running on the service domain which is exporting 32 * virtualized "disks" to the guest logical domain. 33 * 34 * The driver can be divided into four sections: 35 * 36 * 1) generic device driver housekeeping 37 * _init, _fini, attach, detach, ops structures, etc. 38 * 39 * 2) communication channel setup 40 * Setup the communications link over the LDC channel that vdc uses to 41 * talk to the vDisk server. Initialise the descriptor ring which 42 * allows the LDC clients to transfer data via memory mappings. 43 * 44 * 3) Support exported to upper layers (filesystems, etc) 45 * The upper layers call into vdc via strategy(9E) and DKIO(7I) 46 * ioctl calls. vdc will copy the data to be written to the descriptor 47 * ring or maps the buffer to store the data read by the vDisk 48 * server into the descriptor ring. 
It then sends a message to the 49 * vDisk server requesting it to complete the operation. 50 * 51 * 4) Handling responses from vDisk server. 52 * The vDisk server will ACK some or all of the messages vdc sends to it 53 * (this is configured during the handshake). Upon receipt of an ACK 54 * vdc will check the descriptor ring and signal to the upper layer 55 * code waiting on the IO. 56 */ 57 58 #include <sys/atomic.h> 59 #include <sys/conf.h> 60 #include <sys/disp.h> 61 #include <sys/ddi.h> 62 #include <sys/dkio.h> 63 #include <sys/efi_partition.h> 64 #include <sys/fcntl.h> 65 #include <sys/file.h> 66 #include <sys/kstat.h> 67 #include <sys/mach_descrip.h> 68 #include <sys/modctl.h> 69 #include <sys/mdeg.h> 70 #include <sys/note.h> 71 #include <sys/open.h> 72 #include <sys/sdt.h> 73 #include <sys/stat.h> 74 #include <sys/sunddi.h> 75 #include <sys/types.h> 76 #include <sys/promif.h> 77 #include <sys/var.h> 78 #include <sys/vtoc.h> 79 #include <sys/archsystm.h> 80 #include <sys/sysmacros.h> 81 82 #include <sys/cdio.h> 83 #include <sys/dktp/fdisk.h> 84 #include <sys/dktp/dadkio.h> 85 #include <sys/mhd.h> 86 #include <sys/scsi/generic/sense.h> 87 #include <sys/scsi/impl/uscsi.h> 88 #include <sys/scsi/impl/services.h> 89 #include <sys/scsi/targets/sddef.h> 90 91 #include <sys/ldoms.h> 92 #include <sys/ldc.h> 93 #include <sys/vio_common.h> 94 #include <sys/vio_mailbox.h> 95 #include <sys/vio_util.h> 96 #include <sys/vdsk_common.h> 97 #include <sys/vdsk_mailbox.h> 98 #include <sys/vdc.h> 99 100 #define VD_OLDVTOC_LIMIT 0x7fffffff 101 102 /* 103 * function prototypes 104 */ 105 106 /* standard driver functions */ 107 static int vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred); 108 static int vdc_close(dev_t dev, int flag, int otyp, cred_t *cred); 109 static int vdc_strategy(struct buf *buf); 110 static int vdc_print(dev_t dev, char *str); 111 static int vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk); 112 static int vdc_read(dev_t dev, struct uio *uio, 
cred_t *cred); 113 static int vdc_write(dev_t dev, struct uio *uio, cred_t *cred); 114 static int vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, 115 cred_t *credp, int *rvalp); 116 static int vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred); 117 static int vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred); 118 119 static int vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, 120 void *arg, void **resultp); 121 static int vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd); 122 static int vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd); 123 static int vdc_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, 124 int mod_flags, char *name, caddr_t valuep, int *lengthp); 125 126 /* setup */ 127 static void vdc_min(struct buf *bufp); 128 static int vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen); 129 static int vdc_do_ldc_init(vdc_t *vdc, vdc_server_t *srvr); 130 static int vdc_start_ldc_connection(vdc_t *vdc); 131 static int vdc_create_device_nodes(vdc_t *vdc); 132 static int vdc_create_device_nodes_efi(vdc_t *vdc); 133 static int vdc_create_device_nodes_vtoc(vdc_t *vdc); 134 static void vdc_create_io_kstats(vdc_t *vdc); 135 static void vdc_create_err_kstats(vdc_t *vdc); 136 static void vdc_set_err_kstats(vdc_t *vdc); 137 static int vdc_get_md_node(dev_info_t *dip, md_t **mdpp, 138 mde_cookie_t *vd_nodep); 139 static int vdc_init_ports(vdc_t *vdc, md_t *mdp, mde_cookie_t vd_nodep); 140 static void vdc_fini_ports(vdc_t *vdc); 141 static void vdc_switch_server(vdc_t *vdcp); 142 static int vdc_do_ldc_up(vdc_t *vdc); 143 static void vdc_terminate_ldc(vdc_t *vdc, vdc_server_t *srvr); 144 static int vdc_init_descriptor_ring(vdc_t *vdc); 145 static void vdc_destroy_descriptor_ring(vdc_t *vdc); 146 static int vdc_setup_devid(vdc_t *vdc); 147 static void vdc_store_label_efi(vdc_t *, efi_gpt_t *, efi_gpe_t *); 148 static void vdc_store_label_vtoc(vdc_t *, struct dk_geom *, 149 struct extvtoc *); 150 static void vdc_store_label_unk(vdc_t *vdc); 151 
static boolean_t vdc_is_opened(vdc_t *vdc); 152 static void vdc_update_size(vdc_t *vdc, size_t, size_t, size_t); 153 static int vdc_update_vio_bsize(vdc_t *vdc, uint32_t); 154 155 /* handshake with vds */ 156 static int vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver); 157 static int vdc_ver_negotiation(vdc_t *vdcp); 158 static int vdc_init_attr_negotiation(vdc_t *vdc); 159 static int vdc_attr_negotiation(vdc_t *vdcp); 160 static int vdc_init_dring_negotiate(vdc_t *vdc); 161 static int vdc_dring_negotiation(vdc_t *vdcp); 162 static int vdc_send_rdx(vdc_t *vdcp); 163 static int vdc_rdx_exchange(vdc_t *vdcp); 164 static boolean_t vdc_is_supported_version(vio_ver_msg_t *ver_msg); 165 166 /* processing incoming messages from vDisk server */ 167 static void vdc_process_msg_thread(vdc_t *vdc); 168 static int vdc_recv(vdc_t *vdc, vio_msg_t *msgp, size_t *nbytesp); 169 170 static uint_t vdc_handle_cb(uint64_t event, caddr_t arg); 171 static int vdc_process_data_msg(vdc_t *vdc, vio_msg_t *msg); 172 static int vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg); 173 static int vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg); 174 static int vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *msg); 175 static int vdc_send_request(vdc_t *vdcp, int operation, 176 caddr_t addr, size_t nbytes, int slice, diskaddr_t offset, 177 int cb_type, void *cb_arg, vio_desc_direction_t dir); 178 static int vdc_map_to_shared_dring(vdc_t *vdcp, int idx); 179 static int vdc_populate_descriptor(vdc_t *vdcp, int operation, 180 caddr_t addr, size_t nbytes, int slice, diskaddr_t offset, 181 int cb_type, void *cb_arg, vio_desc_direction_t dir); 182 static int vdc_do_sync_op(vdc_t *vdcp, int operation, caddr_t addr, 183 size_t nbytes, int slice, diskaddr_t offset, int cb_type, 184 void *cb_arg, vio_desc_direction_t dir, boolean_t); 185 186 static int vdc_wait_for_response(vdc_t *vdcp, vio_msg_t *msgp); 187 static int vdc_drain_response(vdc_t *vdcp, vio_cb_type_t cb_type, 188 
struct buf *buf); 189 static int vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx); 190 static int vdc_populate_mem_hdl(vdc_t *vdcp, vdc_local_desc_t *ldep); 191 static int vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg); 192 193 /* dkio */ 194 static int vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode, 195 int *rvalp); 196 static int vd_process_efi_ioctl(void *vdisk, int cmd, uintptr_t arg); 197 static void vdc_create_fake_geometry(vdc_t *vdc); 198 static int vdc_validate_geometry(vdc_t *vdc); 199 static void vdc_validate(vdc_t *vdc); 200 static void vdc_validate_task(void *arg); 201 static int vdc_null_copy_func(vdc_t *vdc, void *from, void *to, 202 int mode, int dir); 203 static int vdc_get_wce_convert(vdc_t *vdc, void *from, void *to, 204 int mode, int dir); 205 static int vdc_set_wce_convert(vdc_t *vdc, void *from, void *to, 206 int mode, int dir); 207 static int vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to, 208 int mode, int dir); 209 static int vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to, 210 int mode, int dir); 211 static int vdc_get_extvtoc_convert(vdc_t *vdc, void *from, void *to, 212 int mode, int dir); 213 static int vdc_set_extvtoc_convert(vdc_t *vdc, void *from, void *to, 214 int mode, int dir); 215 static int vdc_get_geom_convert(vdc_t *vdc, void *from, void *to, 216 int mode, int dir); 217 static int vdc_set_geom_convert(vdc_t *vdc, void *from, void *to, 218 int mode, int dir); 219 static int vdc_get_efi_convert(vdc_t *vdc, void *from, void *to, 220 int mode, int dir); 221 static int vdc_set_efi_convert(vdc_t *vdc, void *from, void *to, 222 int mode, int dir); 223 224 static void vdc_ownership_update(vdc_t *vdc, int ownership_flags); 225 static int vdc_access_set(vdc_t *vdc, uint64_t flags, int mode); 226 static vdc_io_t *vdc_failfast_io_queue(vdc_t *vdc, struct buf *buf); 227 static int vdc_failfast_check_resv(vdc_t *vdc); 228 229 /* 230 * Module variables 231 */ 232 233 /* 234 * Tunable variables to control 
how long vdc waits before timing out on
 * various operations
 */
static int	vdc_hshake_retries = 3;

static int	vdc_timeout = 0;		/* units: seconds */
static int	vdc_ldcup_timeout = 1;		/* units: seconds */

/*
 * Bounds on the delay between LDC retries. The *_timeout_ldc values are
 * converted with drv_usectohz() in vdc_do_attach() (so they are in
 * microseconds) and the results are stored in the matching vdc_hz_*
 * variables.
 */
static uint64_t	vdc_hz_min_ldc_delay;
static uint64_t	vdc_min_timeout_ldc = 1 * MILLISEC;
static uint64_t	vdc_hz_max_ldc_delay;
static uint64_t	vdc_max_timeout_ldc = 100 * MILLISEC;

/* NOTE(review): units presumably usec like the values above - confirm */
static uint64_t	vdc_ldc_read_init_delay = 1 * MILLISEC;
static uint64_t	vdc_ldc_read_max_delay = 100 * MILLISEC;

/* values for dumping - need to run in a tighter loop */
/*
 * 0.1s. The old comment gave the units as ns, but the variable name and the
 * 0.1s value (100 * MILLISEC) imply microseconds.
 */
static uint64_t	vdc_usec_timeout_dump = 100 * MILLISEC;	/* units: usec */
static int	vdc_dump_retries = 100;

static uint16_t	vdc_scsi_timeout = 60;	/* 60s units: seconds */

static uint64_t	vdc_ownership_delay = 6 * MICROSEC; /* 6s units: usec */

/* Count of the number of vdc instances attached */
static volatile uint32_t	vdc_instance_count = 0;

/* Tunable to log all SCSI errors */
static boolean_t	vdc_scsi_log_error = B_FALSE;

/* Soft state pointer */
static void	*vdc_state;

/*
 * Controlling the verbosity of the error/debug messages
 *
 * vdc_msglevel - controls level of messages
 * vdc_matchinst - 64-bit variable where each bit corresponds
 *                 to the vdc instance the vdc_msglevel applies.
 */
int		vdc_msglevel = 0x0;
uint64_t	vdc_matchinst = 0ull;

/*
 * Supported vDisk protocol version pairs.
 *
 * The first array entry is the latest and preferred version.
281 */ 282 static const vio_ver_t vdc_version[] = {{1, 1}}; 283 284 static struct cb_ops vdc_cb_ops = { 285 vdc_open, /* cb_open */ 286 vdc_close, /* cb_close */ 287 vdc_strategy, /* cb_strategy */ 288 vdc_print, /* cb_print */ 289 vdc_dump, /* cb_dump */ 290 vdc_read, /* cb_read */ 291 vdc_write, /* cb_write */ 292 vdc_ioctl, /* cb_ioctl */ 293 nodev, /* cb_devmap */ 294 nodev, /* cb_mmap */ 295 nodev, /* cb_segmap */ 296 nochpoll, /* cb_chpoll */ 297 vdc_prop_op, /* cb_prop_op */ 298 NULL, /* cb_str */ 299 D_MP | D_64BIT, /* cb_flag */ 300 CB_REV, /* cb_rev */ 301 vdc_aread, /* cb_aread */ 302 vdc_awrite /* cb_awrite */ 303 }; 304 305 static struct dev_ops vdc_ops = { 306 DEVO_REV, /* devo_rev */ 307 0, /* devo_refcnt */ 308 vdc_getinfo, /* devo_getinfo */ 309 nulldev, /* devo_identify */ 310 nulldev, /* devo_probe */ 311 vdc_attach, /* devo_attach */ 312 vdc_detach, /* devo_detach */ 313 nodev, /* devo_reset */ 314 &vdc_cb_ops, /* devo_cb_ops */ 315 NULL, /* devo_bus_ops */ 316 nulldev, /* devo_power */ 317 ddi_quiesce_not_needed, /* devo_quiesce */ 318 }; 319 320 static struct modldrv modldrv = { 321 &mod_driverops, 322 "virtual disk client", 323 &vdc_ops, 324 }; 325 326 static struct modlinkage modlinkage = { 327 MODREV_1, 328 &modldrv, 329 NULL 330 }; 331 332 /* -------------------------------------------------------------------------- */ 333 334 /* 335 * Device Driver housekeeping and setup 336 */ 337 338 int 339 _init(void) 340 { 341 int status; 342 343 if ((status = ddi_soft_state_init(&vdc_state, sizeof (vdc_t), 1)) != 0) 344 return (status); 345 if ((status = mod_install(&modlinkage)) != 0) 346 ddi_soft_state_fini(&vdc_state); 347 return (status); 348 } 349 350 int 351 _info(struct modinfo *modinfop) 352 { 353 return (mod_info(&modlinkage, modinfop)); 354 } 355 356 int 357 _fini(void) 358 { 359 int status; 360 361 if ((status = mod_remove(&modlinkage)) != 0) 362 return (status); 363 ddi_soft_state_fini(&vdc_state); 364 return (0); 365 } 366 367 static 
int 368 vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp) 369 { 370 _NOTE(ARGUNUSED(dip)) 371 372 int instance = VDCUNIT((dev_t)arg); 373 vdc_t *vdc = NULL; 374 375 switch (cmd) { 376 case DDI_INFO_DEVT2DEVINFO: 377 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 378 *resultp = NULL; 379 return (DDI_FAILURE); 380 } 381 *resultp = vdc->dip; 382 return (DDI_SUCCESS); 383 case DDI_INFO_DEVT2INSTANCE: 384 *resultp = (void *)(uintptr_t)instance; 385 return (DDI_SUCCESS); 386 default: 387 *resultp = NULL; 388 return (DDI_FAILURE); 389 } 390 } 391 392 static int 393 vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 394 { 395 kt_did_t failfast_tid, ownership_tid; 396 int instance; 397 int rv; 398 vdc_server_t *srvr; 399 vdc_t *vdc = NULL; 400 401 switch (cmd) { 402 case DDI_DETACH: 403 /* the real work happens below */ 404 break; 405 case DDI_SUSPEND: 406 /* nothing to do for this non-device */ 407 return (DDI_SUCCESS); 408 default: 409 return (DDI_FAILURE); 410 } 411 412 ASSERT(cmd == DDI_DETACH); 413 instance = ddi_get_instance(dip); 414 DMSGX(1, "[%d] Entered\n", instance); 415 416 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 417 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 418 return (DDI_FAILURE); 419 } 420 421 /* 422 * This function is called when vdc is detached or if it has failed to 423 * attach. In that case, the attach may have fail before the vdisk type 424 * has been set so we can't call vdc_is_opened(). However as the attach 425 * has failed, we know that the vdisk is not opened and we can safely 426 * detach. 
427 */ 428 if (vdc->vdisk_type != VD_DISK_TYPE_UNK && vdc_is_opened(vdc)) { 429 DMSG(vdc, 0, "[%d] Cannot detach: device is open", instance); 430 return (DDI_FAILURE); 431 } 432 433 if (vdc->dkio_flush_pending) { 434 DMSG(vdc, 0, 435 "[%d] Cannot detach: %d outstanding DKIO flushes\n", 436 instance, vdc->dkio_flush_pending); 437 return (DDI_FAILURE); 438 } 439 440 if (vdc->validate_pending) { 441 DMSG(vdc, 0, 442 "[%d] Cannot detach: %d outstanding validate request\n", 443 instance, vdc->validate_pending); 444 return (DDI_FAILURE); 445 } 446 447 DMSG(vdc, 0, "[%d] proceeding...\n", instance); 448 449 /* If we took ownership, release ownership */ 450 mutex_enter(&vdc->ownership_lock); 451 if (vdc->ownership & VDC_OWNERSHIP_GRANTED) { 452 rv = vdc_access_set(vdc, VD_ACCESS_SET_CLEAR, FKIOCTL); 453 if (rv == 0) { 454 vdc_ownership_update(vdc, VDC_OWNERSHIP_NONE); 455 } 456 } 457 mutex_exit(&vdc->ownership_lock); 458 459 /* mark instance as detaching */ 460 vdc->lifecycle = VDC_LC_DETACHING; 461 462 /* 463 * Try and disable callbacks to prevent another handshake. We have to 464 * disable callbacks for all servers. 
465 */ 466 for (srvr = vdc->server_list; srvr != NULL; srvr = srvr->next) { 467 rv = ldc_set_cb_mode(srvr->ldc_handle, LDC_CB_DISABLE); 468 DMSG(vdc, 0, "callback disabled (ldc=%lu, rv=%d)\n", 469 srvr->ldc_id, rv); 470 } 471 472 if (vdc->initialized & VDC_THREAD) { 473 mutex_enter(&vdc->read_lock); 474 if ((vdc->read_state == VDC_READ_WAITING) || 475 (vdc->read_state == VDC_READ_RESET)) { 476 vdc->read_state = VDC_READ_RESET; 477 cv_signal(&vdc->read_cv); 478 } 479 480 mutex_exit(&vdc->read_lock); 481 482 /* wake up any thread waiting for connection to come online */ 483 mutex_enter(&vdc->lock); 484 if (vdc->state == VDC_STATE_INIT_WAITING) { 485 DMSG(vdc, 0, 486 "[%d] write reset - move to resetting state...\n", 487 instance); 488 vdc->state = VDC_STATE_RESETTING; 489 cv_signal(&vdc->initwait_cv); 490 } 491 mutex_exit(&vdc->lock); 492 493 /* now wait until state transitions to VDC_STATE_DETACH */ 494 thread_join(vdc->msg_proc_thr->t_did); 495 ASSERT(vdc->state == VDC_STATE_DETACH); 496 DMSG(vdc, 0, "[%d] Reset thread exit and join ..\n", 497 vdc->instance); 498 } 499 500 mutex_enter(&vdc->lock); 501 502 if (vdc->initialized & VDC_DRING) 503 vdc_destroy_descriptor_ring(vdc); 504 505 vdc_fini_ports(vdc); 506 507 if (vdc->failfast_thread) { 508 failfast_tid = vdc->failfast_thread->t_did; 509 vdc->failfast_interval = 0; 510 cv_signal(&vdc->failfast_cv); 511 } else { 512 failfast_tid = 0; 513 } 514 515 if (vdc->ownership & VDC_OWNERSHIP_WANTED) { 516 ownership_tid = vdc->ownership_thread->t_did; 517 vdc->ownership = VDC_OWNERSHIP_NONE; 518 cv_signal(&vdc->ownership_cv); 519 } else { 520 ownership_tid = 0; 521 } 522 523 mutex_exit(&vdc->lock); 524 525 if (failfast_tid != 0) 526 thread_join(failfast_tid); 527 528 if (ownership_tid != 0) 529 thread_join(ownership_tid); 530 531 if (vdc->initialized & VDC_MINOR) 532 ddi_remove_minor_node(dip, NULL); 533 534 if (vdc->io_stats) { 535 kstat_delete(vdc->io_stats); 536 vdc->io_stats = NULL; 537 } 538 539 if (vdc->err_stats) { 
540 kstat_delete(vdc->err_stats); 541 vdc->err_stats = NULL; 542 } 543 544 if (vdc->initialized & VDC_LOCKS) { 545 mutex_destroy(&vdc->lock); 546 mutex_destroy(&vdc->read_lock); 547 mutex_destroy(&vdc->ownership_lock); 548 cv_destroy(&vdc->initwait_cv); 549 cv_destroy(&vdc->dring_free_cv); 550 cv_destroy(&vdc->membind_cv); 551 cv_destroy(&vdc->sync_pending_cv); 552 cv_destroy(&vdc->sync_blocked_cv); 553 cv_destroy(&vdc->read_cv); 554 cv_destroy(&vdc->running_cv); 555 cv_destroy(&vdc->ownership_cv); 556 cv_destroy(&vdc->failfast_cv); 557 cv_destroy(&vdc->failfast_io_cv); 558 } 559 560 if (vdc->minfo) 561 kmem_free(vdc->minfo, sizeof (struct dk_minfo)); 562 563 if (vdc->cinfo) 564 kmem_free(vdc->cinfo, sizeof (struct dk_cinfo)); 565 566 if (vdc->vtoc) 567 kmem_free(vdc->vtoc, sizeof (struct extvtoc)); 568 569 if (vdc->geom) 570 kmem_free(vdc->geom, sizeof (struct dk_geom)); 571 572 if (vdc->devid) { 573 ddi_devid_unregister(dip); 574 ddi_devid_free(vdc->devid); 575 } 576 577 if (vdc->initialized & VDC_SOFT_STATE) 578 ddi_soft_state_free(vdc_state, instance); 579 580 DMSG(vdc, 0, "[%d] End %p\n", instance, (void *)vdc); 581 582 return (DDI_SUCCESS); 583 } 584 585 586 static int 587 vdc_do_attach(dev_info_t *dip) 588 { 589 int instance; 590 vdc_t *vdc = NULL; 591 int status; 592 md_t *mdp; 593 mde_cookie_t vd_node; 594 595 ASSERT(dip != NULL); 596 597 instance = ddi_get_instance(dip); 598 if (ddi_soft_state_zalloc(vdc_state, instance) != DDI_SUCCESS) { 599 cmn_err(CE_NOTE, "[%d] Couldn't alloc state structure", 600 instance); 601 return (DDI_FAILURE); 602 } 603 604 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 605 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 606 return (DDI_FAILURE); 607 } 608 609 /* 610 * We assign the value to initialized in this case to zero out the 611 * variable and then set bits in it to indicate what has been done 612 */ 613 vdc->initialized = VDC_SOFT_STATE; 614 615 vdc_hz_min_ldc_delay = 
drv_usectohz(vdc_min_timeout_ldc); 616 vdc_hz_max_ldc_delay = drv_usectohz(vdc_max_timeout_ldc); 617 618 vdc->dip = dip; 619 vdc->instance = instance; 620 vdc->vdisk_type = VD_DISK_TYPE_UNK; 621 vdc->vdisk_label = VD_DISK_LABEL_UNK; 622 vdc->state = VDC_STATE_INIT; 623 vdc->lifecycle = VDC_LC_ATTACHING; 624 vdc->session_id = 0; 625 vdc->vdisk_bsize = DEV_BSIZE; 626 vdc->vio_bmask = 0; 627 vdc->vio_bshift = 0; 628 vdc->max_xfer_sz = maxphys / vdc->vdisk_bsize; 629 630 /* 631 * We assume, for now, that the vDisk server will export 'read' 632 * operations to us at a minimum (this is needed because of checks 633 * in vdc for supported operations early in the handshake process). 634 * The vDisk server will return ENOTSUP if this is not the case. 635 * The value will be overwritten during the attribute exchange with 636 * the bitmask of operations exported by server. 637 */ 638 vdc->operations = VD_OP_MASK_READ; 639 640 vdc->vtoc = NULL; 641 vdc->geom = NULL; 642 vdc->cinfo = NULL; 643 vdc->minfo = NULL; 644 645 mutex_init(&vdc->lock, NULL, MUTEX_DRIVER, NULL); 646 cv_init(&vdc->initwait_cv, NULL, CV_DRIVER, NULL); 647 cv_init(&vdc->dring_free_cv, NULL, CV_DRIVER, NULL); 648 cv_init(&vdc->membind_cv, NULL, CV_DRIVER, NULL); 649 cv_init(&vdc->running_cv, NULL, CV_DRIVER, NULL); 650 651 vdc->threads_pending = 0; 652 vdc->sync_op_pending = B_FALSE; 653 vdc->sync_op_blocked = B_FALSE; 654 cv_init(&vdc->sync_pending_cv, NULL, CV_DRIVER, NULL); 655 cv_init(&vdc->sync_blocked_cv, NULL, CV_DRIVER, NULL); 656 657 mutex_init(&vdc->ownership_lock, NULL, MUTEX_DRIVER, NULL); 658 cv_init(&vdc->ownership_cv, NULL, CV_DRIVER, NULL); 659 cv_init(&vdc->failfast_cv, NULL, CV_DRIVER, NULL); 660 cv_init(&vdc->failfast_io_cv, NULL, CV_DRIVER, NULL); 661 662 /* init blocking msg read functionality */ 663 mutex_init(&vdc->read_lock, NULL, MUTEX_DRIVER, NULL); 664 cv_init(&vdc->read_cv, NULL, CV_DRIVER, NULL); 665 vdc->read_state = VDC_READ_IDLE; 666 667 vdc->initialized |= VDC_LOCKS; 668 669 
/* get device and port MD node for this disk instance */ 670 if (vdc_get_md_node(dip, &mdp, &vd_node) != 0) { 671 cmn_err(CE_NOTE, "[%d] Could not get machine description node", 672 instance); 673 return (DDI_FAILURE); 674 } 675 676 if (vdc_init_ports(vdc, mdp, vd_node) != 0) { 677 cmn_err(CE_NOTE, "[%d] Error initialising ports", instance); 678 return (DDI_FAILURE); 679 } 680 681 (void) md_fini_handle(mdp); 682 683 /* Create the kstats for saving the I/O statistics used by iostat(1M) */ 684 vdc_create_io_kstats(vdc); 685 vdc_create_err_kstats(vdc); 686 687 /* Initialize remaining structures before starting the msg thread */ 688 vdc->vdisk_label = VD_DISK_LABEL_UNK; 689 vdc->vtoc = kmem_zalloc(sizeof (struct extvtoc), KM_SLEEP); 690 vdc->geom = kmem_zalloc(sizeof (struct dk_geom), KM_SLEEP); 691 vdc->minfo = kmem_zalloc(sizeof (struct dk_minfo), KM_SLEEP); 692 693 /* initialize the thread responsible for managing state with server */ 694 vdc->msg_proc_thr = thread_create(NULL, 0, vdc_process_msg_thread, 695 vdc, 0, &p0, TS_RUN, minclsyspri); 696 if (vdc->msg_proc_thr == NULL) { 697 cmn_err(CE_NOTE, "[%d] Failed to create msg processing thread", 698 instance); 699 return (DDI_FAILURE); 700 } 701 702 vdc->initialized |= VDC_THREAD; 703 704 atomic_inc_32(&vdc_instance_count); 705 706 /* 707 * Check the disk label. This will send requests and do the handshake. 708 * We don't really care about the disk label now. What we really need is 709 * the handshake do be done so that we know the type of the disk (slice 710 * or full disk) and the appropriate device nodes can be created. 
711 */ 712 713 mutex_enter(&vdc->lock); 714 (void) vdc_validate_geometry(vdc); 715 mutex_exit(&vdc->lock); 716 717 /* 718 * Now that we have the device info we can create the device nodes 719 */ 720 status = vdc_create_device_nodes(vdc); 721 if (status) { 722 DMSG(vdc, 0, "[%d] Failed to create device nodes", 723 instance); 724 goto return_status; 725 } 726 727 /* 728 * Setup devid 729 */ 730 if (vdc_setup_devid(vdc)) { 731 DMSG(vdc, 0, "[%d] No device id available\n", instance); 732 } 733 734 /* 735 * Fill in the fields of the error statistics kstat that were not 736 * available when creating the kstat 737 */ 738 vdc_set_err_kstats(vdc); 739 740 ddi_report_dev(dip); 741 vdc->lifecycle = VDC_LC_ONLINE; 742 DMSG(vdc, 0, "[%d] Attach tasks successful\n", instance); 743 744 return_status: 745 DMSG(vdc, 0, "[%d] Attach completed\n", instance); 746 return (status); 747 } 748 749 static int 750 vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 751 { 752 int status; 753 754 switch (cmd) { 755 case DDI_ATTACH: 756 if ((status = vdc_do_attach(dip)) != 0) 757 (void) vdc_detach(dip, DDI_DETACH); 758 return (status); 759 case DDI_RESUME: 760 /* nothing to do for this non-device */ 761 return (DDI_SUCCESS); 762 default: 763 return (DDI_FAILURE); 764 } 765 } 766 767 static int 768 vdc_do_ldc_init(vdc_t *vdc, vdc_server_t *srvr) 769 { 770 int status = 0; 771 ldc_status_t ldc_state; 772 ldc_attr_t ldc_attr; 773 774 ASSERT(vdc != NULL); 775 ASSERT(srvr != NULL); 776 777 ldc_attr.devclass = LDC_DEV_BLK; 778 ldc_attr.instance = vdc->instance; 779 ldc_attr.mode = LDC_MODE_UNRELIABLE; /* unreliable transport */ 780 ldc_attr.mtu = VD_LDC_MTU; 781 782 if ((srvr->state & VDC_LDC_INIT) == 0) { 783 status = ldc_init(srvr->ldc_id, &ldc_attr, 784 &srvr->ldc_handle); 785 if (status != 0) { 786 DMSG(vdc, 0, "[%d] ldc_init(chan %ld) returned %d", 787 vdc->instance, srvr->ldc_id, status); 788 return (status); 789 } 790 srvr->state |= VDC_LDC_INIT; 791 } 792 status = ldc_status(srvr->ldc_handle, 
&ldc_state); 793 if (status != 0) { 794 DMSG(vdc, 0, "[%d] Cannot discover LDC status [err=%d]", 795 vdc->instance, status); 796 goto init_exit; 797 } 798 srvr->ldc_state = ldc_state; 799 800 if ((srvr->state & VDC_LDC_CB) == 0) { 801 status = ldc_reg_callback(srvr->ldc_handle, vdc_handle_cb, 802 (caddr_t)srvr); 803 if (status != 0) { 804 DMSG(vdc, 0, "[%d] LDC callback reg. failed (%d)", 805 vdc->instance, status); 806 goto init_exit; 807 } 808 srvr->state |= VDC_LDC_CB; 809 } 810 811 /* 812 * At this stage we have initialised LDC, we will now try and open 813 * the connection. 814 */ 815 if (srvr->ldc_state == LDC_INIT) { 816 status = ldc_open(srvr->ldc_handle); 817 if (status != 0) { 818 DMSG(vdc, 0, "[%d] ldc_open(chan %ld) returned %d", 819 vdc->instance, srvr->ldc_id, status); 820 goto init_exit; 821 } 822 srvr->state |= VDC_LDC_OPEN; 823 } 824 825 init_exit: 826 if (status) { 827 vdc_terminate_ldc(vdc, srvr); 828 } 829 830 return (status); 831 } 832 833 static int 834 vdc_start_ldc_connection(vdc_t *vdc) 835 { 836 int status = 0; 837 838 ASSERT(vdc != NULL); 839 840 ASSERT(MUTEX_HELD(&vdc->lock)); 841 842 status = vdc_do_ldc_up(vdc); 843 844 DMSG(vdc, 0, "[%d] Finished bringing up LDC\n", vdc->instance); 845 846 return (status); 847 } 848 849 static int 850 vdc_stop_ldc_connection(vdc_t *vdcp) 851 { 852 int status; 853 854 ASSERT(vdcp != NULL); 855 856 ASSERT(MUTEX_HELD(&vdcp->lock)); 857 858 DMSG(vdcp, 0, ": Resetting connection to vDisk server : state %d\n", 859 vdcp->state); 860 861 status = ldc_down(vdcp->curr_server->ldc_handle); 862 DMSG(vdcp, 0, "ldc_down() = %d\n", status); 863 864 vdcp->initialized &= ~VDC_HANDSHAKE; 865 DMSG(vdcp, 0, "initialized=%x\n", vdcp->initialized); 866 867 return (status); 868 } 869 870 static void 871 vdc_create_io_kstats(vdc_t *vdc) 872 { 873 if (vdc->io_stats != NULL) { 874 DMSG(vdc, 0, "[%d] I/O kstat already exists\n", vdc->instance); 875 return; 876 } 877 878 vdc->io_stats = kstat_create(VDC_DRIVER_NAME, 
vdc->instance, NULL, 879 "disk", KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT); 880 if (vdc->io_stats != NULL) { 881 vdc->io_stats->ks_lock = &vdc->lock; 882 kstat_install(vdc->io_stats); 883 } else { 884 cmn_err(CE_NOTE, "[%d] Failed to create kstat: I/O statistics" 885 " will not be gathered", vdc->instance); 886 } 887 } 888 889 static void 890 vdc_create_err_kstats(vdc_t *vdc) 891 { 892 vd_err_stats_t *stp; 893 char kstatmodule_err[KSTAT_STRLEN]; 894 char kstatname[KSTAT_STRLEN]; 895 int ndata = (sizeof (vd_err_stats_t) / sizeof (kstat_named_t)); 896 int instance = vdc->instance; 897 898 if (vdc->err_stats != NULL) { 899 DMSG(vdc, 0, "[%d] ERR kstat already exists\n", vdc->instance); 900 return; 901 } 902 903 (void) snprintf(kstatmodule_err, sizeof (kstatmodule_err), 904 "%serr", VDC_DRIVER_NAME); 905 (void) snprintf(kstatname, sizeof (kstatname), 906 "%s%d,err", VDC_DRIVER_NAME, instance); 907 908 vdc->err_stats = kstat_create(kstatmodule_err, instance, kstatname, 909 "device_error", KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT); 910 911 if (vdc->err_stats == NULL) { 912 cmn_err(CE_NOTE, "[%d] Failed to create kstat: Error statistics" 913 " will not be gathered", instance); 914 return; 915 } 916 917 stp = (vd_err_stats_t *)vdc->err_stats->ks_data; 918 kstat_named_init(&stp->vd_softerrs, "Soft Errors", 919 KSTAT_DATA_UINT32); 920 kstat_named_init(&stp->vd_transerrs, "Transport Errors", 921 KSTAT_DATA_UINT32); 922 kstat_named_init(&stp->vd_protoerrs, "Protocol Errors", 923 KSTAT_DATA_UINT32); 924 kstat_named_init(&stp->vd_vid, "Vendor", 925 KSTAT_DATA_CHAR); 926 kstat_named_init(&stp->vd_pid, "Product", 927 KSTAT_DATA_CHAR); 928 kstat_named_init(&stp->vd_capacity, "Size", 929 KSTAT_DATA_ULONGLONG); 930 931 vdc->err_stats->ks_update = nulldev; 932 933 kstat_install(vdc->err_stats); 934 } 935 936 static void 937 vdc_set_err_kstats(vdc_t *vdc) 938 { 939 vd_err_stats_t *stp; 940 941 if (vdc->err_stats == NULL) 942 return; 943 944 mutex_enter(&vdc->lock); 945 946 stp = 
(vd_err_stats_t *)vdc->err_stats->ks_data; 947 ASSERT(stp != NULL); 948 949 stp->vd_capacity.value.ui64 = vdc->vdisk_size * vdc->vdisk_bsize; 950 (void) strcpy(stp->vd_vid.value.c, "SUN"); 951 (void) strcpy(stp->vd_pid.value.c, "VDSK"); 952 953 mutex_exit(&vdc->lock); 954 } 955 956 static int 957 vdc_create_device_nodes_efi(vdc_t *vdc) 958 { 959 ddi_remove_minor_node(vdc->dip, "h"); 960 ddi_remove_minor_node(vdc->dip, "h,raw"); 961 962 if (ddi_create_minor_node(vdc->dip, "wd", S_IFBLK, 963 VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 964 DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 965 cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd'", 966 vdc->instance); 967 return (EIO); 968 } 969 970 /* if any device node is created we set this flag */ 971 vdc->initialized |= VDC_MINOR; 972 973 if (ddi_create_minor_node(vdc->dip, "wd,raw", S_IFCHR, 974 VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 975 DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 976 cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd,raw'", 977 vdc->instance); 978 return (EIO); 979 } 980 981 return (0); 982 } 983 984 static int 985 vdc_create_device_nodes_vtoc(vdc_t *vdc) 986 { 987 ddi_remove_minor_node(vdc->dip, "wd"); 988 ddi_remove_minor_node(vdc->dip, "wd,raw"); 989 990 if (ddi_create_minor_node(vdc->dip, "h", S_IFBLK, 991 VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 992 DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 993 cmn_err(CE_NOTE, "[%d] Couldn't add block node 'h'", 994 vdc->instance); 995 return (EIO); 996 } 997 998 /* if any device node is created we set this flag */ 999 vdc->initialized |= VDC_MINOR; 1000 1001 if (ddi_create_minor_node(vdc->dip, "h,raw", S_IFCHR, 1002 VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 1003 DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 1004 cmn_err(CE_NOTE, "[%d] Couldn't add block node 'h,raw'", 1005 vdc->instance); 1006 return (EIO); 1007 } 1008 1009 return (0); 1010 } 1011 1012 /* 1013 * Function: 1014 * vdc_create_device_nodes 1015 * 1016 * Description: 1017 * This function creates the block and character 
device nodes under 1018 * /devices. It is called as part of the attach(9E) of the instance 1019 * during the handshake with vds after vds has sent the attributes 1020 * to vdc. 1021 * 1022 * If the device is of type VD_DISK_TYPE_SLICE then the minor node 1023 * of 2 is used in keeping with the Solaris convention that slice 2 1024 * refers to a whole disk. Slices start at 'a' 1025 * 1026 * Parameters: 1027 * vdc - soft state pointer 1028 * 1029 * Return Values 1030 * 0 - Success 1031 * EIO - Failed to create node 1032 * EINVAL - Unknown type of disk exported 1033 */ 1034 static int 1035 vdc_create_device_nodes(vdc_t *vdc) 1036 { 1037 char name[sizeof ("s,raw")]; 1038 dev_info_t *dip = NULL; 1039 int instance, status; 1040 int num_slices = 1; 1041 int i; 1042 1043 ASSERT(vdc != NULL); 1044 1045 instance = vdc->instance; 1046 dip = vdc->dip; 1047 1048 switch (vdc->vdisk_type) { 1049 case VD_DISK_TYPE_DISK: 1050 num_slices = V_NUMPAR; 1051 break; 1052 case VD_DISK_TYPE_SLICE: 1053 num_slices = 1; 1054 break; 1055 case VD_DISK_TYPE_UNK: 1056 default: 1057 return (EINVAL); 1058 } 1059 1060 /* 1061 * Minor nodes are different for EFI disks: EFI disks do not have 1062 * a minor node 'g' for the minor number corresponding to slice 1063 * VD_EFI_WD_SLICE (slice 7) instead they have a minor node 'wd' 1064 * representing the whole disk. 
1065 */ 1066 for (i = 0; i < num_slices; i++) { 1067 1068 if (i == VD_EFI_WD_SLICE) { 1069 if (vdc->vdisk_label == VD_DISK_LABEL_EFI) 1070 status = vdc_create_device_nodes_efi(vdc); 1071 else 1072 status = vdc_create_device_nodes_vtoc(vdc); 1073 if (status != 0) 1074 return (status); 1075 continue; 1076 } 1077 1078 (void) snprintf(name, sizeof (name), "%c", 'a' + i); 1079 if (ddi_create_minor_node(dip, name, S_IFBLK, 1080 VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 1081 cmn_err(CE_NOTE, "[%d] Couldn't add block node '%s'", 1082 instance, name); 1083 return (EIO); 1084 } 1085 1086 /* if any device node is created we set this flag */ 1087 vdc->initialized |= VDC_MINOR; 1088 1089 (void) snprintf(name, sizeof (name), "%c%s", 'a' + i, ",raw"); 1090 1091 if (ddi_create_minor_node(dip, name, S_IFCHR, 1092 VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 1093 cmn_err(CE_NOTE, "[%d] Couldn't add raw node '%s'", 1094 instance, name); 1095 return (EIO); 1096 } 1097 } 1098 1099 return (0); 1100 } 1101 1102 /* 1103 * Driver prop_op(9e) entry point function. Return the number of blocks for 1104 * the partition in question or forward the request to the property facilities. 
1105 */ 1106 static int 1107 vdc_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags, 1108 char *name, caddr_t valuep, int *lengthp) 1109 { 1110 int instance = ddi_get_instance(dip); 1111 vdc_t *vdc; 1112 uint64_t nblocks; 1113 uint_t blksize; 1114 1115 vdc = ddi_get_soft_state(vdc_state, instance); 1116 1117 if (dev == DDI_DEV_T_ANY || vdc == NULL) { 1118 return (ddi_prop_op(dev, dip, prop_op, mod_flags, 1119 name, valuep, lengthp)); 1120 } 1121 1122 mutex_enter(&vdc->lock); 1123 (void) vdc_validate_geometry(vdc); 1124 if (vdc->vdisk_label == VD_DISK_LABEL_UNK) { 1125 mutex_exit(&vdc->lock); 1126 return (ddi_prop_op(dev, dip, prop_op, mod_flags, 1127 name, valuep, lengthp)); 1128 } 1129 nblocks = vdc->slice[VDCPART(dev)].nblocks; 1130 blksize = vdc->vdisk_bsize; 1131 mutex_exit(&vdc->lock); 1132 1133 return (ddi_prop_op_nblocks_blksize(dev, dip, prop_op, mod_flags, 1134 name, valuep, lengthp, nblocks, blksize)); 1135 } 1136 1137 /* 1138 * Function: 1139 * vdc_is_opened 1140 * 1141 * Description: 1142 * This function checks if any slice of a given virtual disk is 1143 * currently opened. 1144 * 1145 * Parameters: 1146 * vdc - soft state pointer 1147 * 1148 * Return Values 1149 * B_TRUE - at least one slice is opened. 1150 * B_FALSE - no slice is opened. 
1151 */ 1152 static boolean_t 1153 vdc_is_opened(vdc_t *vdc) 1154 { 1155 int i, nslices; 1156 1157 switch (vdc->vdisk_type) { 1158 case VD_DISK_TYPE_DISK: 1159 nslices = V_NUMPAR; 1160 break; 1161 case VD_DISK_TYPE_SLICE: 1162 nslices = 1; 1163 break; 1164 case VD_DISK_TYPE_UNK: 1165 default: 1166 ASSERT(0); 1167 } 1168 1169 /* check if there's any layered open */ 1170 for (i = 0; i < nslices; i++) { 1171 if (vdc->open_lyr[i] > 0) 1172 return (B_TRUE); 1173 } 1174 1175 /* check if there is any other kind of open */ 1176 for (i = 0; i < OTYPCNT; i++) { 1177 if (vdc->open[i] != 0) 1178 return (B_TRUE); 1179 } 1180 1181 return (B_FALSE); 1182 } 1183 1184 static int 1185 vdc_mark_opened(vdc_t *vdc, int slice, int flag, int otyp) 1186 { 1187 uint8_t slicemask; 1188 int i; 1189 1190 ASSERT(otyp < OTYPCNT); 1191 ASSERT(slice < V_NUMPAR); 1192 ASSERT(MUTEX_HELD(&vdc->lock)); 1193 1194 slicemask = 1 << slice; 1195 1196 /* check if slice is already exclusively opened */ 1197 if (vdc->open_excl & slicemask) 1198 return (EBUSY); 1199 1200 /* if open exclusive, check if slice is already opened */ 1201 if (flag & FEXCL) { 1202 if (vdc->open_lyr[slice] > 0) 1203 return (EBUSY); 1204 for (i = 0; i < OTYPCNT; i++) { 1205 if (vdc->open[i] & slicemask) 1206 return (EBUSY); 1207 } 1208 vdc->open_excl |= slicemask; 1209 } 1210 1211 /* mark slice as opened */ 1212 if (otyp == OTYP_LYR) { 1213 vdc->open_lyr[slice]++; 1214 } else { 1215 vdc->open[otyp] |= slicemask; 1216 } 1217 1218 return (0); 1219 } 1220 1221 static void 1222 vdc_mark_closed(vdc_t *vdc, int slice, int flag, int otyp) 1223 { 1224 uint8_t slicemask; 1225 1226 ASSERT(otyp < OTYPCNT); 1227 ASSERT(slice < V_NUMPAR); 1228 ASSERT(MUTEX_HELD(&vdc->lock)); 1229 1230 slicemask = 1 << slice; 1231 1232 if (otyp == OTYP_LYR) { 1233 ASSERT(vdc->open_lyr[slice] > 0); 1234 vdc->open_lyr[slice]--; 1235 } else { 1236 vdc->open[otyp] &= ~slicemask; 1237 } 1238 1239 if (flag & FEXCL) 1240 vdc->open_excl &= ~slicemask; 1241 } 1242 1243 
static int 1244 vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred) 1245 { 1246 _NOTE(ARGUNUSED(cred)) 1247 1248 int instance, nodelay; 1249 int slice, status = 0; 1250 vdc_t *vdc; 1251 1252 ASSERT(dev != NULL); 1253 instance = VDCUNIT(*dev); 1254 1255 if (otyp >= OTYPCNT) 1256 return (EINVAL); 1257 1258 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1259 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 1260 return (ENXIO); 1261 } 1262 1263 DMSG(vdc, 0, "minor = %d flag = %x, otyp = %x\n", 1264 getminor(*dev), flag, otyp); 1265 1266 slice = VDCPART(*dev); 1267 1268 nodelay = flag & (FNDELAY | FNONBLOCK); 1269 1270 if ((flag & FWRITE) && (!nodelay) && 1271 !(VD_OP_SUPPORTED(vdc->operations, VD_OP_BWRITE))) { 1272 return (EROFS); 1273 } 1274 1275 mutex_enter(&vdc->lock); 1276 1277 status = vdc_mark_opened(vdc, slice, flag, otyp); 1278 1279 if (status != 0) { 1280 mutex_exit(&vdc->lock); 1281 return (status); 1282 } 1283 1284 if (nodelay) { 1285 1286 /* don't resubmit a validate request if there's already one */ 1287 if (vdc->validate_pending > 0) { 1288 mutex_exit(&vdc->lock); 1289 return (0); 1290 } 1291 1292 /* call vdc_validate() asynchronously to avoid blocking */ 1293 if (taskq_dispatch(system_taskq, vdc_validate_task, 1294 (void *)vdc, TQ_NOSLEEP) == NULL) { 1295 vdc_mark_closed(vdc, slice, flag, otyp); 1296 mutex_exit(&vdc->lock); 1297 return (ENXIO); 1298 } 1299 1300 vdc->validate_pending++; 1301 mutex_exit(&vdc->lock); 1302 return (0); 1303 } 1304 1305 mutex_exit(&vdc->lock); 1306 1307 vdc_validate(vdc); 1308 1309 mutex_enter(&vdc->lock); 1310 1311 if (vdc->vdisk_label == VD_DISK_LABEL_UNK || 1312 vdc->slice[slice].nblocks == 0) { 1313 vdc_mark_closed(vdc, slice, flag, otyp); 1314 status = EIO; 1315 } 1316 1317 mutex_exit(&vdc->lock); 1318 1319 return (status); 1320 } 1321 1322 static int 1323 vdc_close(dev_t dev, int flag, int otyp, cred_t *cred) 1324 { 1325 _NOTE(ARGUNUSED(cred)) 1326 1327 int instance; 1328 int slice; 
1329 int rv, rval; 1330 vdc_t *vdc; 1331 1332 instance = VDCUNIT(dev); 1333 1334 if (otyp >= OTYPCNT) 1335 return (EINVAL); 1336 1337 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1338 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 1339 return (ENXIO); 1340 } 1341 1342 DMSG(vdc, 0, "[%d] flag = %x, otyp = %x\n", instance, flag, otyp); 1343 1344 slice = VDCPART(dev); 1345 1346 /* 1347 * Attempt to flush the W$ on a close operation. If this is 1348 * not a supported IOCTL command or the backing device is read-only 1349 * do not fail the close operation. 1350 */ 1351 rv = vd_process_ioctl(dev, DKIOCFLUSHWRITECACHE, NULL, FKIOCTL, &rval); 1352 1353 if (rv != 0 && rv != ENOTSUP && rv != ENOTTY && rv != EROFS) { 1354 DMSG(vdc, 0, "[%d] flush failed with error %d on close\n", 1355 instance, rv); 1356 return (EIO); 1357 } 1358 1359 mutex_enter(&vdc->lock); 1360 vdc_mark_closed(vdc, slice, flag, otyp); 1361 mutex_exit(&vdc->lock); 1362 1363 return (0); 1364 } 1365 1366 static int 1367 vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) 1368 { 1369 _NOTE(ARGUNUSED(credp)) 1370 1371 return (vd_process_ioctl(dev, cmd, (caddr_t)arg, mode, rvalp)); 1372 } 1373 1374 static int 1375 vdc_print(dev_t dev, char *str) 1376 { 1377 cmn_err(CE_NOTE, "vdc%d: %s", VDCUNIT(dev), str); 1378 return (0); 1379 } 1380 1381 static int 1382 vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 1383 { 1384 int rv; 1385 size_t nbytes = nblk * DEV_BSIZE; 1386 int instance = VDCUNIT(dev); 1387 vdc_t *vdc = NULL; 1388 diskaddr_t vio_blkno; 1389 1390 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1391 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 1392 return (ENXIO); 1393 } 1394 1395 DMSG(vdc, 2, "[%d] dump %ld bytes at block 0x%lx : addr=0x%p\n", 1396 instance, nbytes, blkno, (void *)addr); 1397 1398 /* convert logical block to vio block */ 1399 if ((blkno & vdc->vio_bmask) != 0) { 1400 DMSG(vdc, 0, 
"Misaligned block number (%lu)\n", blkno); 1401 return (EINVAL); 1402 } 1403 vio_blkno = blkno >> vdc->vio_bshift; 1404 1405 rv = vdc_send_request(vdc, VD_OP_BWRITE, addr, nbytes, 1406 VDCPART(dev), vio_blkno, CB_STRATEGY, 0, VIO_write_dir); 1407 if (rv) { 1408 DMSG(vdc, 0, "Failed to do a disk dump (err=%d)\n", rv); 1409 return (rv); 1410 } 1411 1412 if (ddi_in_panic()) 1413 (void) vdc_drain_response(vdc, CB_STRATEGY, NULL); 1414 1415 DMSG(vdc, 0, "[%d] End\n", instance); 1416 1417 return (0); 1418 } 1419 1420 /* -------------------------------------------------------------------------- */ 1421 1422 /* 1423 * Disk access routines 1424 * 1425 */ 1426 1427 /* 1428 * vdc_strategy() 1429 * 1430 * Return Value: 1431 * 0: As per strategy(9E), the strategy() function must return 0 1432 * [ bioerror(9f) sets b_flags to the proper error code ] 1433 */ 1434 static int 1435 vdc_strategy(struct buf *buf) 1436 { 1437 diskaddr_t vio_blkno; 1438 int rv = -1; 1439 vdc_t *vdc = NULL; 1440 int instance = VDCUNIT(buf->b_edev); 1441 int op = (buf->b_flags & B_READ) ? VD_OP_BREAD : VD_OP_BWRITE; 1442 int slice; 1443 1444 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1445 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 1446 bioerror(buf, ENXIO); 1447 biodone(buf); 1448 return (0); 1449 } 1450 1451 DMSG(vdc, 2, "[%d] %s %ld bytes at block %llx : b_addr=0x%p\n", 1452 instance, (buf->b_flags & B_READ) ? "Read" : "Write", 1453 buf->b_bcount, buf->b_lblkno, (void *)buf->b_un.b_addr); 1454 1455 bp_mapin(buf); 1456 1457 if ((long)buf->b_private == VD_SLICE_NONE) { 1458 /* I/O using an absolute disk offset */ 1459 slice = VD_SLICE_NONE; 1460 } else { 1461 slice = VDCPART(buf->b_edev); 1462 } 1463 1464 /* 1465 * In the buf structure, b_lblkno represents a logical block number 1466 * using a block size of 512 bytes. For the VIO request, this block 1467 * number has to be converted to be represented with the block size 1468 * used by the VIO protocol. 
1469 */ 1470 if ((buf->b_lblkno & vdc->vio_bmask) != 0) { 1471 bioerror(buf, EINVAL); 1472 biodone(buf); 1473 return (0); 1474 } 1475 vio_blkno = buf->b_lblkno >> vdc->vio_bshift; 1476 1477 rv = vdc_send_request(vdc, op, (caddr_t)buf->b_un.b_addr, 1478 buf->b_bcount, slice, vio_blkno, 1479 CB_STRATEGY, buf, (op == VD_OP_BREAD) ? VIO_read_dir : 1480 VIO_write_dir); 1481 1482 /* 1483 * If the request was successfully sent, the strategy call returns and 1484 * the ACK handler calls the bioxxx functions when the vDisk server is 1485 * done otherwise we handle the error here. 1486 */ 1487 if (rv) { 1488 DMSG(vdc, 0, "Failed to read/write (err=%d)\n", rv); 1489 bioerror(buf, rv); 1490 biodone(buf); 1491 } else if (ddi_in_panic()) { 1492 rv = vdc_drain_response(vdc, CB_STRATEGY, buf); 1493 if (rv != 0) { 1494 bioerror(buf, EIO); 1495 biodone(buf); 1496 } 1497 } 1498 1499 return (0); 1500 } 1501 1502 /* 1503 * Function: 1504 * vdc_min 1505 * 1506 * Description: 1507 * Routine to limit the size of a data transfer. Used in 1508 * conjunction with physio(9F). 1509 * 1510 * Arguments: 1511 * bp - pointer to the indicated buf(9S) struct. 
1512 * 1513 */ 1514 static void 1515 vdc_min(struct buf *bufp) 1516 { 1517 vdc_t *vdc = NULL; 1518 int instance = VDCUNIT(bufp->b_edev); 1519 1520 vdc = ddi_get_soft_state(vdc_state, instance); 1521 VERIFY(vdc != NULL); 1522 1523 if (bufp->b_bcount > (vdc->max_xfer_sz * vdc->vdisk_bsize)) { 1524 bufp->b_bcount = vdc->max_xfer_sz * vdc->vdisk_bsize; 1525 } 1526 } 1527 1528 static int 1529 vdc_read(dev_t dev, struct uio *uio, cred_t *cred) 1530 { 1531 _NOTE(ARGUNUSED(cred)) 1532 1533 DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 1534 return (physio(vdc_strategy, NULL, dev, B_READ, vdc_min, uio)); 1535 } 1536 1537 static int 1538 vdc_write(dev_t dev, struct uio *uio, cred_t *cred) 1539 { 1540 _NOTE(ARGUNUSED(cred)) 1541 1542 DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 1543 return (physio(vdc_strategy, NULL, dev, B_WRITE, vdc_min, uio)); 1544 } 1545 1546 static int 1547 vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred) 1548 { 1549 _NOTE(ARGUNUSED(cred)) 1550 1551 DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 1552 return (aphysio(vdc_strategy, anocancel, dev, B_READ, vdc_min, aio)); 1553 } 1554 1555 static int 1556 vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred) 1557 { 1558 _NOTE(ARGUNUSED(cred)) 1559 1560 DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 1561 return (aphysio(vdc_strategy, anocancel, dev, B_WRITE, vdc_min, aio)); 1562 } 1563 1564 1565 /* -------------------------------------------------------------------------- */ 1566 1567 /* 1568 * Handshake support 1569 */ 1570 1571 1572 /* 1573 * Function: 1574 * vdc_init_ver_negotiation() 1575 * 1576 * Description: 1577 * 1578 * Arguments: 1579 * vdc - soft state pointer for this instance of the device driver. 
1580 * 1581 * Return Code: 1582 * 0 - Success 1583 */ 1584 static int 1585 vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver) 1586 { 1587 vio_ver_msg_t pkt; 1588 size_t msglen = sizeof (pkt); 1589 int status = -1; 1590 1591 ASSERT(vdc != NULL); 1592 ASSERT(mutex_owned(&vdc->lock)); 1593 1594 DMSG(vdc, 0, "[%d] Entered.\n", vdc->instance); 1595 1596 /* 1597 * set the Session ID to a unique value 1598 * (the lower 32 bits of the clock tick) 1599 */ 1600 vdc->session_id = ((uint32_t)gettick() & 0xffffffff); 1601 DMSG(vdc, 0, "[%d] Set SID to 0x%lx\n", vdc->instance, vdc->session_id); 1602 1603 pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 1604 pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 1605 pkt.tag.vio_subtype_env = VIO_VER_INFO; 1606 pkt.tag.vio_sid = vdc->session_id; 1607 pkt.dev_class = VDEV_DISK; 1608 pkt.ver_major = ver.major; 1609 pkt.ver_minor = ver.minor; 1610 1611 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 1612 DMSG(vdc, 0, "[%d] Ver info sent (status = %d)\n", 1613 vdc->instance, status); 1614 if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) { 1615 DMSG(vdc, 0, "[%d] Failed to send Ver negotiation info: " 1616 "id(%lx) rv(%d) size(%ld)", vdc->instance, 1617 vdc->curr_server->ldc_handle, status, msglen); 1618 if (msglen != sizeof (vio_ver_msg_t)) 1619 status = ENOMSG; 1620 } 1621 1622 return (status); 1623 } 1624 1625 /* 1626 * Function: 1627 * vdc_ver_negotiation() 1628 * 1629 * Description: 1630 * 1631 * Arguments: 1632 * vdcp - soft state pointer for this instance of the device driver. 
1633 * 1634 * Return Code: 1635 * 0 - Success 1636 */ 1637 static int 1638 vdc_ver_negotiation(vdc_t *vdcp) 1639 { 1640 vio_msg_t vio_msg; 1641 int status; 1642 1643 if (status = vdc_init_ver_negotiation(vdcp, vdc_version[0])) 1644 return (status); 1645 1646 /* release lock and wait for response */ 1647 mutex_exit(&vdcp->lock); 1648 status = vdc_wait_for_response(vdcp, &vio_msg); 1649 mutex_enter(&vdcp->lock); 1650 if (status) { 1651 DMSG(vdcp, 0, 1652 "[%d] Failed waiting for Ver negotiation response, rv(%d)", 1653 vdcp->instance, status); 1654 return (status); 1655 } 1656 1657 /* check type and sub_type ... */ 1658 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 1659 vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 1660 DMSG(vdcp, 0, "[%d] Invalid ver negotiation response\n", 1661 vdcp->instance); 1662 return (EPROTO); 1663 } 1664 1665 return (vdc_handle_ver_msg(vdcp, (vio_ver_msg_t *)&vio_msg)); 1666 } 1667 1668 /* 1669 * Function: 1670 * vdc_init_attr_negotiation() 1671 * 1672 * Description: 1673 * 1674 * Arguments: 1675 * vdc - soft state pointer for this instance of the device driver. 
1676 * 1677 * Return Code: 1678 * 0 - Success 1679 */ 1680 static int 1681 vdc_init_attr_negotiation(vdc_t *vdc) 1682 { 1683 vd_attr_msg_t pkt; 1684 size_t msglen = sizeof (pkt); 1685 int status; 1686 1687 ASSERT(vdc != NULL); 1688 ASSERT(mutex_owned(&vdc->lock)); 1689 1690 DMSG(vdc, 0, "[%d] entered\n", vdc->instance); 1691 1692 /* fill in tag */ 1693 pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 1694 pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 1695 pkt.tag.vio_subtype_env = VIO_ATTR_INFO; 1696 pkt.tag.vio_sid = vdc->session_id; 1697 /* fill in payload */ 1698 pkt.max_xfer_sz = vdc->max_xfer_sz; 1699 pkt.vdisk_block_size = vdc->vdisk_bsize; 1700 pkt.xfer_mode = VIO_DRING_MODE_V1_0; 1701 pkt.operations = 0; /* server will set bits of valid operations */ 1702 pkt.vdisk_type = 0; /* server will set to valid device type */ 1703 pkt.vdisk_media = 0; /* server will set to valid media type */ 1704 pkt.vdisk_size = 0; /* server will set to valid size */ 1705 1706 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 1707 DMSG(vdc, 0, "Attr info sent (status = %d)\n", status); 1708 1709 if ((status != 0) || (msglen != sizeof (vd_attr_msg_t))) { 1710 DMSG(vdc, 0, "[%d] Failed to send Attr negotiation info: " 1711 "id(%lx) rv(%d) size(%ld)", vdc->instance, 1712 vdc->curr_server->ldc_handle, status, msglen); 1713 if (msglen != sizeof (vd_attr_msg_t)) 1714 status = ENOMSG; 1715 } 1716 1717 return (status); 1718 } 1719 1720 /* 1721 * Function: 1722 * vdc_attr_negotiation() 1723 * 1724 * Description: 1725 * 1726 * Arguments: 1727 * vdc - soft state pointer for this instance of the device driver. 
1728 * 1729 * Return Code: 1730 * 0 - Success 1731 */ 1732 static int 1733 vdc_attr_negotiation(vdc_t *vdcp) 1734 { 1735 int status; 1736 vio_msg_t vio_msg; 1737 1738 if (status = vdc_init_attr_negotiation(vdcp)) 1739 return (status); 1740 1741 /* release lock and wait for response */ 1742 mutex_exit(&vdcp->lock); 1743 status = vdc_wait_for_response(vdcp, &vio_msg); 1744 mutex_enter(&vdcp->lock); 1745 if (status) { 1746 DMSG(vdcp, 0, 1747 "[%d] Failed waiting for Attr negotiation response, rv(%d)", 1748 vdcp->instance, status); 1749 return (status); 1750 } 1751 1752 /* check type and sub_type ... */ 1753 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 1754 vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 1755 DMSG(vdcp, 0, "[%d] Invalid attr negotiation response\n", 1756 vdcp->instance); 1757 return (EPROTO); 1758 } 1759 1760 return (vdc_handle_attr_msg(vdcp, (vd_attr_msg_t *)&vio_msg)); 1761 } 1762 1763 1764 /* 1765 * Function: 1766 * vdc_init_dring_negotiate() 1767 * 1768 * Description: 1769 * 1770 * Arguments: 1771 * vdc - soft state pointer for this instance of the device driver. 
1772 * 1773 * Return Code: 1774 * 0 - Success 1775 */ 1776 static int 1777 vdc_init_dring_negotiate(vdc_t *vdc) 1778 { 1779 vio_dring_reg_msg_t pkt; 1780 size_t msglen = sizeof (pkt); 1781 int status = -1; 1782 int retry; 1783 int nretries = 10; 1784 1785 ASSERT(vdc != NULL); 1786 ASSERT(mutex_owned(&vdc->lock)); 1787 1788 for (retry = 0; retry < nretries; retry++) { 1789 status = vdc_init_descriptor_ring(vdc); 1790 if (status != EAGAIN) 1791 break; 1792 drv_usecwait(vdc_min_timeout_ldc); 1793 } 1794 1795 if (status != 0) { 1796 DMSG(vdc, 0, "[%d] Failed to init DRing (status = %d)\n", 1797 vdc->instance, status); 1798 return (status); 1799 } 1800 1801 DMSG(vdc, 0, "[%d] Init of descriptor ring completed (status = %d)\n", 1802 vdc->instance, status); 1803 1804 /* fill in tag */ 1805 pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 1806 pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 1807 pkt.tag.vio_subtype_env = VIO_DRING_REG; 1808 pkt.tag.vio_sid = vdc->session_id; 1809 /* fill in payload */ 1810 pkt.dring_ident = 0; 1811 pkt.num_descriptors = vdc->dring_len; 1812 pkt.descriptor_size = vdc->dring_entry_size; 1813 pkt.options = (VIO_TX_DRING | VIO_RX_DRING); 1814 pkt.ncookies = vdc->dring_cookie_count; 1815 pkt.cookie[0] = vdc->dring_cookie[0]; /* for now just one cookie */ 1816 1817 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 1818 if (status != 0) { 1819 DMSG(vdc, 0, "[%d] Failed to register DRing (err = %d)", 1820 vdc->instance, status); 1821 } 1822 1823 return (status); 1824 } 1825 1826 1827 /* 1828 * Function: 1829 * vdc_dring_negotiation() 1830 * 1831 * Description: 1832 * 1833 * Arguments: 1834 * vdc - soft state pointer for this instance of the device driver. 
1835 * 1836 * Return Code: 1837 * 0 - Success 1838 */ 1839 static int 1840 vdc_dring_negotiation(vdc_t *vdcp) 1841 { 1842 int status; 1843 vio_msg_t vio_msg; 1844 1845 if (status = vdc_init_dring_negotiate(vdcp)) 1846 return (status); 1847 1848 /* release lock and wait for response */ 1849 mutex_exit(&vdcp->lock); 1850 status = vdc_wait_for_response(vdcp, &vio_msg); 1851 mutex_enter(&vdcp->lock); 1852 if (status) { 1853 DMSG(vdcp, 0, 1854 "[%d] Failed waiting for Dring negotiation response," 1855 " rv(%d)", vdcp->instance, status); 1856 return (status); 1857 } 1858 1859 /* check type and sub_type ... */ 1860 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 1861 vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 1862 DMSG(vdcp, 0, "[%d] Invalid Dring negotiation response\n", 1863 vdcp->instance); 1864 return (EPROTO); 1865 } 1866 1867 return (vdc_handle_dring_reg_msg(vdcp, 1868 (vio_dring_reg_msg_t *)&vio_msg)); 1869 } 1870 1871 1872 /* 1873 * Function: 1874 * vdc_send_rdx() 1875 * 1876 * Description: 1877 * 1878 * Arguments: 1879 * vdc - soft state pointer for this instance of the device driver. 1880 * 1881 * Return Code: 1882 * 0 - Success 1883 */ 1884 static int 1885 vdc_send_rdx(vdc_t *vdcp) 1886 { 1887 vio_msg_t msg; 1888 size_t msglen = sizeof (vio_msg_t); 1889 int status; 1890 1891 /* 1892 * Send an RDX message to vds to indicate we are ready 1893 * to send data 1894 */ 1895 msg.tag.vio_msgtype = VIO_TYPE_CTRL; 1896 msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 1897 msg.tag.vio_subtype_env = VIO_RDX; 1898 msg.tag.vio_sid = vdcp->session_id; 1899 status = vdc_send(vdcp, (caddr_t)&msg, &msglen); 1900 if (status != 0) { 1901 DMSG(vdcp, 0, "[%d] Failed to send RDX message (%d)", 1902 vdcp->instance, status); 1903 } 1904 1905 return (status); 1906 } 1907 1908 /* 1909 * Function: 1910 * vdc_handle_rdx() 1911 * 1912 * Description: 1913 * 1914 * Arguments: 1915 * vdc - soft state pointer for this instance of the device driver. 
*	msgp	- received msg
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_handle_rdx(vdc_t *vdcp, vio_rdx_msg_t *msgp)
{
	_NOTE(ARGUNUSED(vdcp))
	_NOTE(ARGUNUSED(msgp))

	/* the message is only sanity checked, and only on DEBUG kernels */
	ASSERT(msgp->tag.vio_msgtype == VIO_TYPE_CTRL);
	ASSERT(msgp->tag.vio_subtype == VIO_SUBTYPE_ACK);
	ASSERT(msgp->tag.vio_subtype_env == VIO_RDX);

	DMSG(vdcp, 1, "[%d] Got an RDX msg", vdcp->instance);

	return (0);
}

/*
 * Function:
 *	vdc_rdx_exchange()
 *
 * Description:
 *	Send an RDX message, wait for the reply with vdc->lock dropped and
 *	pass it on to vdc_handle_rdx(). Unlike the other handshake steps,
 *	the reply must be a control ACK.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 *	EPROTO	- the reply is not a control ACK message
 *	xxx	- error returned by vdc_send_rdx() or vdc_wait_for_response()
 */
static int
vdc_rdx_exchange(vdc_t *vdcp)
{
	int				status;
	vio_msg_t			vio_msg;

	if (status = vdc_send_rdx(vdcp))
		return (status);

	/* release lock and wait for response */
	mutex_exit(&vdcp->lock);
	status = vdc_wait_for_response(vdcp, &vio_msg);
	mutex_enter(&vdcp->lock);
	if (status) {
		DMSG(vdcp, 0, "[%d] Failed waiting for RDX response, rv(%d)",
		    vdcp->instance, status);
		return (status);
	}

	/* check type and sub_type ... */
	if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL ||
	    vio_msg.tag.vio_subtype != VIO_SUBTYPE_ACK) {
		DMSG(vdcp, 0, "[%d] Invalid RDX response\n", vdcp->instance);
		return (EPROTO);
	}

	return (vdc_handle_rdx(vdcp, (vio_rdx_msg_t *)&vio_msg));
}


/* -------------------------------------------------------------------------- */

/*
 * LDC helper routines
 */

/*
 * Read one message from the LDC channel of the current server.
 *
 * On entry *nbytesp is the size of the buffer at msgp; on success it is
 * updated with the length of the message read.
 *
 * Returns 0 on success, ECONNRESET if the read state is VDC_READ_RESET while
 * waiting for data, or the error returned by ldc_read(). EAGAIN from
 * ldc_read() is retried internally and never returned.
 */
static int
vdc_recv(vdc_t *vdc, vio_msg_t *msgp, size_t *nbytesp)
{
	int		status;
	uint64_t	delay_time;
	size_t		len;

	/*
	 * Until we get a blocking ldc read we have to retry until the entire
	 * LDC message has arrived before ldc_read() will return that message.
	 * If ldc_read() succeeds but returns a zero length message then that
	 * means that the LDC queue is empty and we have to wait for a
	 * notification from the LDC callback which will set the read_state to
	 * VDC_READ_PENDING. Note we also bail out if the channel is reset or
	 * goes away.
	 */
	delay_time = vdc_ldc_read_init_delay;

	for (;;) {

		/* ldc_read() overwrites len, so reload the buffer size */
		len = *nbytesp;
		/*
		 * vdc->curr_server is protected by vdc->lock but to avoid
		 * contentions we don't take the lock here. We can do this
		 * safely because vdc_recv() is only called from thread
		 * process_msg_thread() which is also the only thread that
		 * can change vdc->curr_server.
		 */
		status = ldc_read(vdc->curr_server->ldc_handle,
		    (caddr_t)msgp, &len);

		if (status == EAGAIN) {
			/* double the delay on every retry, up to the maximum */
			delay_time *= 2;
			if (delay_time >= vdc_ldc_read_max_delay)
				delay_time = vdc_ldc_read_max_delay;
			delay(delay_time);
			continue;
		}

		if (status != 0) {
			DMSG(vdc, 0, "ldc_read returned %d\n", status);
			break;
		}

		/* a non-empty read is a complete message */
		if (len != 0) {
			*nbytesp = len;
			break;
		}

		/* the queue is empty: wait until more data is signalled */
		mutex_enter(&vdc->read_lock);

		while (vdc->read_state != VDC_READ_PENDING) {

			/* detect if the connection has been reset */
			if (vdc->read_state == VDC_READ_RESET) {
				mutex_exit(&vdc->read_lock);
				return (ECONNRESET);
			}

			vdc->read_state = VDC_READ_WAITING;
			cv_wait(&vdc->read_cv, &vdc->read_lock);
		}

		vdc->read_state = VDC_READ_IDLE;
		mutex_exit(&vdc->read_lock);

		/* start again from the initial retry delay */
		delay_time = vdc_ldc_read_init_delay;
	}

	return (status);
}



#ifdef DEBUG
/*
 * Debug helper: log the message type, subtype and subtype envelope of a VIO
 * message both as numbers and as symbolic names ("unknown" for values which
 * are not listed below).
 */
void
vdc_decode_tag(vdc_t *vdcp, vio_msg_t *msg)
{
	char *ms, *ss, *ses;
	switch (msg->tag.vio_msgtype) {
/* Q() expands to a case which stores the name of the constant as a string */
#define	Q(_s)	case _s : ms = #_s; break;
	Q(VIO_TYPE_CTRL)
	Q(VIO_TYPE_DATA)
	Q(VIO_TYPE_ERR)
#undef Q
	default: ms = "unknown"; break;
	}

	switch (msg->tag.vio_subtype) {
#define	Q(_s)	case _s : ss = #_s; break;
	Q(VIO_SUBTYPE_INFO)
	Q(VIO_SUBTYPE_ACK)
	Q(VIO_SUBTYPE_NACK)
#undef Q
	default: ss = "unknown"; break;
	}

	switch (msg->tag.vio_subtype_env) {
#define	Q(_s)	case _s : ses = #_s; break;
	Q(VIO_VER_INFO)
	Q(VIO_ATTR_INFO)
	Q(VIO_DRING_REG)
	Q(VIO_DRING_UNREG)
	Q(VIO_RDX)
	Q(VIO_PKT_DATA)
	Q(VIO_DESC_DATA)
	Q(VIO_DRING_DATA)
#undef Q
	default: ses = "unknown"; break;
	}

	DMSG(vdcp, 3, "(%x/%x/%x) message : (%s/%s/%s)\n",
	    msg->tag.vio_msgtype, msg->tag.vio_subtype,
	    msg->tag.vio_subtype_env, ms, ss, ses);
}
#endif

/*
 * Function:
 *	vdc_send()
 *
 * Description:
 *	The function encapsulates the call to write a message using LDC.
 *	If LDC indicates that the call failed due to the queue being full,
 *	we retry the ldc_write(), otherwise we return the error returned by LDC.
 *
 * Arguments:
 *	vdc		- soft state pointer; the message is written to the
 *			  LDC channel of vdc->curr_server
 *	pkt		- address of LDC message to be sent
 *	msglen		- the size of the message being sent. When the function
 *			  returns, this contains the number of bytes written.
 *
 * Return Code:
 *	0		- Success.
 *	EINVAL		- pkt or msglen were NULL
 *	ECONNRESET	- The connection was not up.
*	EWOULDBLOCK	- LDC queue is full (retried here, never returned)
 *	xxx		- other error codes returned by ldc_write; EIO is
 *			  reported as ECONNRESET
 *
 *	When ECONNRESET is returned *msglen is left unchanged.
 */
static int
vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen)
{
	size_t	size = 0;
	int	status = 0;
	clock_t delay_ticks;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));
	ASSERT(msglen != NULL);
	ASSERT(*msglen != 0);

#ifdef DEBUG
	vdc_decode_tag(vdc, (vio_msg_t *)(uintptr_t)pkt);
#endif
	/*
	 * Wait indefinitely to send if channel
	 * is busy, but bail out if we succeed or
	 * if the channel closes or is reset.
	 */
	delay_ticks = vdc_hz_min_ldc_delay;
	do {
		/* ldc_write() overwrites size, so reload it on every try */
		size = *msglen;
		status = ldc_write(vdc->curr_server->ldc_handle, pkt, &size);
		if (status == EWOULDBLOCK) {
			delay(delay_ticks);
			/* geometric backoff */
			delay_ticks *= 2;
			if (delay_ticks > vdc_hz_max_ldc_delay)
				delay_ticks = vdc_hz_max_ldc_delay;
		}
	} while (status == EWOULDBLOCK);

	/* if LDC had serious issues --- reset vdc state */
	if (status == EIO || status == ECONNRESET) {
		/* flag the reset to the reader, waking it up if it waits */
		mutex_enter(&vdc->read_lock);
		if ((vdc->read_state == VDC_READ_WAITING) ||
		    (vdc->read_state == VDC_READ_RESET))
			cv_signal(&vdc->read_cv);
		vdc->read_state = VDC_READ_RESET;
		mutex_exit(&vdc->read_lock);

		/* wake up any waiters in the reset thread */
		if (vdc->state == VDC_STATE_INIT_WAITING) {
			DMSG(vdc, 0, "[%d] write reset - "
			    "vdc is resetting ..\n", vdc->instance);
			vdc->state = VDC_STATE_RESETTING;
			cv_signal(&vdc->initwait_cv);
		}

		return (ECONNRESET);
	}

	/* return the last size written */
	*msglen = size;

	return (status);
}

/*
 * Function:
 *	vdc_get_md_node
 *
 * Description:
 *	Get the MD, the device node for the given disk instance.
The 2189 * caller is responsible for cleaning up the reference to the 2190 * returned MD (mdpp) by calling md_fini_handle(). 2191 * 2192 * Arguments: 2193 * dip - dev info pointer for this instance of the device driver. 2194 * mdpp - the returned MD. 2195 * vd_nodep - the returned device node. 2196 * 2197 * Return Code: 2198 * 0 - Success. 2199 * ENOENT - Expected node or property did not exist. 2200 * ENXIO - Unexpected error communicating with MD framework 2201 */ 2202 static int 2203 vdc_get_md_node(dev_info_t *dip, md_t **mdpp, mde_cookie_t *vd_nodep) 2204 { 2205 int status = ENOENT; 2206 char *node_name = NULL; 2207 md_t *mdp = NULL; 2208 int num_nodes; 2209 int num_vdevs; 2210 mde_cookie_t rootnode; 2211 mde_cookie_t *listp = NULL; 2212 boolean_t found_inst = B_FALSE; 2213 int listsz; 2214 int idx; 2215 uint64_t md_inst; 2216 int obp_inst; 2217 int instance = ddi_get_instance(dip); 2218 2219 /* 2220 * Get the OBP instance number for comparison with the MD instance 2221 * 2222 * The "cfg-handle" property of a vdc node in an MD contains the MD's 2223 * notion of "instance", or unique identifier, for that node; OBP 2224 * stores the value of the "cfg-handle" MD property as the value of 2225 * the "reg" property on the node in the device tree it builds from 2226 * the MD and passes to Solaris. Thus, we look up the devinfo node's 2227 * "reg" property value to uniquely identify this device instance. 2228 * If the "reg" property cannot be found, the device tree state is 2229 * presumably so broken that there is no point in continuing. 2230 */ 2231 if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, OBP_REG)) { 2232 cmn_err(CE_WARN, "'%s' property does not exist", OBP_REG); 2233 return (ENOENT); 2234 } 2235 obp_inst = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 2236 OBP_REG, -1); 2237 DMSGX(1, "[%d] OBP inst=%d\n", instance, obp_inst); 2238 2239 /* 2240 * We now walk the MD nodes to find the node for this vdisk. 
2241 */ 2242 if ((mdp = md_get_handle()) == NULL) { 2243 cmn_err(CE_WARN, "unable to init machine description"); 2244 return (ENXIO); 2245 } 2246 2247 num_nodes = md_node_count(mdp); 2248 ASSERT(num_nodes > 0); 2249 2250 listsz = num_nodes * sizeof (mde_cookie_t); 2251 2252 /* allocate memory for nodes */ 2253 listp = kmem_zalloc(listsz, KM_SLEEP); 2254 2255 rootnode = md_root_node(mdp); 2256 ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE); 2257 2258 /* 2259 * Search for all the virtual devices, we will then check to see which 2260 * ones are disk nodes. 2261 */ 2262 num_vdevs = md_scan_dag(mdp, rootnode, 2263 md_find_name(mdp, VDC_MD_VDEV_NAME), 2264 md_find_name(mdp, "fwd"), listp); 2265 2266 if (num_vdevs <= 0) { 2267 cmn_err(CE_NOTE, "No '%s' node found", VDC_MD_VDEV_NAME); 2268 status = ENOENT; 2269 goto done; 2270 } 2271 2272 DMSGX(1, "[%d] num_vdevs=%d\n", instance, num_vdevs); 2273 for (idx = 0; idx < num_vdevs; idx++) { 2274 status = md_get_prop_str(mdp, listp[idx], "name", &node_name); 2275 if ((status != 0) || (node_name == NULL)) { 2276 cmn_err(CE_NOTE, "Unable to get name of node type '%s'" 2277 ": err %d", VDC_MD_VDEV_NAME, status); 2278 continue; 2279 } 2280 2281 DMSGX(1, "[%d] Found node '%s'\n", instance, node_name); 2282 if (strcmp(VDC_MD_DISK_NAME, node_name) == 0) { 2283 status = md_get_prop_val(mdp, listp[idx], 2284 VDC_MD_CFG_HDL, &md_inst); 2285 DMSGX(1, "[%d] vdc inst in MD=%lx\n", 2286 instance, md_inst); 2287 if ((status == 0) && (md_inst == obp_inst)) { 2288 found_inst = B_TRUE; 2289 break; 2290 } 2291 } 2292 } 2293 2294 if (!found_inst) { 2295 DMSGX(0, "Unable to find correct '%s' node", VDC_MD_DISK_NAME); 2296 status = ENOENT; 2297 goto done; 2298 } 2299 DMSGX(0, "[%d] MD inst=%lx\n", instance, md_inst); 2300 2301 *vd_nodep = listp[idx]; 2302 *mdpp = mdp; 2303 done: 2304 kmem_free(listp, listsz); 2305 return (status); 2306 } 2307 2308 /* 2309 * Function: 2310 * vdc_init_ports 2311 * 2312 * Description: 2313 * Initialize all the ports for this 
vdisk instance. 2314 * 2315 * Arguments: 2316 * vdc - soft state pointer for this instance of the device driver. 2317 * mdp - md pointer 2318 * vd_nodep - device md node. 2319 * 2320 * Return Code: 2321 * 0 - Success. 2322 * ENOENT - Expected node or property did not exist. 2323 */ 2324 static int 2325 vdc_init_ports(vdc_t *vdc, md_t *mdp, mde_cookie_t vd_nodep) 2326 { 2327 int status = 0; 2328 int idx; 2329 int num_nodes; 2330 int num_vports; 2331 int num_chans; 2332 int listsz; 2333 mde_cookie_t vd_port; 2334 mde_cookie_t *chanp = NULL; 2335 mde_cookie_t *portp = NULL; 2336 vdc_server_t *srvr; 2337 vdc_server_t *prev_srvr = NULL; 2338 2339 /* 2340 * We now walk the MD nodes to find the port nodes for this vdisk. 2341 */ 2342 num_nodes = md_node_count(mdp); 2343 ASSERT(num_nodes > 0); 2344 2345 listsz = num_nodes * sizeof (mde_cookie_t); 2346 2347 /* allocate memory for nodes */ 2348 portp = kmem_zalloc(listsz, KM_SLEEP); 2349 chanp = kmem_zalloc(listsz, KM_SLEEP); 2350 2351 num_vports = md_scan_dag(mdp, vd_nodep, 2352 md_find_name(mdp, VDC_MD_PORT_NAME), 2353 md_find_name(mdp, "fwd"), portp); 2354 if (num_vports == 0) { 2355 DMSGX(0, "Found no '%s' node for '%s' port\n", 2356 VDC_MD_PORT_NAME, VDC_MD_VDEV_NAME); 2357 status = ENOENT; 2358 goto done; 2359 } 2360 2361 DMSGX(1, "Found %d '%s' node(s) for '%s' port\n", 2362 num_vports, VDC_MD_PORT_NAME, VDC_MD_VDEV_NAME); 2363 2364 vdc->num_servers = 0; 2365 for (idx = 0; idx < num_vports; idx++) { 2366 2367 /* initialize this port */ 2368 vd_port = portp[idx]; 2369 srvr = kmem_zalloc(sizeof (vdc_server_t), KM_SLEEP); 2370 srvr->vdcp = vdc; 2371 2372 /* get port id */ 2373 if (md_get_prop_val(mdp, vd_port, VDC_MD_ID, &srvr->id) != 0) { 2374 cmn_err(CE_NOTE, "vDisk port '%s' property not found", 2375 VDC_MD_ID); 2376 kmem_free(srvr, sizeof (vdc_server_t)); 2377 continue; 2378 } 2379 2380 /* set the connection timeout */ 2381 if (md_get_prop_val(mdp, vd_port, VDC_MD_TIMEOUT, 2382 &srvr->ctimeout) != 0) { 2383 
srvr->ctimeout = 0; 2384 } 2385 2386 /* get the ldc id */ 2387 num_chans = md_scan_dag(mdp, vd_port, 2388 md_find_name(mdp, VDC_MD_CHAN_NAME), 2389 md_find_name(mdp, "fwd"), chanp); 2390 2391 /* expecting at least one channel */ 2392 if (num_chans <= 0) { 2393 cmn_err(CE_NOTE, "No '%s' node for '%s' port", 2394 VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME); 2395 kmem_free(srvr, sizeof (vdc_server_t)); 2396 continue; 2397 } else if (num_chans != 1) { 2398 DMSGX(0, "Expected 1 '%s' node for '%s' port, " 2399 "found %d\n", VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME, 2400 num_chans); 2401 } 2402 2403 /* 2404 * We use the first channel found (index 0), irrespective of how 2405 * many are there in total. 2406 */ 2407 if (md_get_prop_val(mdp, chanp[0], VDC_MD_ID, 2408 &srvr->ldc_id) != 0) { 2409 cmn_err(CE_NOTE, "Channel '%s' property not found", 2410 VDC_MD_ID); 2411 kmem_free(srvr, sizeof (vdc_server_t)); 2412 continue; 2413 } 2414 2415 /* 2416 * now initialise LDC channel which will be used to 2417 * communicate with this server 2418 */ 2419 if (vdc_do_ldc_init(vdc, srvr) != 0) { 2420 kmem_free(srvr, sizeof (vdc_server_t)); 2421 continue; 2422 } 2423 2424 /* add server to list */ 2425 if (prev_srvr) 2426 prev_srvr->next = srvr; 2427 else 2428 vdc->server_list = srvr; 2429 2430 prev_srvr = srvr; 2431 2432 /* inc numbers of servers */ 2433 vdc->num_servers++; 2434 } 2435 2436 /* 2437 * Adjust the max number of handshake retries to match 2438 * the number of vdisk servers. 
2439 */ 2440 if (vdc_hshake_retries < vdc->num_servers) 2441 vdc_hshake_retries = vdc->num_servers; 2442 2443 /* pick first server as current server */ 2444 if (vdc->server_list != NULL) { 2445 vdc->curr_server = vdc->server_list; 2446 status = 0; 2447 } else { 2448 status = ENOENT; 2449 } 2450 2451 done: 2452 kmem_free(chanp, listsz); 2453 kmem_free(portp, listsz); 2454 return (status); 2455 } 2456 2457 2458 /* 2459 * Function: 2460 * vdc_do_ldc_up 2461 * 2462 * Description: 2463 * Bring the channel for the current server up. 2464 * 2465 * Arguments: 2466 * vdc - soft state pointer for this instance of the device driver. 2467 * 2468 * Return Code: 2469 * 0 - Success. 2470 * EINVAL - Driver is detaching / LDC error 2471 * ECONNREFUSED - Other end is not listening 2472 */ 2473 static int 2474 vdc_do_ldc_up(vdc_t *vdc) 2475 { 2476 int status; 2477 ldc_status_t ldc_state; 2478 2479 ASSERT(MUTEX_HELD(&vdc->lock)); 2480 2481 DMSG(vdc, 0, "[%d] Bringing up channel %lx\n", 2482 vdc->instance, vdc->curr_server->ldc_id); 2483 2484 if (vdc->lifecycle == VDC_LC_DETACHING) 2485 return (EINVAL); 2486 2487 if ((status = ldc_up(vdc->curr_server->ldc_handle)) != 0) { 2488 switch (status) { 2489 case ECONNREFUSED: /* listener not ready at other end */ 2490 DMSG(vdc, 0, "[%d] ldc_up(%lx,...) 
return %d\n", 2491 vdc->instance, vdc->curr_server->ldc_id, status); 2492 status = 0; 2493 break; 2494 default: 2495 DMSG(vdc, 0, "[%d] Failed to bring up LDC: " 2496 "channel=%ld, err=%d", vdc->instance, 2497 vdc->curr_server->ldc_id, status); 2498 break; 2499 } 2500 } 2501 2502 if (ldc_status(vdc->curr_server->ldc_handle, &ldc_state) == 0) { 2503 vdc->curr_server->ldc_state = ldc_state; 2504 if (ldc_state == LDC_UP) { 2505 DMSG(vdc, 0, "[%d] LDC channel already up\n", 2506 vdc->instance); 2507 vdc->seq_num = 1; 2508 vdc->seq_num_reply = 0; 2509 } 2510 } 2511 2512 return (status); 2513 } 2514 2515 /* 2516 * Function: 2517 * vdc_terminate_ldc() 2518 * 2519 * Description: 2520 * 2521 * Arguments: 2522 * vdc - soft state pointer for this instance of the device driver. 2523 * srvr - vdc per-server info structure 2524 * 2525 * Return Code: 2526 * None 2527 */ 2528 static void 2529 vdc_terminate_ldc(vdc_t *vdc, vdc_server_t *srvr) 2530 { 2531 int instance = ddi_get_instance(vdc->dip); 2532 2533 if (srvr->state & VDC_LDC_OPEN) { 2534 DMSG(vdc, 0, "[%d] ldc_close()\n", instance); 2535 (void) ldc_close(srvr->ldc_handle); 2536 } 2537 if (srvr->state & VDC_LDC_CB) { 2538 DMSG(vdc, 0, "[%d] ldc_unreg_callback()\n", instance); 2539 (void) ldc_unreg_callback(srvr->ldc_handle); 2540 } 2541 if (srvr->state & VDC_LDC_INIT) { 2542 DMSG(vdc, 0, "[%d] ldc_fini()\n", instance); 2543 (void) ldc_fini(srvr->ldc_handle); 2544 srvr->ldc_handle = NULL; 2545 } 2546 2547 srvr->state &= ~(VDC_LDC_INIT | VDC_LDC_CB | VDC_LDC_OPEN); 2548 } 2549 2550 /* 2551 * Function: 2552 * vdc_fini_ports() 2553 * 2554 * Description: 2555 * Finalize all ports by closing the channel associated with each 2556 * port and also freeing the server structure. 2557 * 2558 * Arguments: 2559 * vdc - soft state pointer for this instance of the device driver. 
2560 * 2561 * Return Code: 2562 * None 2563 */ 2564 static void 2565 vdc_fini_ports(vdc_t *vdc) 2566 { 2567 int instance = ddi_get_instance(vdc->dip); 2568 vdc_server_t *srvr, *prev_srvr; 2569 2570 ASSERT(vdc != NULL); 2571 ASSERT(mutex_owned(&vdc->lock)); 2572 2573 DMSG(vdc, 0, "[%d] initialized=%x\n", instance, vdc->initialized); 2574 2575 srvr = vdc->server_list; 2576 2577 while (srvr) { 2578 2579 vdc_terminate_ldc(vdc, srvr); 2580 2581 /* next server */ 2582 prev_srvr = srvr; 2583 srvr = srvr->next; 2584 2585 /* free server */ 2586 kmem_free(prev_srvr, sizeof (vdc_server_t)); 2587 } 2588 2589 vdc->server_list = NULL; 2590 } 2591 2592 /* -------------------------------------------------------------------------- */ 2593 2594 /* 2595 * Descriptor Ring helper routines 2596 */ 2597 2598 /* 2599 * Function: 2600 * vdc_init_descriptor_ring() 2601 * 2602 * Description: 2603 * 2604 * Arguments: 2605 * vdc - soft state pointer for this instance of the device driver. 2606 * 2607 * Return Code: 2608 * 0 - Success 2609 */ 2610 static int 2611 vdc_init_descriptor_ring(vdc_t *vdc) 2612 { 2613 vd_dring_entry_t *dep = NULL; /* DRing Entry pointer */ 2614 int status = 0; 2615 int i; 2616 2617 DMSG(vdc, 0, "[%d] initialized=%x\n", vdc->instance, vdc->initialized); 2618 2619 ASSERT(vdc != NULL); 2620 ASSERT(mutex_owned(&vdc->lock)); 2621 2622 /* ensure we have enough room to store max sized block */ 2623 ASSERT(maxphys <= VD_MAX_BLOCK_SIZE); 2624 2625 if ((vdc->initialized & VDC_DRING_INIT) == 0) { 2626 DMSG(vdc, 0, "[%d] ldc_mem_dring_create\n", vdc->instance); 2627 /* 2628 * Calculate the maximum block size we can transmit using one 2629 * Descriptor Ring entry from the attributes returned by the 2630 * vDisk server. This is subject to a minimum of 'maxphys' 2631 * as we do not have the capability to split requests over 2632 * multiple DRing entries. 
2633 */ 2634 if ((vdc->max_xfer_sz * vdc->vdisk_bsize) < maxphys) { 2635 DMSG(vdc, 0, "[%d] using minimum DRing size\n", 2636 vdc->instance); 2637 vdc->dring_max_cookies = maxphys / PAGESIZE; 2638 } else { 2639 vdc->dring_max_cookies = 2640 (vdc->max_xfer_sz * vdc->vdisk_bsize) / PAGESIZE; 2641 } 2642 vdc->dring_entry_size = (sizeof (vd_dring_entry_t) + 2643 (sizeof (ldc_mem_cookie_t) * 2644 (vdc->dring_max_cookies - 1))); 2645 vdc->dring_len = VD_DRING_LEN; 2646 2647 status = ldc_mem_dring_create(vdc->dring_len, 2648 vdc->dring_entry_size, &vdc->dring_hdl); 2649 if ((vdc->dring_hdl == NULL) || (status != 0)) { 2650 DMSG(vdc, 0, "[%d] Descriptor ring creation failed", 2651 vdc->instance); 2652 return (status); 2653 } 2654 vdc->initialized |= VDC_DRING_INIT; 2655 } 2656 2657 if ((vdc->initialized & VDC_DRING_BOUND) == 0) { 2658 DMSG(vdc, 0, "[%d] ldc_mem_dring_bind\n", vdc->instance); 2659 vdc->dring_cookie = 2660 kmem_zalloc(sizeof (ldc_mem_cookie_t), KM_SLEEP); 2661 2662 status = ldc_mem_dring_bind(vdc->curr_server->ldc_handle, 2663 vdc->dring_hdl, 2664 LDC_SHADOW_MAP|LDC_DIRECT_MAP, LDC_MEM_RW, 2665 &vdc->dring_cookie[0], 2666 &vdc->dring_cookie_count); 2667 if (status != 0) { 2668 DMSG(vdc, 0, "[%d] Failed to bind descriptor ring " 2669 "(%lx) to channel (%lx) status=%d\n", 2670 vdc->instance, vdc->dring_hdl, 2671 vdc->curr_server->ldc_handle, status); 2672 return (status); 2673 } 2674 ASSERT(vdc->dring_cookie_count == 1); 2675 vdc->initialized |= VDC_DRING_BOUND; 2676 } 2677 2678 status = ldc_mem_dring_info(vdc->dring_hdl, &vdc->dring_mem_info); 2679 if (status != 0) { 2680 DMSG(vdc, 0, 2681 "[%d] Failed to get info for descriptor ring (%lx)\n", 2682 vdc->instance, vdc->dring_hdl); 2683 return (status); 2684 } 2685 2686 if ((vdc->initialized & VDC_DRING_LOCAL) == 0) { 2687 DMSG(vdc, 0, "[%d] local dring\n", vdc->instance); 2688 2689 /* Allocate the local copy of this dring */ 2690 vdc->local_dring = 2691 kmem_zalloc(vdc->dring_len * sizeof (vdc_local_desc_t), 
2692 KM_SLEEP); 2693 vdc->initialized |= VDC_DRING_LOCAL; 2694 } 2695 2696 /* 2697 * Mark all DRing entries as free and initialize the private 2698 * descriptor's memory handles. If any entry is initialized, 2699 * we need to free it later so we set the bit in 'initialized' 2700 * at the start. 2701 */ 2702 vdc->initialized |= VDC_DRING_ENTRY; 2703 for (i = 0; i < vdc->dring_len; i++) { 2704 dep = VDC_GET_DRING_ENTRY_PTR(vdc, i); 2705 dep->hdr.dstate = VIO_DESC_FREE; 2706 2707 status = ldc_mem_alloc_handle(vdc->curr_server->ldc_handle, 2708 &vdc->local_dring[i].desc_mhdl); 2709 if (status != 0) { 2710 DMSG(vdc, 0, "![%d] Failed to alloc mem handle for" 2711 " descriptor %d", vdc->instance, i); 2712 return (status); 2713 } 2714 vdc->local_dring[i].is_free = B_TRUE; 2715 vdc->local_dring[i].dep = dep; 2716 } 2717 2718 /* Initialize the starting index */ 2719 vdc->dring_curr_idx = 0; 2720 2721 return (status); 2722 } 2723 2724 /* 2725 * Function: 2726 * vdc_destroy_descriptor_ring() 2727 * 2728 * Description: 2729 * 2730 * Arguments: 2731 * vdc - soft state pointer for this instance of the device driver. 
2732 * 2733 * Return Code: 2734 * None 2735 */ 2736 static void 2737 vdc_destroy_descriptor_ring(vdc_t *vdc) 2738 { 2739 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 2740 ldc_mem_handle_t mhdl = NULL; 2741 ldc_mem_info_t minfo; 2742 int status = -1; 2743 int i; /* loop */ 2744 2745 ASSERT(vdc != NULL); 2746 ASSERT(mutex_owned(&vdc->lock)); 2747 2748 DMSG(vdc, 0, "[%d] Entered\n", vdc->instance); 2749 2750 if (vdc->initialized & VDC_DRING_ENTRY) { 2751 DMSG(vdc, 0, 2752 "[%d] Removing Local DRing entries\n", vdc->instance); 2753 for (i = 0; i < vdc->dring_len; i++) { 2754 ldep = &vdc->local_dring[i]; 2755 mhdl = ldep->desc_mhdl; 2756 2757 if (mhdl == NULL) 2758 continue; 2759 2760 if ((status = ldc_mem_info(mhdl, &minfo)) != 0) { 2761 DMSG(vdc, 0, 2762 "ldc_mem_info returned an error: %d\n", 2763 status); 2764 2765 /* 2766 * This must mean that the mem handle 2767 * is not valid. Clear it out so that 2768 * no one tries to use it. 2769 */ 2770 ldep->desc_mhdl = NULL; 2771 continue; 2772 } 2773 2774 if (minfo.status == LDC_BOUND) { 2775 (void) ldc_mem_unbind_handle(mhdl); 2776 } 2777 2778 (void) ldc_mem_free_handle(mhdl); 2779 2780 ldep->desc_mhdl = NULL; 2781 } 2782 vdc->initialized &= ~VDC_DRING_ENTRY; 2783 } 2784 2785 if (vdc->initialized & VDC_DRING_LOCAL) { 2786 DMSG(vdc, 0, "[%d] Freeing Local DRing\n", vdc->instance); 2787 kmem_free(vdc->local_dring, 2788 vdc->dring_len * sizeof (vdc_local_desc_t)); 2789 vdc->initialized &= ~VDC_DRING_LOCAL; 2790 } 2791 2792 if (vdc->initialized & VDC_DRING_BOUND) { 2793 DMSG(vdc, 0, "[%d] Unbinding DRing\n", vdc->instance); 2794 status = ldc_mem_dring_unbind(vdc->dring_hdl); 2795 if (status == 0) { 2796 vdc->initialized &= ~VDC_DRING_BOUND; 2797 } else { 2798 DMSG(vdc, 0, "[%d] Error %d unbinding DRing %lx", 2799 vdc->instance, status, vdc->dring_hdl); 2800 } 2801 kmem_free(vdc->dring_cookie, sizeof (ldc_mem_cookie_t)); 2802 } 2803 2804 if (vdc->initialized & VDC_DRING_INIT) { 2805 DMSG(vdc, 0, "[%d] 
Destroying DRing\n", vdc->instance); 2806 status = ldc_mem_dring_destroy(vdc->dring_hdl); 2807 if (status == 0) { 2808 vdc->dring_hdl = NULL; 2809 bzero(&vdc->dring_mem_info, sizeof (ldc_mem_info_t)); 2810 vdc->initialized &= ~VDC_DRING_INIT; 2811 } else { 2812 DMSG(vdc, 0, "[%d] Error %d destroying DRing (%lx)", 2813 vdc->instance, status, vdc->dring_hdl); 2814 } 2815 } 2816 } 2817 2818 /* 2819 * Function: 2820 * vdc_map_to_shared_dring() 2821 * 2822 * Description: 2823 * Copy contents of the local descriptor to the shared 2824 * memory descriptor. 2825 * 2826 * Arguments: 2827 * vdcp - soft state pointer for this instance of the device driver. 2828 * idx - descriptor ring index 2829 * 2830 * Return Code: 2831 * None 2832 */ 2833 static int 2834 vdc_map_to_shared_dring(vdc_t *vdcp, int idx) 2835 { 2836 vdc_local_desc_t *ldep; 2837 vd_dring_entry_t *dep; 2838 int rv; 2839 2840 ldep = &(vdcp->local_dring[idx]); 2841 2842 /* for now leave in the old pop_mem_hdl stuff */ 2843 if (ldep->nbytes > 0) { 2844 rv = vdc_populate_mem_hdl(vdcp, ldep); 2845 if (rv) { 2846 DMSG(vdcp, 0, "[%d] Cannot populate mem handle\n", 2847 vdcp->instance); 2848 return (rv); 2849 } 2850 } 2851 2852 /* 2853 * fill in the data details into the DRing 2854 */ 2855 dep = ldep->dep; 2856 ASSERT(dep != NULL); 2857 2858 dep->payload.req_id = VDC_GET_NEXT_REQ_ID(vdcp); 2859 dep->payload.operation = ldep->operation; 2860 dep->payload.addr = ldep->offset; 2861 dep->payload.nbytes = ldep->nbytes; 2862 dep->payload.status = (uint32_t)-1; /* vds will set valid value */ 2863 dep->payload.slice = ldep->slice; 2864 dep->hdr.dstate = VIO_DESC_READY; 2865 dep->hdr.ack = 1; /* request an ACK for every message */ 2866 2867 return (0); 2868 } 2869 2870 /* 2871 * Function: 2872 * vdc_send_request 2873 * 2874 * Description: 2875 * This routine writes the data to be transmitted to vds into the 2876 * descriptor, notifies vds that the ring has been updated and 2877 * then waits for the request to be processed. 
2878 * 2879 * Arguments: 2880 * vdcp - the soft state pointer 2881 * operation - operation we want vds to perform (VD_OP_XXX) 2882 * addr - address of data buf to be read/written. 2883 * nbytes - number of bytes to read/write 2884 * slice - the disk slice this request is for 2885 * offset - relative disk offset 2886 * cb_type - type of call - STRATEGY or SYNC 2887 * cb_arg - parameter to be sent to server (depends on VD_OP_XXX type) 2888 * . mode for ioctl(9e) 2889 * . LP64 diskaddr_t (block I/O) 2890 * dir - direction of operation (READ/WRITE/BOTH) 2891 * 2892 * Return Codes: 2893 * 0 2894 * ENXIO 2895 */ 2896 static int 2897 vdc_send_request(vdc_t *vdcp, int operation, caddr_t addr, 2898 size_t nbytes, int slice, diskaddr_t offset, int cb_type, 2899 void *cb_arg, vio_desc_direction_t dir) 2900 { 2901 int rv = 0; 2902 2903 ASSERT(vdcp != NULL); 2904 ASSERT(slice == VD_SLICE_NONE || slice < V_NUMPAR); 2905 2906 mutex_enter(&vdcp->lock); 2907 2908 /* 2909 * If this is a block read/write operation we update the I/O statistics 2910 * to indicate that the request is being put on the waitq to be 2911 * serviced. 2912 * 2913 * We do it here (a common routine for both synchronous and strategy 2914 * calls) for performance reasons - we are already holding vdc->lock 2915 * so there is no extra locking overhead. We would have to explicitly 2916 * grab the 'lock' mutex to update the stats if we were to do this 2917 * higher up the stack in vdc_strategy() et. al. 
2918 */ 2919 if ((operation == VD_OP_BREAD) || (operation == VD_OP_BWRITE)) { 2920 DTRACE_IO1(start, buf_t *, cb_arg); 2921 VD_KSTAT_WAITQ_ENTER(vdcp); 2922 } 2923 2924 do { 2925 while (vdcp->state != VDC_STATE_RUNNING) { 2926 2927 /* return error if detaching */ 2928 if (vdcp->state == VDC_STATE_DETACH) { 2929 rv = ENXIO; 2930 goto done; 2931 } 2932 2933 /* fail request if connection timeout is reached */ 2934 if (vdcp->ctimeout_reached) { 2935 rv = EIO; 2936 goto done; 2937 } 2938 2939 /* 2940 * If we are panicking and the disk is not ready then 2941 * we can't send any request because we can't complete 2942 * the handshake now. 2943 */ 2944 if (ddi_in_panic()) { 2945 rv = EIO; 2946 goto done; 2947 } 2948 2949 cv_wait(&vdcp->running_cv, &vdcp->lock); 2950 } 2951 2952 } while (vdc_populate_descriptor(vdcp, operation, addr, 2953 nbytes, slice, offset, cb_type, cb_arg, dir)); 2954 2955 done: 2956 /* 2957 * If this is a block read/write we update the I/O statistics kstat 2958 * to indicate that this request has been placed on the queue for 2959 * processing (i.e sent to the vDisk server) - iostat(1M) will 2960 * report the time waiting for the vDisk server under the %b column 2961 * In the case of an error we simply take it off the wait queue. 2962 */ 2963 if ((operation == VD_OP_BREAD) || (operation == VD_OP_BWRITE)) { 2964 if (rv == 0) { 2965 VD_KSTAT_WAITQ_TO_RUNQ(vdcp); 2966 DTRACE_PROBE1(send, buf_t *, cb_arg); 2967 } else { 2968 VD_UPDATE_ERR_STATS(vdcp, vd_transerrs); 2969 VD_KSTAT_WAITQ_EXIT(vdcp); 2970 DTRACE_IO1(done, buf_t *, cb_arg); 2971 } 2972 } 2973 2974 mutex_exit(&vdcp->lock); 2975 2976 return (rv); 2977 } 2978 2979 2980 /* 2981 * Function: 2982 * vdc_populate_descriptor 2983 * 2984 * Description: 2985 * This routine writes the data to be transmitted to vds into the 2986 * descriptor, notifies vds that the ring has been updated and 2987 * then waits for the request to be processed. 
2988 * 2989 * Arguments: 2990 * vdcp - the soft state pointer 2991 * operation - operation we want vds to perform (VD_OP_XXX) 2992 * addr - address of data buf to be read/written. 2993 * nbytes - number of bytes to read/write 2994 * slice - the disk slice this request is for 2995 * offset - relative disk offset 2996 * cb_type - type of call - STRATEGY or SYNC 2997 * cb_arg - parameter to be sent to server (depends on VD_OP_XXX type) 2998 * . mode for ioctl(9e) 2999 * . LP64 diskaddr_t (block I/O) 3000 * dir - direction of operation (READ/WRITE/BOTH) 3001 * 3002 * Return Codes: 3003 * 0 3004 * EAGAIN 3005 * ECONNRESET 3006 * ENXIO 3007 */ 3008 static int 3009 vdc_populate_descriptor(vdc_t *vdcp, int operation, caddr_t addr, 3010 size_t nbytes, int slice, diskaddr_t offset, int cb_type, 3011 void *cb_arg, vio_desc_direction_t dir) 3012 { 3013 vdc_local_desc_t *local_dep = NULL; /* Local Dring Pointer */ 3014 int idx; /* Index of DRing entry used */ 3015 int next_idx; 3016 vio_dring_msg_t dmsg; 3017 size_t msglen; 3018 int rv; 3019 3020 ASSERT(MUTEX_HELD(&vdcp->lock)); 3021 vdcp->threads_pending++; 3022 loop: 3023 DMSG(vdcp, 2, ": dring_curr_idx = %d\n", vdcp->dring_curr_idx); 3024 3025 /* Get next available D-Ring entry */ 3026 idx = vdcp->dring_curr_idx; 3027 local_dep = &(vdcp->local_dring[idx]); 3028 3029 if (!local_dep->is_free) { 3030 DMSG(vdcp, 2, "[%d]: dring full - waiting for space\n", 3031 vdcp->instance); 3032 cv_wait(&vdcp->dring_free_cv, &vdcp->lock); 3033 if (vdcp->state == VDC_STATE_RUNNING || 3034 vdcp->state == VDC_STATE_HANDLE_PENDING) { 3035 goto loop; 3036 } 3037 vdcp->threads_pending--; 3038 return (ECONNRESET); 3039 } 3040 3041 next_idx = idx + 1; 3042 if (next_idx >= vdcp->dring_len) 3043 next_idx = 0; 3044 vdcp->dring_curr_idx = next_idx; 3045 3046 ASSERT(local_dep->is_free); 3047 3048 local_dep->operation = operation; 3049 local_dep->addr = addr; 3050 local_dep->nbytes = nbytes; 3051 local_dep->slice = slice; 3052 local_dep->offset = offset; 
3053 local_dep->cb_type = cb_type; 3054 local_dep->cb_arg = cb_arg; 3055 local_dep->dir = dir; 3056 3057 local_dep->is_free = B_FALSE; 3058 3059 rv = vdc_map_to_shared_dring(vdcp, idx); 3060 if (rv) { 3061 DMSG(vdcp, 0, "[%d]: cannot bind memory - waiting ..\n", 3062 vdcp->instance); 3063 /* free the descriptor */ 3064 local_dep->is_free = B_TRUE; 3065 vdcp->dring_curr_idx = idx; 3066 cv_wait(&vdcp->membind_cv, &vdcp->lock); 3067 if (vdcp->state == VDC_STATE_RUNNING || 3068 vdcp->state == VDC_STATE_HANDLE_PENDING) { 3069 goto loop; 3070 } 3071 vdcp->threads_pending--; 3072 return (ECONNRESET); 3073 } 3074 3075 /* 3076 * Send a msg with the DRing details to vds 3077 */ 3078 VIO_INIT_DRING_DATA_TAG(dmsg); 3079 VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdcp); 3080 dmsg.dring_ident = vdcp->dring_ident; 3081 dmsg.start_idx = idx; 3082 dmsg.end_idx = idx; 3083 vdcp->seq_num++; 3084 3085 DTRACE_PROBE2(populate, int, vdcp->instance, 3086 vdc_local_desc_t *, local_dep); 3087 DMSG(vdcp, 2, "ident=0x%lx, st=%u, end=%u, seq=%ld\n", 3088 vdcp->dring_ident, dmsg.start_idx, dmsg.end_idx, dmsg.seq_num); 3089 3090 /* 3091 * note we're still holding the lock here to 3092 * make sure the message goes out in order !!!... 3093 */ 3094 msglen = sizeof (dmsg); 3095 rv = vdc_send(vdcp, (caddr_t)&dmsg, &msglen); 3096 switch (rv) { 3097 case ECONNRESET: 3098 /* 3099 * vdc_send initiates the reset on failure. 3100 * Since the transaction has already been put 3101 * on the local dring, it will automatically get 3102 * retried when the channel is reset. Given that, 3103 * it is ok to just return success even though the 3104 * send failed. 
3105 */ 3106 rv = 0; 3107 break; 3108 3109 case 0: /* EOK */ 3110 DMSG(vdcp, 1, "sent via LDC: rv=%d\n", rv); 3111 break; 3112 3113 default: 3114 goto cleanup_and_exit; 3115 } 3116 3117 vdcp->threads_pending--; 3118 return (rv); 3119 3120 cleanup_and_exit: 3121 DMSG(vdcp, 0, "unexpected error, rv=%d\n", rv); 3122 return (ENXIO); 3123 } 3124 3125 /* 3126 * Function: 3127 * vdc_do_sync_op 3128 * 3129 * Description: 3130 * Wrapper around vdc_populate_descriptor that blocks until the 3131 * response to the message is available. 3132 * 3133 * Arguments: 3134 * vdcp - the soft state pointer 3135 * operation - operation we want vds to perform (VD_OP_XXX) 3136 * addr - address of data buf to be read/written. 3137 * nbytes - number of bytes to read/write 3138 * slice - the disk slice this request is for 3139 * offset - relative disk offset 3140 * cb_type - type of call - STRATEGY or SYNC 3141 * cb_arg - parameter to be sent to server (depends on VD_OP_XXX type) 3142 * . mode for ioctl(9e) 3143 * . LP64 diskaddr_t (block I/O) 3144 * dir - direction of operation (READ/WRITE/BOTH) 3145 * rconflict - check for reservation conflict in case of failure 3146 * 3147 * rconflict should be set to B_TRUE by most callers. Callers invoking the 3148 * VD_OP_SCSICMD operation can set rconflict to B_FALSE if they check the 3149 * result of a successful operation with vd_scsi_status(). 3150 * 3151 * Return Codes: 3152 * 0 3153 * EAGAIN 3154 * EFAULT 3155 * ENXIO 3156 * EIO 3157 */ 3158 static int 3159 vdc_do_sync_op(vdc_t *vdcp, int operation, caddr_t addr, size_t nbytes, 3160 int slice, diskaddr_t offset, int cb_type, void *cb_arg, 3161 vio_desc_direction_t dir, boolean_t rconflict) 3162 { 3163 int status; 3164 vdc_io_t *vio; 3165 boolean_t check_resv_conflict = B_FALSE; 3166 3167 ASSERT(cb_type == CB_SYNC); 3168 3169 /* 3170 * Grab the lock, if blocked wait until the server 3171 * response causes us to wake up again. 
3172 */ 3173 mutex_enter(&vdcp->lock); 3174 vdcp->sync_op_cnt++; 3175 while (vdcp->sync_op_blocked && vdcp->state != VDC_STATE_DETACH) { 3176 if (ddi_in_panic()) { 3177 /* don't block if we are panicking */ 3178 vdcp->sync_op_cnt--; 3179 mutex_exit(&vdcp->lock); 3180 return (EIO); 3181 } else { 3182 cv_wait(&vdcp->sync_blocked_cv, &vdcp->lock); 3183 } 3184 } 3185 3186 if (vdcp->state == VDC_STATE_DETACH) { 3187 cv_broadcast(&vdcp->sync_blocked_cv); 3188 vdcp->sync_op_cnt--; 3189 mutex_exit(&vdcp->lock); 3190 return (ENXIO); 3191 } 3192 3193 /* now block anyone other thread entering after us */ 3194 vdcp->sync_op_blocked = B_TRUE; 3195 vdcp->sync_op_pending = B_TRUE; 3196 mutex_exit(&vdcp->lock); 3197 3198 status = vdc_send_request(vdcp, operation, addr, 3199 nbytes, slice, offset, cb_type, cb_arg, dir); 3200 3201 mutex_enter(&vdcp->lock); 3202 3203 if (status != 0) { 3204 vdcp->sync_op_pending = B_FALSE; 3205 } else if (ddi_in_panic()) { 3206 if (vdc_drain_response(vdcp, CB_SYNC, NULL) == 0) { 3207 status = vdcp->sync_op_status; 3208 } else { 3209 vdcp->sync_op_pending = B_FALSE; 3210 status = EIO; 3211 } 3212 } else { 3213 /* 3214 * block until our transaction completes. 3215 * Also anyone else waiting also gets to go next. 3216 */ 3217 while (vdcp->sync_op_pending && vdcp->state != VDC_STATE_DETACH) 3218 cv_wait(&vdcp->sync_pending_cv, &vdcp->lock); 3219 3220 DMSG(vdcp, 2, ": operation returned %d\n", 3221 vdcp->sync_op_status); 3222 if (vdcp->state == VDC_STATE_DETACH) { 3223 vdcp->sync_op_pending = B_FALSE; 3224 status = ENXIO; 3225 } else { 3226 status = vdcp->sync_op_status; 3227 if (status != 0 && vdcp->failfast_interval != 0) { 3228 /* 3229 * Operation has failed and failfast is enabled. 3230 * We need to check if the failure is due to a 3231 * reservation conflict if this was requested. 
3232 */ 3233 check_resv_conflict = rconflict; 3234 } 3235 3236 } 3237 } 3238 3239 vdcp->sync_op_status = 0; 3240 vdcp->sync_op_blocked = B_FALSE; 3241 vdcp->sync_op_cnt--; 3242 3243 /* signal the next waiting thread */ 3244 cv_signal(&vdcp->sync_blocked_cv); 3245 3246 /* 3247 * We have to check for reservation conflict after unblocking sync 3248 * operations because some sync operations will be used to do this 3249 * check. 3250 */ 3251 if (check_resv_conflict) { 3252 vio = vdc_failfast_io_queue(vdcp, NULL); 3253 while (vio->vio_qtime != 0) 3254 cv_wait(&vdcp->failfast_io_cv, &vdcp->lock); 3255 kmem_free(vio, sizeof (vdc_io_t)); 3256 } 3257 3258 mutex_exit(&vdcp->lock); 3259 3260 return (status); 3261 } 3262 3263 3264 /* 3265 * Function: 3266 * vdc_drain_response() 3267 * 3268 * Description: 3269 * When a guest is panicking, the completion of requests needs to be 3270 * handled differently because interrupts are disabled and vdc 3271 * will not get messages. We have to poll for the messages instead. 3272 * 3273 * Note: since we are panicking we don't implement the io:::done 3274 * DTrace probe or update the I/O statistics kstats. 3275 * 3276 * Arguments: 3277 * vdc - soft state pointer for this instance of the device driver. 3278 * cb_type - the type of request we want to drain. If type is CB_SYNC 3279 * then we drain all responses until we find a CB_SYNC request. 3280 * If the type is CB_STRATEGY then the behavior depends on the 3281 * value of the buf argument. 3282 * buf - if the cb_type argument is CB_SYNC then the buf argument 3283 * must be NULL. If the cb_type argument is CB_STRATEGY and 3284 * if buf is NULL then we drain all responses, otherwise we 3285 * poll until we receive a ACK/NACK for the specific I/O 3286 * described by buf. 3287 * 3288 * Return Code: 3289 * 0 - Success. If we were expecting a response to a particular 3290 * CB_SYNC or CB_STRATEGY request then this means that a 3291 * response has been received. 
3292 */ 3293 static int 3294 vdc_drain_response(vdc_t *vdc, vio_cb_type_t cb_type, struct buf *buf) 3295 { 3296 int rv, idx, retries; 3297 size_t msglen; 3298 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 3299 vio_dring_msg_t dmsg; 3300 struct buf *mbuf; 3301 boolean_t ack; 3302 3303 ASSERT(cb_type == CB_STRATEGY || cb_type == CB_SYNC); 3304 3305 mutex_enter(&vdc->lock); 3306 3307 retries = 0; 3308 for (;;) { 3309 msglen = sizeof (dmsg); 3310 rv = ldc_read(vdc->curr_server->ldc_handle, (caddr_t)&dmsg, 3311 &msglen); 3312 if (rv) { 3313 rv = EINVAL; 3314 break; 3315 } 3316 3317 /* 3318 * if there are no packets wait and check again 3319 */ 3320 if ((rv == 0) && (msglen == 0)) { 3321 if (retries++ > vdc_dump_retries) { 3322 rv = EAGAIN; 3323 break; 3324 } 3325 3326 drv_usecwait(vdc_usec_timeout_dump); 3327 continue; 3328 } 3329 3330 /* 3331 * Ignore all messages that are not ACKs/NACKs to 3332 * DRing requests. 3333 */ 3334 if ((dmsg.tag.vio_msgtype != VIO_TYPE_DATA) || 3335 (dmsg.tag.vio_subtype_env != VIO_DRING_DATA)) { 3336 DMSG(vdc, 0, "discard pkt: type=%d sub=%d env=%d\n", 3337 dmsg.tag.vio_msgtype, 3338 dmsg.tag.vio_subtype, 3339 dmsg.tag.vio_subtype_env); 3340 continue; 3341 } 3342 3343 /* 3344 * Record if the packet was ACK'ed or not. If the packet was not 3345 * ACK'ed then we will just mark the request as failed; we don't 3346 * want to reset the connection at this point. 
3347 */ 3348 switch (dmsg.tag.vio_subtype) { 3349 case VIO_SUBTYPE_ACK: 3350 ack = B_TRUE; 3351 break; 3352 case VIO_SUBTYPE_NACK: 3353 ack = B_FALSE; 3354 break; 3355 default: 3356 continue; 3357 } 3358 3359 idx = dmsg.start_idx; 3360 if (idx >= vdc->dring_len) { 3361 DMSG(vdc, 0, "[%d] Bogus ack data : start %d\n", 3362 vdc->instance, idx); 3363 continue; 3364 } 3365 ldep = &vdc->local_dring[idx]; 3366 if (ldep->dep->hdr.dstate != VIO_DESC_DONE) { 3367 DMSG(vdc, 0, "[%d] Entry @ %d - state !DONE %d\n", 3368 vdc->instance, idx, ldep->dep->hdr.dstate); 3369 continue; 3370 } 3371 3372 switch (ldep->cb_type) { 3373 3374 case CB_STRATEGY: 3375 mbuf = ldep->cb_arg; 3376 if (mbuf != NULL) { 3377 mbuf->b_resid = mbuf->b_bcount - 3378 ldep->dep->payload.nbytes; 3379 bioerror(mbuf, 3380 ack ? ldep->dep->payload.status : EIO); 3381 biodone(mbuf); 3382 } 3383 rv = vdc_depopulate_descriptor(vdc, idx); 3384 if (buf != NULL && buf == mbuf) { 3385 rv = 0; 3386 goto done; 3387 } 3388 break; 3389 3390 case CB_SYNC: 3391 rv = vdc_depopulate_descriptor(vdc, idx); 3392 vdc->sync_op_status = ack ? rv : EIO; 3393 vdc->sync_op_pending = B_FALSE; 3394 cv_signal(&vdc->sync_pending_cv); 3395 if (cb_type == CB_SYNC) { 3396 rv = 0; 3397 goto done; 3398 } 3399 break; 3400 } 3401 3402 /* if this is the last descriptor - break out of loop */ 3403 if ((idx + 1) % vdc->dring_len == vdc->dring_curr_idx) { 3404 /* 3405 * If we were expecting a response for a particular 3406 * request then we return with an error otherwise we 3407 * have successfully completed the drain. 3408 */ 3409 rv = (buf != NULL || cb_type == CB_SYNC)? ESRCH: 0; 3410 break; 3411 } 3412 } 3413 3414 done: 3415 mutex_exit(&vdc->lock); 3416 DMSG(vdc, 0, "End idx=%d\n", idx); 3417 3418 return (rv); 3419 } 3420 3421 3422 /* 3423 * Function: 3424 * vdc_depopulate_descriptor() 3425 * 3426 * Description: 3427 * 3428 * Arguments: 3429 * vdc - soft state pointer for this instance of the device driver. 
3430 * idx - Index of the Descriptor Ring entry being modified 3431 * 3432 * Return Code: 3433 * 0 - Success 3434 */ 3435 static int 3436 vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx) 3437 { 3438 vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */ 3439 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 3440 int status = ENXIO; 3441 int rv = 0; 3442 3443 ASSERT(vdc != NULL); 3444 ASSERT(idx < vdc->dring_len); 3445 ldep = &vdc->local_dring[idx]; 3446 ASSERT(ldep != NULL); 3447 ASSERT(MUTEX_HELD(&vdc->lock)); 3448 3449 DTRACE_PROBE2(depopulate, int, vdc->instance, vdc_local_desc_t *, ldep); 3450 DMSG(vdc, 2, ": idx = %d\n", idx); 3451 3452 dep = ldep->dep; 3453 ASSERT(dep != NULL); 3454 ASSERT((dep->hdr.dstate == VIO_DESC_DONE) || 3455 (dep->payload.status == ECANCELED)); 3456 3457 VDC_MARK_DRING_ENTRY_FREE(vdc, idx); 3458 3459 ldep->is_free = B_TRUE; 3460 status = dep->payload.status; 3461 DMSG(vdc, 2, ": is_free = %d : status = %d\n", ldep->is_free, status); 3462 3463 /* 3464 * If no buffers were used to transfer information to the server when 3465 * populating the descriptor then no memory handles need to be unbound 3466 * and we can return now. 3467 */ 3468 if (ldep->nbytes == 0) { 3469 cv_signal(&vdc->dring_free_cv); 3470 return (status); 3471 } 3472 3473 /* 3474 * If the upper layer passed in a misaligned address we copied the 3475 * data into an aligned buffer before sending it to LDC - we now 3476 * copy it back to the original buffer. 
3477 */ 3478 if (ldep->align_addr) { 3479 ASSERT(ldep->addr != NULL); 3480 3481 if (dep->payload.nbytes > 0) 3482 bcopy(ldep->align_addr, ldep->addr, 3483 dep->payload.nbytes); 3484 kmem_free(ldep->align_addr, 3485 sizeof (caddr_t) * P2ROUNDUP(ldep->nbytes, 8)); 3486 ldep->align_addr = NULL; 3487 } 3488 3489 rv = ldc_mem_unbind_handle(ldep->desc_mhdl); 3490 if (rv != 0) { 3491 DMSG(vdc, 0, "?[%d] unbind mhdl 0x%lx @ idx %d failed (%d)", 3492 vdc->instance, ldep->desc_mhdl, idx, rv); 3493 /* 3494 * The error returned by the vDisk server is more informative 3495 * and thus has a higher priority but if it isn't set we ensure 3496 * that this function returns an error. 3497 */ 3498 if (status == 0) 3499 status = EINVAL; 3500 } 3501 3502 cv_signal(&vdc->membind_cv); 3503 cv_signal(&vdc->dring_free_cv); 3504 3505 return (status); 3506 } 3507 3508 /* 3509 * Function: 3510 * vdc_populate_mem_hdl() 3511 * 3512 * Description: 3513 * 3514 * Arguments: 3515 * vdc - soft state pointer for this instance of the device driver. 
3516 * idx - Index of the Descriptor Ring entry being modified 3517 * addr - virtual address being mapped in 3518 * nybtes - number of bytes in 'addr' 3519 * operation - the vDisk operation being performed (VD_OP_xxx) 3520 * 3521 * Return Code: 3522 * 0 - Success 3523 */ 3524 static int 3525 vdc_populate_mem_hdl(vdc_t *vdcp, vdc_local_desc_t *ldep) 3526 { 3527 vd_dring_entry_t *dep = NULL; 3528 ldc_mem_handle_t mhdl; 3529 caddr_t vaddr; 3530 size_t nbytes; 3531 uint8_t perm = LDC_MEM_RW; 3532 uint8_t maptype; 3533 int rv = 0; 3534 int i; 3535 3536 ASSERT(vdcp != NULL); 3537 3538 dep = ldep->dep; 3539 mhdl = ldep->desc_mhdl; 3540 3541 switch (ldep->dir) { 3542 case VIO_read_dir: 3543 perm = LDC_MEM_W; 3544 break; 3545 3546 case VIO_write_dir: 3547 perm = LDC_MEM_R; 3548 break; 3549 3550 case VIO_both_dir: 3551 perm = LDC_MEM_RW; 3552 break; 3553 3554 default: 3555 ASSERT(0); /* catch bad programming in vdc */ 3556 } 3557 3558 /* 3559 * LDC expects any addresses passed in to be 8-byte aligned. 
We need 3560 * to copy the contents of any misaligned buffers to a newly allocated 3561 * buffer and bind it instead (and copy the the contents back to the 3562 * original buffer passed in when depopulating the descriptor) 3563 */ 3564 vaddr = ldep->addr; 3565 nbytes = ldep->nbytes; 3566 if (((uint64_t)vaddr & 0x7) != 0) { 3567 ASSERT(ldep->align_addr == NULL); 3568 ldep->align_addr = 3569 kmem_alloc(sizeof (caddr_t) * 3570 P2ROUNDUP(nbytes, 8), KM_SLEEP); 3571 DMSG(vdcp, 0, "[%d] Misaligned address %p reallocating " 3572 "(buf=%p nb=%ld op=%d)\n", 3573 vdcp->instance, (void *)vaddr, (void *)ldep->align_addr, 3574 nbytes, ldep->operation); 3575 if (perm != LDC_MEM_W) 3576 bcopy(vaddr, ldep->align_addr, nbytes); 3577 vaddr = ldep->align_addr; 3578 } 3579 3580 maptype = LDC_IO_MAP|LDC_SHADOW_MAP|LDC_DIRECT_MAP; 3581 rv = ldc_mem_bind_handle(mhdl, vaddr, P2ROUNDUP(nbytes, 8), 3582 maptype, perm, &dep->payload.cookie[0], &dep->payload.ncookies); 3583 DMSG(vdcp, 2, "[%d] bound mem handle; ncookies=%d\n", 3584 vdcp->instance, dep->payload.ncookies); 3585 if (rv != 0) { 3586 DMSG(vdcp, 0, "[%d] Failed to bind LDC memory handle " 3587 "(mhdl=%p, buf=%p, err=%d)\n", 3588 vdcp->instance, (void *)mhdl, (void *)vaddr, rv); 3589 if (ldep->align_addr) { 3590 kmem_free(ldep->align_addr, 3591 sizeof (caddr_t) * P2ROUNDUP(nbytes, 8)); 3592 ldep->align_addr = NULL; 3593 } 3594 return (EAGAIN); 3595 } 3596 3597 /* 3598 * Get the other cookies (if any). 
3599 */ 3600 for (i = 1; i < dep->payload.ncookies; i++) { 3601 rv = ldc_mem_nextcookie(mhdl, &dep->payload.cookie[i]); 3602 if (rv != 0) { 3603 (void) ldc_mem_unbind_handle(mhdl); 3604 DMSG(vdcp, 0, "?[%d] Failed to get next cookie " 3605 "(mhdl=%lx cnum=%d), err=%d", 3606 vdcp->instance, mhdl, i, rv); 3607 if (ldep->align_addr) { 3608 kmem_free(ldep->align_addr, 3609 sizeof (caddr_t) * ldep->nbytes); 3610 ldep->align_addr = NULL; 3611 } 3612 return (EAGAIN); 3613 } 3614 } 3615 3616 return (rv); 3617 } 3618 3619 /* 3620 * Interrupt handlers for messages from LDC 3621 */ 3622 3623 /* 3624 * Function: 3625 * vdc_handle_cb() 3626 * 3627 * Description: 3628 * 3629 * Arguments: 3630 * event - Type of event (LDC_EVT_xxx) that triggered the callback 3631 * arg - soft state pointer for this instance of the device driver. 3632 * 3633 * Return Code: 3634 * 0 - Success 3635 */ 3636 static uint_t 3637 vdc_handle_cb(uint64_t event, caddr_t arg) 3638 { 3639 ldc_status_t ldc_state; 3640 int rv = 0; 3641 vdc_server_t *srvr = (vdc_server_t *)(void *)arg; 3642 vdc_t *vdc = srvr->vdcp; 3643 3644 ASSERT(vdc != NULL); 3645 3646 DMSG(vdc, 1, "evt=%lx seqID=%ld\n", event, vdc->seq_num); 3647 3648 /* If callback is not for the current server, ignore it */ 3649 mutex_enter(&vdc->lock); 3650 3651 if (vdc->curr_server != srvr) { 3652 DMSG(vdc, 0, "[%d] Ignoring event 0x%lx for port@%ld\n", 3653 vdc->instance, event, srvr->id); 3654 mutex_exit(&vdc->lock); 3655 return (LDC_SUCCESS); 3656 } 3657 3658 /* 3659 * Depending on the type of event that triggered this callback, 3660 * we modify the handshake state or read the data. 3661 * 3662 * NOTE: not done as a switch() as event could be triggered by 3663 * a state change and a read request. Also the ordering of the 3664 * check for the event types is deliberate. 
3665 */ 3666 if (event & LDC_EVT_UP) { 3667 DMSG(vdc, 0, "[%d] Received LDC_EVT_UP\n", vdc->instance); 3668 3669 /* get LDC state */ 3670 rv = ldc_status(srvr->ldc_handle, &ldc_state); 3671 if (rv != 0) { 3672 DMSG(vdc, 0, "[%d] Couldn't get LDC status %d", 3673 vdc->instance, rv); 3674 mutex_exit(&vdc->lock); 3675 return (LDC_SUCCESS); 3676 } 3677 if (srvr->ldc_state != LDC_UP && 3678 ldc_state == LDC_UP) { 3679 /* 3680 * Reset the transaction sequence numbers when 3681 * LDC comes up. We then kick off the handshake 3682 * negotiation with the vDisk server. 3683 */ 3684 vdc->seq_num = 1; 3685 vdc->seq_num_reply = 0; 3686 srvr->ldc_state = ldc_state; 3687 cv_signal(&vdc->initwait_cv); 3688 } 3689 } 3690 3691 if (event & LDC_EVT_READ) { 3692 DMSG(vdc, 1, "[%d] Received LDC_EVT_READ\n", vdc->instance); 3693 mutex_enter(&vdc->read_lock); 3694 cv_signal(&vdc->read_cv); 3695 vdc->read_state = VDC_READ_PENDING; 3696 mutex_exit(&vdc->read_lock); 3697 mutex_exit(&vdc->lock); 3698 3699 /* that's all we have to do - no need to handle DOWN/RESET */ 3700 return (LDC_SUCCESS); 3701 } 3702 3703 if (event & (LDC_EVT_RESET|LDC_EVT_DOWN)) { 3704 3705 DMSG(vdc, 0, "[%d] Received LDC RESET event\n", vdc->instance); 3706 3707 /* 3708 * Need to wake up any readers so they will 3709 * detect that a reset has occurred. 
3710 */ 3711 mutex_enter(&vdc->read_lock); 3712 if ((vdc->read_state == VDC_READ_WAITING) || 3713 (vdc->read_state == VDC_READ_RESET)) 3714 cv_signal(&vdc->read_cv); 3715 vdc->read_state = VDC_READ_RESET; 3716 mutex_exit(&vdc->read_lock); 3717 3718 /* wake up any threads waiting for connection to come up */ 3719 if (vdc->state == VDC_STATE_INIT_WAITING) { 3720 vdc->state = VDC_STATE_RESETTING; 3721 cv_signal(&vdc->initwait_cv); 3722 } 3723 3724 } 3725 3726 mutex_exit(&vdc->lock); 3727 3728 if (event & ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) 3729 DMSG(vdc, 0, "![%d] Unexpected LDC event (%lx) received", 3730 vdc->instance, event); 3731 3732 return (LDC_SUCCESS); 3733 } 3734 3735 /* 3736 * Function: 3737 * vdc_wait_for_response() 3738 * 3739 * Description: 3740 * Block waiting for a response from the server. If there is 3741 * no data the thread block on the read_cv that is signalled 3742 * by the callback when an EVT_READ occurs. 3743 * 3744 * Arguments: 3745 * vdcp - soft state pointer for this instance of the device driver. 3746 * 3747 * Return Code: 3748 * 0 - Success 3749 */ 3750 static int 3751 vdc_wait_for_response(vdc_t *vdcp, vio_msg_t *msgp) 3752 { 3753 size_t nbytes = sizeof (*msgp); 3754 int status; 3755 3756 ASSERT(vdcp != NULL); 3757 3758 DMSG(vdcp, 1, "[%d] Entered\n", vdcp->instance); 3759 3760 status = vdc_recv(vdcp, msgp, &nbytes); 3761 DMSG(vdcp, 3, "vdc_read() done.. 
status=0x%x size=0x%x\n", 3762 status, (int)nbytes); 3763 if (status) { 3764 DMSG(vdcp, 0, "?[%d] Error %d reading LDC msg\n", 3765 vdcp->instance, status); 3766 return (status); 3767 } 3768 3769 if (nbytes < sizeof (vio_msg_tag_t)) { 3770 DMSG(vdcp, 0, "?[%d] Expect %lu bytes; recv'd %lu\n", 3771 vdcp->instance, sizeof (vio_msg_tag_t), nbytes); 3772 return (ENOMSG); 3773 } 3774 3775 DMSG(vdcp, 2, "[%d] (%x/%x/%x)\n", vdcp->instance, 3776 msgp->tag.vio_msgtype, 3777 msgp->tag.vio_subtype, 3778 msgp->tag.vio_subtype_env); 3779 3780 /* 3781 * Verify the Session ID of the message 3782 * 3783 * Every message after the Version has been negotiated should 3784 * have the correct session ID set. 3785 */ 3786 if ((msgp->tag.vio_sid != vdcp->session_id) && 3787 (msgp->tag.vio_subtype_env != VIO_VER_INFO)) { 3788 DMSG(vdcp, 0, "[%d] Invalid SID: received 0x%x, " 3789 "expected 0x%lx [seq num %lx @ %d]", 3790 vdcp->instance, msgp->tag.vio_sid, 3791 vdcp->session_id, 3792 ((vio_dring_msg_t *)msgp)->seq_num, 3793 ((vio_dring_msg_t *)msgp)->start_idx); 3794 return (ENOMSG); 3795 } 3796 return (0); 3797 } 3798 3799 3800 /* 3801 * Function: 3802 * vdc_resubmit_backup_dring() 3803 * 3804 * Description: 3805 * Resubmit each descriptor in the backed up dring to 3806 * vDisk server. The Dring was backed up during connection 3807 * reset. 3808 * 3809 * Arguments: 3810 * vdcp - soft state pointer for this instance of the device driver. 
3811 * 3812 * Return Code: 3813 * 0 - Success 3814 */ 3815 static int 3816 vdc_resubmit_backup_dring(vdc_t *vdcp) 3817 { 3818 int processed = 0; 3819 int count; 3820 int b_idx; 3821 int rv = 0; 3822 int dring_size; 3823 int op; 3824 vio_msg_t vio_msg; 3825 vdc_local_desc_t *curr_ldep; 3826 3827 ASSERT(MUTEX_NOT_HELD(&vdcp->lock)); 3828 ASSERT(vdcp->state == VDC_STATE_HANDLE_PENDING); 3829 3830 if (vdcp->local_dring_backup == NULL) { 3831 /* the pending requests have already been processed */ 3832 return (0); 3833 } 3834 3835 DMSG(vdcp, 1, "restoring pending dring entries (len=%d, tail=%d)\n", 3836 vdcp->local_dring_backup_len, vdcp->local_dring_backup_tail); 3837 3838 /* 3839 * Walk the backup copy of the local descriptor ring and 3840 * resubmit all the outstanding transactions. 3841 */ 3842 b_idx = vdcp->local_dring_backup_tail; 3843 for (count = 0; count < vdcp->local_dring_backup_len; count++) { 3844 3845 curr_ldep = &(vdcp->local_dring_backup[b_idx]); 3846 3847 /* only resubmit outstanding transactions */ 3848 if (!curr_ldep->is_free) { 3849 /* 3850 * If we are retrying a block read/write operation we 3851 * need to update the I/O statistics to indicate that 3852 * the request is being put back on the waitq to be 3853 * serviced (it will have been taken off after the 3854 * error was reported). 
3855 */ 3856 mutex_enter(&vdcp->lock); 3857 op = curr_ldep->operation; 3858 if ((op == VD_OP_BREAD) || (op == VD_OP_BWRITE)) { 3859 DTRACE_IO1(start, buf_t *, curr_ldep->cb_arg); 3860 VD_KSTAT_WAITQ_ENTER(vdcp); 3861 } 3862 3863 DMSG(vdcp, 1, "resubmitting entry idx=%x\n", b_idx); 3864 rv = vdc_populate_descriptor(vdcp, op, 3865 curr_ldep->addr, curr_ldep->nbytes, 3866 curr_ldep->slice, curr_ldep->offset, 3867 curr_ldep->cb_type, curr_ldep->cb_arg, 3868 curr_ldep->dir); 3869 3870 if (rv) { 3871 if (op == VD_OP_BREAD || op == VD_OP_BWRITE) { 3872 VD_UPDATE_ERR_STATS(vdcp, vd_transerrs); 3873 VD_KSTAT_WAITQ_EXIT(vdcp); 3874 DTRACE_IO1(done, buf_t *, 3875 curr_ldep->cb_arg); 3876 } 3877 DMSG(vdcp, 1, "[%d] cannot resubmit entry %d\n", 3878 vdcp->instance, b_idx); 3879 mutex_exit(&vdcp->lock); 3880 goto done; 3881 } 3882 3883 /* 3884 * If this is a block read/write we update the I/O 3885 * statistics kstat to indicate that the request 3886 * has been sent back to the vDisk server and should 3887 * now be put on the run queue. 3888 */ 3889 if ((op == VD_OP_BREAD) || (op == VD_OP_BWRITE)) { 3890 DTRACE_PROBE1(send, buf_t *, curr_ldep->cb_arg); 3891 VD_KSTAT_WAITQ_TO_RUNQ(vdcp); 3892 } 3893 mutex_exit(&vdcp->lock); 3894 3895 /* Wait for the response message. */ 3896 DMSG(vdcp, 1, "waiting for response to idx=%x\n", 3897 b_idx); 3898 rv = vdc_wait_for_response(vdcp, &vio_msg); 3899 if (rv) { 3900 /* 3901 * If this is a block read/write we update 3902 * the I/O statistics kstat to take it 3903 * off the run queue. 
3904 */ 3905 mutex_enter(&vdcp->lock); 3906 if (op == VD_OP_BREAD || op == VD_OP_BWRITE) { 3907 VD_UPDATE_ERR_STATS(vdcp, vd_transerrs); 3908 VD_KSTAT_RUNQ_EXIT(vdcp); 3909 DTRACE_IO1(done, buf_t *, 3910 curr_ldep->cb_arg); 3911 } 3912 DMSG(vdcp, 1, "[%d] wait_for_response " 3913 "returned err=%d\n", vdcp->instance, 3914 rv); 3915 mutex_exit(&vdcp->lock); 3916 goto done; 3917 } 3918 3919 DMSG(vdcp, 1, "processing msg for idx=%x\n", b_idx); 3920 rv = vdc_process_data_msg(vdcp, &vio_msg); 3921 if (rv) { 3922 DMSG(vdcp, 1, "[%d] process_data_msg " 3923 "returned err=%d\n", vdcp->instance, 3924 rv); 3925 goto done; 3926 } 3927 /* 3928 * Mark this entry as free so that we will not resubmit 3929 * this "done" request again, if we were to use the same 3930 * backup_dring again in future. This could happen when 3931 * a reset happens while processing the backup_dring. 3932 */ 3933 curr_ldep->is_free = B_TRUE; 3934 processed++; 3935 } 3936 3937 /* get the next element to submit */ 3938 if (++b_idx >= vdcp->local_dring_backup_len) 3939 b_idx = 0; 3940 } 3941 3942 /* all done - now clear up pending dring copy */ 3943 dring_size = vdcp->local_dring_backup_len * 3944 sizeof (vdcp->local_dring_backup[0]); 3945 3946 (void) kmem_free(vdcp->local_dring_backup, dring_size); 3947 3948 vdcp->local_dring_backup = NULL; 3949 3950 done: 3951 DTRACE_PROBE2(processed, int, processed, vdc_t *, vdcp); 3952 3953 return (rv); 3954 } 3955 3956 /* 3957 * Function: 3958 * vdc_cancel_backup_dring 3959 * 3960 * Description: 3961 * Cancel each descriptor in the backed up dring to vDisk server. 3962 * The Dring was backed up during connection reset. 3963 * 3964 * Arguments: 3965 * vdcp - soft state pointer for this instance of the device driver. 
3966 * 3967 * Return Code: 3968 * None 3969 */ 3970 void 3971 vdc_cancel_backup_dring(vdc_t *vdcp) 3972 { 3973 vdc_local_desc_t *ldep; 3974 struct buf *bufp; 3975 int count; 3976 int b_idx; 3977 int dring_size; 3978 int cancelled = 0; 3979 3980 ASSERT(MUTEX_HELD(&vdcp->lock)); 3981 ASSERT(vdcp->state == VDC_STATE_INIT || 3982 vdcp->state == VDC_STATE_INIT_WAITING || 3983 vdcp->state == VDC_STATE_NEGOTIATE || 3984 vdcp->state == VDC_STATE_RESETTING); 3985 3986 if (vdcp->local_dring_backup == NULL) { 3987 /* the pending requests have already been processed */ 3988 return; 3989 } 3990 3991 DMSG(vdcp, 1, "cancelling pending dring entries (len=%d, tail=%d)\n", 3992 vdcp->local_dring_backup_len, vdcp->local_dring_backup_tail); 3993 3994 /* 3995 * Walk the backup copy of the local descriptor ring and 3996 * cancel all the outstanding transactions. 3997 */ 3998 b_idx = vdcp->local_dring_backup_tail; 3999 for (count = 0; count < vdcp->local_dring_backup_len; count++) { 4000 4001 ldep = &(vdcp->local_dring_backup[b_idx]); 4002 4003 /* only cancel outstanding transactions */ 4004 if (!ldep->is_free) { 4005 4006 DMSG(vdcp, 1, "cancelling entry idx=%x\n", b_idx); 4007 cancelled++; 4008 4009 /* 4010 * All requests have already been cleared from the 4011 * local descriptor ring and the LDC channel has been 4012 * reset so we will never get any reply for these 4013 * requests. Now we just have to notify threads waiting 4014 * for replies that the request has failed. 4015 */ 4016 switch (ldep->cb_type) { 4017 case CB_SYNC: 4018 ASSERT(vdcp->sync_op_pending); 4019 vdcp->sync_op_status = EIO; 4020 vdcp->sync_op_pending = B_FALSE; 4021 cv_signal(&vdcp->sync_pending_cv); 4022 break; 4023 4024 case CB_STRATEGY: 4025 bufp = ldep->cb_arg; 4026 ASSERT(bufp != NULL); 4027 bufp->b_resid = bufp->b_bcount;