1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * This file contains common code for handling Options Management requests. 
31 */ 32 33 #include <sys/types.h> 34 #include <sys/stream.h> 35 #include <sys/stropts.h> 36 #include <sys/strsubr.h> 37 #include <sys/errno.h> 38 #define _SUN_TPI_VERSION 2 39 #include <sys/tihdr.h> 40 #include <sys/socket.h> 41 #include <sys/ddi.h> 42 #include <sys/debug.h> /* for ASSERT */ 43 #include <sys/policy.h> 44 45 #include <inet/common.h> 46 #include <inet/mi.h> 47 #include <inet/nd.h> 48 #include <netinet/ip6.h> 49 #include <inet/ip.h> 50 #include <inet/mib2.h> 51 #include <netinet/in.h> 52 #include "optcom.h" 53 54 #include <inet/optcom.h> 55 56 /* 57 * Function prototypes 58 */ 59 static t_scalar_t process_topthdrs_first_pass(mblk_t *, cred_t *, optdb_obj_t *, 60 boolean_t *, size_t *); 61 static t_scalar_t do_options_second_pass(queue_t *q, mblk_t *reqmp, 62 mblk_t *ack_mp, cred_t *, optdb_obj_t *dbobjp, 63 mblk_t *first_mp, boolean_t is_restart, boolean_t *queued_statusp); 64 static t_uscalar_t get_worst_status(t_uscalar_t, t_uscalar_t); 65 static int do_opt_default(queue_t *, struct T_opthdr *, uchar_t **, 66 t_uscalar_t *, cred_t *, optdb_obj_t *); 67 static void do_opt_current(queue_t *, struct T_opthdr *, uchar_t **, 68 t_uscalar_t *, cred_t *cr, optdb_obj_t *); 69 static int do_opt_check_or_negotiate(queue_t *q, struct T_opthdr *reqopt, 70 uint_t optset_context, uchar_t **resptrp, t_uscalar_t *worst_statusp, 71 cred_t *, optdb_obj_t *dbobjp, mblk_t *first_mp); 72 static opdes_t *opt_chk_lookup(t_uscalar_t, t_uscalar_t, opdes_t *, uint_t); 73 static boolean_t opt_level_valid(t_uscalar_t, optlevel_t *, uint_t); 74 static size_t opt_level_allopts_lengths(t_uscalar_t, opdes_t *, uint_t); 75 static boolean_t opt_length_ok(opdes_t *, struct T_opthdr *); 76 static t_uscalar_t optcom_max_optbuf_len(opdes_t *, uint_t); 77 static boolean_t opt_bloated_maxsize(opdes_t *); 78 79 /* Common code for sending back a T_ERROR_ACK. 
*/ 80 void 81 optcom_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 82 { 83 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 84 qreply(q, mp); 85 } 86 87 /* 88 * The option management routines svr4_optcom_req() and tpi_optcom_req() use 89 * callback functions as arguments. Here is the expected interfaces 90 * assumed from the callback functions 91 * 92 * 93 * (1) deffn(q, optlevel, optname, optvalp) 94 * 95 * - Function only called when default value comes from protocol 96 * specific code and not the option database table (indicated by 97 * OP_DEF_FN property in option database.) 98 * - Error return is -1. Valid returns are >=0. 99 * - When valid, the return value represents the length used for storing 100 * the default value of the option. 101 * - Error return implies the called routine did not recognize this 102 * option. Something downstream could so input is left unchanged 103 * in request buffer. 104 * 105 * (2) getfn(q, optlevel, optname, optvalp) 106 * 107 * - Error return is -1. Valid returns are >=0. 108 * - When valid, the return value represents the length used for storing 109 * the actual value of the option. 110 * - Error return implies the called routine did not recognize this 111 * option. Something downstream could so input is left unchanged 112 * in request buffer. 113 * 114 * (3) setfn(q, optset_context, optlevel, optname, inlen, invalp, 115 * outlenp, outvalp, attrp, cr); 116 * 117 * - OK return is 0, Error code is returned as a non-zero argument. 118 * - If negative it is ignored by svr4_optcom_req(). If positive, error 119 * is returned. A negative return implies that option, while handled on 120 * this stack is not handled at this level and will be handled further 121 * downstream. 122 * - Both negative and positive errors are treats as errors in an 123 * identical manner by tpi_optcom_req(). The errors affect "status" 124 * field of each option's T_opthdr. 
If successful, an appropriate success 125 * result is carried. If error, it is instantiated to "failure" at the 126 * topmost level and left unchanged at other levels. (This "failure" can 127 * turn to a success at another level). 128 * - optset_context passed for tpi_optcom_req(). It is interpreted as: 129 * - SETFN_OPTCOM_CHECKONLY 130 * semantics are to pretend to set the value and report 131 * back if it would be successful. 132 * This is used with T_CHECK semantics in XTI 133 * - SETFN_OPTCOM_NEGOTIATE 134 * set the value. Call from option management primitive 135 * T_OPTMGMT_REQ when T_NEGOTIATE flag is used. 136 * - SETFN_UD_NEGOTIATE 137 * option request came riding on UNITDATA primitive most often 138 * has "this datagram" semantics to influence properties 139 * affecting an outgoing datagram or associated with received 140 * datagram 141 * [ Note: XTI permits this use outside of "this datagram" 142 * semantics also and permits setting "management related" 143 * options in this context and its test suite enforces it ] 144 * - SETFN_CONN_NEGOTIATE 145 * option request came riding on CONN_REQ/RES primitive and 146 * most often has "this connection" (negotiation during 147 * "connection establishment") semantics. 148 * [ Note: XTI permits use of these outside of "this connection" 149 * semantics and permits "management related" options in this 150 * context and its test suite enforces it. ] 151 * 152 * - inlen, invalp is the option length,value requested to be set. 153 * - outlenp, outvalp represent return parameters which contain the 154 * value set and it might be different from one passed on input. 155 * - attrp points to a data structure that's used by v6 modules to 156 * store ancillary data options or sticky options. 
157 * - cr points to the caller's credentials 158 * - the caller might pass same buffers for input and output and the 159 * routine should protect against this case by not updating output 160 * buffers until it is done referencing input buffers and any other 161 * issues (e.g. not use bcopy() if we do not trust what it does). 162 * - If option is not known, it returns error. We randomly pick EINVAL. 163 * It can however get called with options that are handled downstream 164 * or upstream so for svr4_optcom_req(), it does not return error for 165 * negative return values. 166 * 167 */ 168 169 /* 170 * Upper Level Protocols call this routine when they receive 171 * a T_SVR4_OPTMGMT_REQ message. They supply callback functions 172 * for setting a new value for a single option, getting the 173 * current value for a single option, and checking for support 174 * of a single option. svr4_optcom_req validates the option management 175 * buffer passed in, and calls the appropriate routines to do the 176 * job requested. 177 * XXX Code below needs some restructuring after we have some more 178 * macros to support 'struct opthdr' in the headers. 179 * 180 * IP-MT notes: The option management framework functions svr4_optcom_req() and 181 * tpi_optcom_req() allocate and prepend an M_CTL mblk to the actual 182 * T_optmgmt_req mblk and pass the chain as an additional parameter to the 183 * protocol set functions. If a protocol set function (such as ip_opt_set) 184 * cannot process the option immediately it can return EINPROGRESS. ip_opt_set 185 * enqueues the message in the appropriate sq and returns EINPROGRESS. Later 186 * the sq framework arranges to restart this operation and passes control to 187 * the restart function ip_restart_optmgmt() which in turn calls 188 * svr4_optcom_req() or tpi_optcom_req() to restart the option processing. 
189 */ 190 int 191 svr4_optcom_req(queue_t *q, mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp, 192 boolean_t pass_to_ip) 193 { 194 pfi_t deffn = dbobjp->odb_deffn; 195 pfi_t getfn = dbobjp->odb_getfn; 196 opt_set_fn setfn = dbobjp->odb_setfn; 197 opdes_t *opt_arr = dbobjp->odb_opt_des_arr; 198 uint_t opt_arr_cnt = dbobjp->odb_opt_arr_cnt; 199 boolean_t topmost_tpiprovider = dbobjp->odb_topmost_tpiprovider; 200 opt_restart_t *or; 201 struct opthdr *restart_opt; 202 boolean_t is_restart = B_FALSE; 203 mblk_t *first_mp; 204 205 t_uscalar_t max_optbuf_len; 206 int len; 207 mblk_t *mp1 = NULL; 208 struct opthdr *next_opt; 209 struct opthdr *opt; 210 struct opthdr *opt1; 211 struct opthdr *opt_end; 212 struct opthdr *opt_start; 213 opdes_t *optd; 214 boolean_t pass_to_next = B_FALSE; 215 struct T_optmgmt_ack *toa; 216 struct T_optmgmt_req *tor; 217 218 /* 219 * Allocate M_CTL and prepend to the packet for restarting this 220 * option if needed. IP may need to queue and restart the option 221 * if it cannot obtain exclusive conditions immediately. Please see 222 * IP-MT notes before the start of svr4_optcom_req 223 */ 224 if (mp->b_datap->db_type == M_CTL) { 225 is_restart = B_TRUE; 226 first_mp = mp; 227 mp = mp->b_cont; 228 ASSERT(mp->b_wptr - mp->b_rptr >= 229 sizeof (struct T_optmgmt_req)); 230 tor = (struct T_optmgmt_req *)mp->b_rptr; 231 ASSERT(tor->MGMT_flags == T_NEGOTIATE); 232 233 or = (opt_restart_t *)first_mp->b_rptr; 234 opt_start = or->or_start; 235 opt_end = or->or_end; 236 restart_opt = or->or_ropt; 237 goto restart; 238 } 239 240 tor = (struct T_optmgmt_req *)mp->b_rptr; 241 /* Verify message integrity. 
*/ 242 if (mp->b_wptr - mp->b_rptr < sizeof (struct T_optmgmt_req)) 243 goto bad_opt; 244 /* Verify MGMT_flags legal */ 245 switch (tor->MGMT_flags) { 246 case T_DEFAULT: 247 case T_NEGOTIATE: 248 case T_CURRENT: 249 case T_CHECK: 250 /* OK - legal request flags */ 251 break; 252 default: 253 optcom_err_ack(q, mp, TBADFLAG, 0); 254 return (0); 255 } 256 if (tor->MGMT_flags == T_DEFAULT) { 257 /* Is it a request for default option settings? */ 258 259 /* 260 * Note: XXX TLI and TPI specification was unclear about 261 * semantics of T_DEFAULT and the following historical note 262 * and its interpretation is incorrect (it implies a request 263 * for default values of only the identified options not all. 264 * The semantics have been explained better in XTI spec.) 265 * However, we do not modify (comment or code) here to keep 266 * compatibility. 267 * We can rethink this if it ever becomes an issue. 268 * ----historical comment start------ 269 * As we understand it, the input buffer is meaningless 270 * so we ditch the message. A T_DEFAULT request is a 271 * request to obtain a buffer containing defaults for 272 * all supported options, so we allocate a maximum length 273 * reply. 274 * ----historical comment end ------- 275 */ 276 /* T_DEFAULT not passed down */ 277 ASSERT(topmost_tpiprovider == B_TRUE); 278 freemsg(mp); 279 max_optbuf_len = optcom_max_optbuf_len(opt_arr, 280 opt_arr_cnt); 281 mp = allocb(max_optbuf_len, BPRI_MED); 282 if (!mp) { 283 no_mem:; 284 optcom_err_ack(q, mp, TSYSERR, ENOMEM); 285 return (0); 286 } 287 288 /* Initialize the T_optmgmt_ack header. */ 289 toa = (struct T_optmgmt_ack *)mp->b_rptr; 290 bzero((char *)toa, max_optbuf_len); 291 toa->PRIM_type = T_OPTMGMT_ACK; 292 toa->OPT_offset = (t_scalar_t)sizeof (struct T_optmgmt_ack); 293 /* TODO: Is T_DEFAULT the right thing to put in MGMT_flags? 
*/ 294 toa->MGMT_flags = T_DEFAULT; 295 296 /* Now walk the table of options passed in */ 297 opt = (struct opthdr *)&toa[1]; 298 for (optd = opt_arr; optd < &opt_arr[opt_arr_cnt]; optd++) { 299 /* 300 * All the options in the table of options passed 301 * in are by definition supported by the protocol 302 * calling this function. 303 */ 304 if (!OA_READ_PERMISSION(optd, cr)) 305 continue; 306 opt->level = optd->opdes_level; 307 opt->name = optd->opdes_name; 308 if (!(optd->opdes_props & OP_DEF_FN) || 309 ((len = (*deffn)(q, opt->level, 310 opt->name, (uchar_t *)&opt[1])) < 0)) { 311 /* 312 * Fill length and value from table. 313 * 314 * Default value not instantiated from function 315 * (or the protocol specific function failed it; 316 * In this interpretation of T_DEFAULT, this is 317 * the best we can do) 318 */ 319 switch (optd->opdes_size) { 320 /* 321 * Since options are guaranteed aligned only 322 * on a 4 byte boundary (t_scalar_t) any 323 * option that is greater in size will default 324 * to the bcopy below 325 */ 326 case sizeof (int32_t): 327 *(int32_t *)&opt[1] = 328 (int32_t)optd->opdes_default; 329 break; 330 case sizeof (int16_t): 331 *(int16_t *)&opt[1] = 332 (int16_t)optd->opdes_default; 333 break; 334 case sizeof (int8_t): 335 *(int8_t *)&opt[1] = 336 (int8_t)optd->opdes_default; 337 break; 338 default: 339 /* 340 * other length but still assume 341 * fixed - use bcopy 342 */ 343 bcopy(optd->opdes_defbuf, 344 &opt[1], optd->opdes_size); 345 break; 346 } 347 opt->len = optd->opdes_size; 348 } 349 else 350 opt->len = (t_uscalar_t)len; 351 opt = (struct opthdr *)((char *)&opt[1] + 352 _TPI_ALIGN_OPT(opt->len)); 353 } 354 355 /* Now record the final length. */ 356 toa->OPT_length = (t_scalar_t)((char *)opt - (char *)&toa[1]); 357 mp->b_wptr = (uchar_t *)opt; 358 mp->b_datap->db_type = M_PCPROTO; 359 /* Ship it back. 
*/ 360 qreply(q, mp); 361 return (0); 362 } 363 /* T_DEFAULT processing complete - no more T_DEFAULT */ 364 365 /* 366 * For T_NEGOTIATE, T_CURRENT, and T_CHECK requests, we make a 367 * pass through the input buffer validating the details and 368 * making sure each option is supported by the protocol. 369 */ 370 if ((opt_start = (struct opthdr *)mi_offset_param(mp, 371 tor->OPT_offset, tor->OPT_length)) == NULL) 372 goto bad_opt; 373 if (!__TPI_OPT_ISALIGNED(opt_start)) 374 goto bad_opt; 375 376 opt_end = (struct opthdr *)((uchar_t *)opt_start + 377 tor->OPT_length); 378 379 for (opt = opt_start; opt < opt_end; opt = next_opt) { 380 /* 381 * Verify we have room to reference the option header 382 * fields in the option buffer. 383 */ 384 if ((uchar_t *)opt + sizeof (struct opthdr) > 385 (uchar_t *)opt_end) 386 goto bad_opt; 387 /* 388 * We now compute pointer to next option in buffer 'next_opt' 389 * The next_opt computation above below 'opt->len' initialized 390 * by application which cannot be trusted. The usual value 391 * too large will be captured by the loop termination condition 392 * above. We check for the following which it will miss. 393 * -pointer space wraparound arithmetic overflow 394 * -last option in buffer with 'opt->len' being too large 395 * (only reason 'next_opt' should equal or exceed 396 * 'opt_end' for last option is roundup unless length is 397 * too-large/invalid) 398 */ 399 next_opt = (struct opthdr *)((uchar_t *)&opt[1] + 400 _TPI_ALIGN_OPT(opt->len)); 401 402 if ((uchar_t *)next_opt < (uchar_t *)&opt[1] || 403 ((next_opt >= opt_end) && 404 (((uchar_t *)next_opt - (uchar_t *)opt_end) >= 405 __TPI_ALIGN_SIZE))) 406 goto bad_opt; 407 408 /* sanity check */ 409 if (opt->name == T_ALLOPT) 410 goto bad_opt; 411 412 /* Find the option in the opt_arr. 
*/ 413 if ((optd = opt_chk_lookup(opt->level, opt->name, 414 opt_arr, opt_arr_cnt)) == NULL) { 415 /* 416 * Not found, that is a bad thing if 417 * the caller is a tpi provider 418 */ 419 if (topmost_tpiprovider) 420 goto bad_opt; 421 else 422 continue; /* skip unmodified */ 423 } 424 425 /* Additional checks dependent on operation. */ 426 switch (tor->MGMT_flags) { 427 case T_NEGOTIATE: 428 if (!OA_WRITE_OR_EXECUTE(optd, cr)) { 429 /* can't negotiate option */ 430 if (!(OA_MATCHED_PRIV(optd, cr)) && 431 OA_WX_ANYPRIV(optd)) { 432 /* 433 * not privileged but privilege 434 * will help negotiate option. 435 */ 436 optcom_err_ack(q, mp, TACCES, 0); 437 return (0); 438 } else 439 goto bad_opt; 440 } 441 /* 442 * Verify size for options 443 * Note: For retaining compatibility with historical 444 * behavior, variable lengths options will have their 445 * length verified in the setfn() processing. 446 * In order to be compatible with SunOS 4.X we return 447 * EINVAL errors for bad lengths. 448 */ 449 if (!(optd->opdes_props & OP_VARLEN)) { 450 /* fixed length - size must match */ 451 if (opt->len != optd->opdes_size) { 452 optcom_err_ack(q, mp, TSYSERR, EINVAL); 453 return (0); 454 } 455 } 456 break; 457 458 case T_CHECK: 459 if (!OA_RWX_ANYPRIV(optd)) 460 /* any of "rwx" permission but not not none */ 461 goto bad_opt; 462 /* 463 * XXX Since T_CURRENT was not there in TLI and the 464 * official TLI inspired TPI standard, getsockopt() 465 * API uses T_CHECK (for T_CURRENT semantics) 466 * The following fallthru makes sense because of its 467 * historical use as semantic equivalent to T_CURRENT. 468 */ 469 /* FALLTHRU */ 470 case T_CURRENT: 471 if (!OA_READ_PERMISSION(optd, cr)) { 472 /* can't read option value */ 473 if (!(OA_MATCHED_PRIV(optd, cr)) && 474 OA_R_ANYPRIV(optd)) { 475 /* 476 * not privileged but privilege 477 * will help in reading option value. 
478 */ 479 optcom_err_ack(q, mp, TACCES, 0); 480 return (0); 481 } else 482 goto bad_opt; 483 } 484 break; 485 486 default: 487 optcom_err_ack(q, mp, TBADFLAG, 0); 488 return (0); 489 } 490 /* We liked it. Keep going. */ 491 } /* end for loop scanning option buffer */ 492 493 /* Now complete the operation as required. */ 494 switch (tor->MGMT_flags) { 495 case T_CHECK: 496 /* 497 * Historically used same as T_CURRENT (which was added to 498 * standard later). Code retained for compatibility. 499 */ 500 /* FALLTHROUGH */ 501 case T_CURRENT: 502 /* 503 * Allocate a maximum size reply. Perhaps we are supposed to 504 * assume that the input buffer includes space for the answers 505 * as well as the opthdrs, but we don't know that for sure. 506 * So, instead, we create a new output buffer, using the 507 * input buffer only as a list of options. 508 */ 509 max_optbuf_len = optcom_max_optbuf_len(opt_arr, 510 opt_arr_cnt); 511 mp1 = allocb_cred(max_optbuf_len, cr); 512 if (!mp1) 513 goto no_mem; 514 /* Initialize the header. */ 515 mp1->b_datap->db_type = M_PCPROTO; 516 mp1->b_wptr = &mp1->b_rptr[sizeof (struct T_optmgmt_ack)]; 517 toa = (struct T_optmgmt_ack *)mp1->b_rptr; 518 toa->OPT_offset = (t_scalar_t)sizeof (struct T_optmgmt_ack); 519 toa->MGMT_flags = tor->MGMT_flags; 520 /* 521 * Walk through the input buffer again, this time adding 522 * entries to the output buffer for each option requested. 523 * Note, sanity of option header, last option etc, verified 524 * in first pass. 525 */ 526 opt1 = (struct opthdr *)&toa[1]; 527 528 for (opt = opt_start; opt < opt_end; opt = next_opt) { 529 530 next_opt = (struct opthdr *)((uchar_t *)&opt[1] + 531 _TPI_ALIGN_OPT(opt->len)); 532 533 opt1->name = opt->name; 534 opt1->level = opt->level; 535 len = (*getfn)(q, opt->level, 536 opt->name, (uchar_t *)&opt1[1]); 537 /* 538 * Failure means option is not recognized. 
Copy input 539 * buffer as is 540 */ 541 if (len < 0) { 542 opt1->len = opt->len; 543 bcopy(&opt[1], &opt1[1], opt->len); 544 } else { 545 opt1->len = (t_uscalar_t)len; 546 } 547 opt1 = (struct opthdr *)((uchar_t *)&opt1[1] + 548 _TPI_ALIGN_OPT(opt1->len)); 549 } /* end for loop */ 550 551 /* Record the final length. */ 552 toa->OPT_length = (t_scalar_t)((uchar_t *)opt1 - 553 (uchar_t *)&toa[1]); 554 mp1->b_wptr = (uchar_t *)opt1; 555 /* Ditch the input buffer. */ 556 freemsg(mp); 557 mp = mp1; 558 /* Always let the next module look at the option. */ 559 pass_to_next = B_TRUE; 560 break; 561 562 case T_NEGOTIATE: 563 first_mp = allocb(sizeof (opt_restart_t), BPRI_LO); 564 if (first_mp == NULL) { 565 optcom_err_ack(q, mp, TSYSERR, ENOMEM); 566 return (0); 567 } 568 first_mp->b_datap->db_type = M_CTL; 569 or = (opt_restart_t *)first_mp->b_rptr; 570 or->or_start = opt_start; 571 or->or_end = opt_end; 572 or->or_type = T_SVR4_OPTMGMT_REQ; 573 or->or_private = 0; 574 first_mp->b_cont = mp; 575 restart: 576 /* 577 * Here we are expecting that the response buffer is exactly 578 * the same size as the input buffer. We pass each opthdr 579 * to the protocol's set function. If the protocol doesn't 580 * like it, it can update the value in it return argument. 581 */ 582 /* 583 * Pass each negotiated option through the protocol set 584 * function. 585 * Note: sanity check on option header values done in first 586 * pass and not repeated here. 587 */ 588 toa = (struct T_optmgmt_ack *)tor; 589 590 for (opt = is_restart ? 
restart_opt: opt_start; opt < opt_end; 591 opt = next_opt) { 592 int error; 593 594 /* 595 * Point to the current option in or, in case this 596 * option has to be restarted later on 597 */ 598 or->or_ropt = opt; 599 next_opt = (struct opthdr *)((uchar_t *)&opt[1] + 600 _TPI_ALIGN_OPT(opt->len)); 601 602 error = (*setfn)(q, SETFN_OPTCOM_NEGOTIATE, 603 opt->level, opt->name, 604 opt->len, (uchar_t *)&opt[1], 605 &opt->len, (uchar_t *)&opt[1], NULL, cr, first_mp); 606 /* 607 * Treat positive "errors" as real. 608 * Note: negative errors are to be treated as 609 * non-fatal by svr4_optcom_req() and are 610 * returned by setfn() when it is passed an 611 * option it does not handle. Since the option 612 * passed opt_chk_lookup(), it is implied that 613 * it is valid but was either handled upstream 614 * or will be handled downstream. 615 */ 616 if (error == EINPROGRESS) { 617 /* 618 * The message is queued and will be 619 * reprocessed later. Typically ip queued 620 * the message to get some exclusive conditions 621 * and later on calls this func again. 622 */ 623 return (EINPROGRESS); 624 } else if (error > 0) { 625 optcom_err_ack(q, mp, TSYSERR, error); 626 freeb(first_mp); 627 return (0); 628 } 629 /* 630 * error < 0 means option is not recognized. 631 * But with OP_PASSNEXT the next module 632 * might recognize it. 633 */ 634 } 635 /* Done with the restart control mp. */ 636 freeb(first_mp); 637 pass_to_next = B_TRUE; 638 break; 639 default: 640 optcom_err_ack(q, mp, TBADFLAG, 0); 641 return (0); 642 } 643 644 if (pass_to_next && (q->q_next != NULL || pass_to_ip)) { 645 /* Send it down to the next module and let it reply */ 646 toa->PRIM_type = T_SVR4_OPTMGMT_REQ; /* Changed by IP to ACK */ 647 if (q->q_next != NULL) 648 putnext(q, mp); 649 else 650 ip_output(Q_TO_CONN(q), mp, q, IP_WPUT); 651 } else { 652 /* Set common fields in the header. 
*/ 653 toa->MGMT_flags = T_SUCCESS; 654 mp->b_datap->db_type = M_PCPROTO; 655 toa->PRIM_type = T_OPTMGMT_ACK; 656 qreply(q, mp); 657 } 658 return (0); 659 bad_opt:; 660 optcom_err_ack(q, mp, TBADOPT, 0); 661 return (0); 662 } 663 664 /* 665 * New optcom_req inspired by TPI/XTI semantics 666 */ 667 int 668 tpi_optcom_req(queue_t *q, mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp, 669 boolean_t pass_to_ip) 670 { 671 t_scalar_t t_error; 672 mblk_t *toa_mp; 673 boolean_t pass_to_next; 674 size_t toa_len; 675 struct T_optmgmt_ack *toa; 676 struct T_optmgmt_req *tor = 677 (struct T_optmgmt_req *)mp->b_rptr; 678 679 opt_restart_t *or; 680 boolean_t is_restart = B_FALSE; 681 mblk_t *first_mp = NULL; 682 t_uscalar_t worst_status; 683 boolean_t queued_status; 684 685 /* 686 * Allocate M_CTL and prepend to the packet for restarting this 687 * option if needed. IP may need to queue and restart the option 688 * if it cannot obtain exclusive conditions immediately. Please see 689 * IP-MT notes before the start of svr4_optcom_req 690 */ 691 if (mp->b_datap->db_type == M_CTL) { 692 is_restart = B_TRUE; 693 first_mp = mp; 694 toa_mp = mp->b_cont; 695 mp = toa_mp->b_cont; 696 ASSERT(mp->b_wptr - mp->b_rptr >= 697 sizeof (struct T_optmgmt_req)); 698 tor = (struct T_optmgmt_req *)mp->b_rptr; 699 ASSERT(tor->MGMT_flags == T_NEGOTIATE); 700 701 or = (opt_restart_t *)first_mp->b_rptr; 702 goto restart; 703 } 704 705 /* Verify message integrity. 
*/ 706 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_optmgmt_req)) { 707 optcom_err_ack(q, mp, TBADOPT, 0); 708 return (0); 709 } 710 711 /* Verify MGMT_flags legal */ 712 switch (tor->MGMT_flags) { 713 case T_DEFAULT: 714 case T_NEGOTIATE: 715 case T_CURRENT: 716 case T_CHECK: 717 /* OK - legal request flags */ 718 break; 719 default: 720 optcom_err_ack(q, mp, TBADFLAG, 0); 721 return (0); 722 } 723 724 /* 725 * In this design, there are two passes required on the input buffer 726 * mostly to accomodate variable length options and "T_ALLOPT" option 727 * which has the semantics "all options of the specified level". 728 * 729 * For T_DEFAULT, T_NEGOTIATE, T_CURRENT, and T_CHECK requests, we make 730 * a pass through the input buffer validating the details and making 731 * sure each option is supported by the protocol. We also determine the 732 * length of the option buffer to return. (Variable length options and 733 * T_ALLOPT mean that length can be different for output buffer). 734 */ 735 736 pass_to_next = B_FALSE; /* initial value */ 737 toa_len = 0; /* initial value */ 738 739 /* 740 * First pass, we do the following 741 * - estimate cumulative length needed for results 742 * - set "status" field based on permissions, option header check 743 * etc. 744 * - determine "pass_to_next" whether we need to send request to 745 * downstream module/driver. 746 */ 747 if ((t_error = process_topthdrs_first_pass(mp, cr, dbobjp, 748 &pass_to_next, &toa_len)) != 0) { 749 optcom_err_ack(q, mp, t_error, 0); 750 return (0); 751 } 752 753 /* 754 * A validation phase of the input buffer is done. We have also 755 * obtained the length requirement and and other details about the 756 * input and we liked input buffer so far. We make another scan 757 * through the input now and generate the output necessary to complete 758 * the operation. 
759 */ 760 761 toa_mp = allocb_cred(toa_len, cr); 762 if (!toa_mp) { 763 optcom_err_ack(q, mp, TSYSERR, ENOMEM); 764 return (0); 765 } 766 767 first_mp = allocb(sizeof (opt_restart_t), BPRI_LO); 768 if (first_mp == NULL) { 769 freeb(toa_mp); 770 optcom_err_ack(q, mp, TSYSERR, ENOMEM); 771 return (0); 772 } 773 first_mp->b_datap->db_type = M_CTL; 774 or = (opt_restart_t *)first_mp->b_rptr; 775 /* 776 * Set initial values for generating output. 777 */ 778 or->or_worst_status = T_SUCCESS; 779 or->or_type = T_OPTMGMT_REQ; 780 or->or_private = 0; 781 /* remaining fields fileed in do_options_second_pass */ 782 783 restart: 784 /* 785 * This routine makes another pass through the option buffer this 786 * time acting on the request based on "status" result in the 787 * first pass. It also performs "expansion" of T_ALLOPT into 788 * all options of a certain level and acts on each for this request. 789 */ 790 if ((t_error = do_options_second_pass(q, mp, toa_mp, cr, dbobjp, 791 first_mp, is_restart, &queued_status)) != 0) { 792 freemsg(toa_mp); 793 optcom_err_ack(q, mp, t_error, 0); 794 return (0); 795 } 796 if (queued_status) { 797 /* Option will be restarted */ 798 return (EINPROGRESS); 799 } 800 worst_status = or->or_worst_status; 801 /* Done with the first mp */ 802 freeb(first_mp); 803 toa_mp->b_cont = NULL; 804 805 /* 806 * Following code relies on the coincidence that T_optmgmt_req 807 * and T_optmgmt_ack are identical in binary representation 808 */ 809 toa = (struct T_optmgmt_ack *)toa_mp->b_rptr; 810 toa->OPT_length = (t_scalar_t)(toa_mp->b_wptr - (toa_mp->b_rptr + 811 sizeof (struct T_optmgmt_ack))); 812 toa->OPT_offset = (t_scalar_t)sizeof (struct T_optmgmt_ack); 813 814 toa->MGMT_flags = tor->MGMT_flags; 815 816 817 freemsg(mp); /* free input mblk */ 818 819 /* 820 * If there is atleast one option that requires a downstream 821 * forwarding and if it is possible, we forward the message 822 * downstream. Else we ack it. 
823 */ 824 if (pass_to_next && (q->q_next != NULL || pass_to_ip)) { 825 /* 826 * We pass it down as T_OPTMGMT_REQ. This code relies 827 * on the happy coincidence that T_optmgmt_req and 828 * T_optmgmt_ack are identical data structures 829 * at the binary representation level. 830 */ 831 toa_mp->b_datap->db_type = M_PROTO; 832 toa->PRIM_type = T_OPTMGMT_REQ; 833 if (q->q_next != NULL) 834 putnext(q, toa_mp); 835 else 836 ip_output(Q_TO_CONN(q), toa_mp, q, IP_WPUT); 837 } else { 838 toa->PRIM_type = T_OPTMGMT_ACK; 839 toa_mp->b_datap->db_type = M_PCPROTO; 840 toa->MGMT_flags |= worst_status; /* XXX "worst" or "OR" TPI ? */ 841 qreply(q, toa_mp); 842 } 843 return (0); 844 } 845 846 847 /* 848 * Following routine makes a pass through option buffer in mp and performs the 849 * following tasks. 850 * - estimate cumulative length needed for results 851 * - set "status" field based on permissions, option header check 852 * etc. 853 * - determine "pass_to_next" whether we need to send request to 854 * downstream module/driver. 
855 */ 856 857 static t_scalar_t 858 process_topthdrs_first_pass(mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp, 859 boolean_t *pass_to_nextp, size_t *toa_lenp) 860 { 861 opdes_t *opt_arr = dbobjp->odb_opt_des_arr; 862 uint_t opt_arr_cnt = dbobjp->odb_opt_arr_cnt; 863 boolean_t topmost_tpiprovider = dbobjp->odb_topmost_tpiprovider; 864 optlevel_t *valid_level_arr = dbobjp->odb_valid_levels_arr; 865 uint_t valid_level_arr_cnt = dbobjp->odb_valid_levels_arr_cnt; 866 struct T_opthdr *opt; 867 struct T_opthdr *opt_start, *opt_end; 868 opdes_t *optd; 869 size_t allopt_len; 870 struct T_optmgmt_req *tor = 871 (struct T_optmgmt_req *)mp->b_rptr; 872 873 *toa_lenp = sizeof (struct T_optmgmt_ack); /* initial value */ 874 875 if ((opt_start = (struct T_opthdr *) 876 mi_offset_param(mp, tor->OPT_offset, tor->OPT_length)) == NULL) { 877 return (TBADOPT); 878 } 879 if (!__TPI_TOPT_ISALIGNED(opt_start)) 880 return (TBADOPT); 881 882 opt_end = (struct T_opthdr *)((uchar_t *)opt_start + tor->OPT_length); 883 884 for (opt = opt_start; opt && (opt < opt_end); 885 opt = _TPI_TOPT_NEXTHDR(opt_start, tor->OPT_length, opt)) { 886 /* 887 * Validate the option for length and alignment 888 * before accessing anything in it. 889 */ 890 if (!(_TPI_TOPT_VALID(opt, opt_start, opt_end))) 891 return (TBADOPT); 892 893 /* Find the option in the opt_arr. */ 894 if (opt->name != T_ALLOPT) { 895 optd = opt_chk_lookup(opt->level, opt->name, 896 opt_arr, opt_arr_cnt); 897 if (optd == NULL) { 898 /* 899 * Option not found 900 * 901 * Verify if level is "valid" or not. 902 * Note: This check is required by XTI 903 * 904 * TPI provider always initializes 905 * the "not supported" (or whatever) status 906 * for the options. Other levels leave status 907 * unchanged if they do not understand an 908 * option. 
909 */ 910 if (topmost_tpiprovider) { 911 if (!opt_level_valid(opt->level, 912 valid_level_arr, 913 valid_level_arr_cnt)) 914 return (TBADOPT); 915 /* 916 * level is valid - initialize 917 * option as not supported 918 */ 919 opt->status = T_NOTSUPPORT; 920 } 921 922 *toa_lenp += _TPI_ALIGN_TOPT(opt->len); 923 continue; 924 } 925 } else { 926 /* 927 * Handle T_ALLOPT case as a special case. 928 * Note: T_ALLOPT does not mean anything 929 * for T_CHECK operation. 930 */ 931 allopt_len = 0; 932 if (tor->MGMT_flags == T_CHECK || 933 !topmost_tpiprovider || 934 ((allopt_len = opt_level_allopts_lengths(opt->level, 935 opt_arr, opt_arr_cnt)) == 0)) { 936 /* 937 * This is confusing but correct ! 938 * It is not valid to to use T_ALLOPT with 939 * T_CHECK flag. 940 * 941 * T_ALLOPT is assumed "expanded" at the 942 * topmost_tpiprovider level so it should not 943 * be there as an "option name" if this is not 944 * a topmost_tpiprovider call and we fail it. 945 * 946 * opt_level_allopts_lengths() is used to verify 947 * that "level" associated with the T_ALLOPT is 948 * supported. 949 * 950 */ 951 opt->status = T_FAILURE; 952 *toa_lenp += _TPI_ALIGN_TOPT(opt->len); 953 continue; 954 } 955 ASSERT(allopt_len != 0); /* remove ? */ 956 957 *toa_lenp += allopt_len; 958 opt->status = T_SUCCESS; 959 /* XXX - always set T_ALLOPT 'pass_to_next' for now */ 960 *pass_to_nextp = B_TRUE; 961 continue; 962 } 963 /* 964 * Check if option wants to flow downstream 965 */ 966 if (optd->opdes_props & OP_PASSNEXT) 967 *pass_to_nextp = B_TRUE; 968 969 /* Additional checks dependent on operation. */ 970 switch (tor->MGMT_flags) { 971 case T_DEFAULT: 972 case T_CURRENT: 973 974 /* 975 * The opt_chk_lookup() routine call above approved of 976 * this option so we can work on the status for it 977 * based on the permissions for the operation. (This 978 * can override any status for it set at higher levels) 979 * We assume this override is OK since chkfn at this 980 * level approved of this option. 
981 * 982 * T_CURRENT semantics: 983 * The read access is required. Else option 984 * status is T_NOTSUPPORT. 985 * 986 * T_DEFAULT semantics: 987 * Note: specification is not clear on this but we 988 * interpret T_DEFAULT semantics such that access to 989 * read value is required for access even the default 990 * value. Otherwise the option status is T_NOTSUPPORT. 991 */ 992 if (!OA_READ_PERMISSION(optd, cr)) { 993 opt->status = T_NOTSUPPORT; 994 *toa_lenp += _TPI_ALIGN_TOPT(opt->len); 995 /* skip to next */ 996 continue; 997 } 998 999 /* 1000 * T_DEFAULT/T_CURRENT semantics: 1001 * We know that read access is set. If no other access 1002 * is set, then status is T_READONLY. 1003 */ 1004 if (OA_READONLY_PERMISSION(optd, cr)) 1005 opt->status = T_READONLY; 1006 else 1007 opt->status = T_SUCCESS; 1008 /* 1009 * Option passes all checks. Make room for it in the 1010 * ack. Note: size stored in table does not include 1011 * space for option header. 1012 */ 1013 *toa_lenp += sizeof (struct T_opthdr) + 1014 _TPI_ALIGN_TOPT(optd->opdes_size); 1015 break; 1016 1017 case T_CHECK: 1018 case T_NEGOTIATE: 1019 1020 /* 1021 * T_NEGOTIATE semantics: 1022 * If for fixed length option value on input is not the 1023 * same as value supplied, then status is T_FAILURE. 1024 * 1025 * T_CHECK semantics: 1026 * If value is supplied, semantics same as T_NEGOTIATE. 1027 * It is however ok not to supply a value with T_CHECK. 1028 */ 1029 1030 if (tor->MGMT_flags == T_NEGOTIATE || 1031 (opt->len != sizeof (struct T_opthdr))) { 1032 /* 1033 * Implies "value" is specified in T_CHECK or 1034 * it is a T_NEGOTIATE request. 1035 * Verify size. 1036 * Note: This can override anything about this 1037 * option request done at a higher level. 
1038 */ 1039 if (!opt_length_ok(optd, opt)) { 1040 /* bad size */ 1041 *toa_lenp += _TPI_ALIGN_TOPT(opt->len); 1042 opt->status = T_FAILURE; 1043 continue; 1044 } 1045 } 1046 /* 1047 * The opt_chk_lookup() routine above() approved of 1048 * this option so we can work on the status for it based 1049 * on the permissions for the operation. (This can 1050 * override anything set at a higher level). 1051 * 1052 * T_CHECK/T_NEGOTIATE semantics: 1053 * Set status to T_READONLY if read is the only access 1054 * permitted 1055 */ 1056 if (OA_READONLY_PERMISSION(optd, cr)) { 1057 opt->status = T_READONLY; 1058 *toa_lenp += _TPI_ALIGN_TOPT(opt->len); 1059 /* skip to next */ 1060 continue; 1061 } 1062 1063 /* 1064 * T_CHECK/T_NEGOTIATE semantics: 1065 * If write (or execute) access is not set, then status 1066 * is T_NOTSUPPORT. 1067 */ 1068 if (!OA_WRITE_OR_EXECUTE(optd, cr)) { 1069 opt->status = T_NOTSUPPORT; 1070 *toa_lenp += _TPI_ALIGN_TOPT(opt->len); 1071 /* skip to next option */ 1072 continue; 1073 } 1074 /* 1075 * Option passes all checks. Make room for it in the 1076 * ack and set success in status. 1077 * Note: size stored in table does not include header 1078 * length. 1079 */ 1080 opt->status = T_SUCCESS; 1081 *toa_lenp += sizeof (struct T_opthdr) + 1082 _TPI_ALIGN_TOPT(optd->opdes_size); 1083 break; 1084 1085 default: 1086 return (TBADFLAG); 1087 } 1088 } /* for loop scanning input buffer */ 1089 1090 return (0); /* OK return */ 1091 } 1092 1093 /* 1094 * This routine makes another pass through the option buffer this 1095 * time acting on the request based on "status" result in the 1096 * first pass. It also performs "expansion" of T_ALLOPT into 1097 * all options of a certain level and acts on each for this request. 
1098 */ 1099 static t_scalar_t 1100 do_options_second_pass(queue_t *q, mblk_t *reqmp, mblk_t *ack_mp, cred_t *cr, 1101 optdb_obj_t *dbobjp, mblk_t *first_mp, boolean_t is_restart, 1102 boolean_t *queued_statusp) 1103 { 1104 boolean_t topmost_tpiprovider = dbobjp->odb_topmost_tpiprovider; 1105 int failed_option; 1106 struct T_opthdr *opt; 1107 struct T_opthdr *opt_start, *opt_end, *restart_opt; 1108 uchar_t *optr; 1109 uint_t optset_context; 1110 struct T_optmgmt_req *tor = (struct T_optmgmt_req *)reqmp->b_rptr; 1111 opt_restart_t *or; 1112 t_uscalar_t *worst_statusp; 1113 int err; 1114 1115 *queued_statusp = B_FALSE; 1116 or = (opt_restart_t *)first_mp->b_rptr; 1117 worst_statusp = &or->or_worst_status; 1118 1119 optr = (uchar_t *)ack_mp->