/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * method.c - method execution functions
 *
 * This file contains the routines needed to run a method: a fork(2)-exec(2)
 * invocation monitored using either the contract filesystem or waitpid(2).
 * (Plain fork1(2) support is provided in fork.c.)
 *
 * Contract Transfer
 *   When we restart a service, we want to transfer any contracts that the old
 *   service's contract inherited. This means that (a) we must not abandon the
 *   old contract when the service dies and (b) we must write the id of the old
 *   contract into the terms of the new contract. There should be limits to
 *   (a), though, since we don't want to keep the contract around forever. To
 *   this end we'll say that services in the offline state may have a contract
 *   to be transferred and services in the disabled or maintenance states
 *   cannot. This means that when a service transitions from online (or
 *   degraded) to offline, the contract should be preserved, and when the
 *   service transitions from offline to online (i.e., the start method), we'll
 *   transfer inherited contracts.
 */

#include <sys/contract/process.h>
#include <sys/ctfs.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/wait.h>
#include <alloca.h>
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <libcontract.h>
#include <libcontract_priv.h>
#include <libgen.h>
#include <librestart.h>
#include <libscf.h>
#include <limits.h>
#include <port.h>
#include <sac.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <atomic.h>
#include <poll.h>

#include "startd.h"

#define	SBIN_SH		"/sbin/sh"

/*
 * Used to tell if contracts are in the process of being
 * stored into the svc.startd internal hash table.
 */
volatile uint16_t	storing_contract = 0;

/*
 * Mapping from restart_on method-type to contract events. Must correspond to
 * enum method_restart_t.
88 0 stevel */ 89 0 stevel static uint_t method_events[] = { 90 0 stevel /* METHOD_RESTART_ALL */ 91 0 stevel CT_PR_EV_HWERR | CT_PR_EV_SIGNAL | CT_PR_EV_CORE | CT_PR_EV_EMPTY, 92 0 stevel /* METHOD_RESTART_EXTERNAL_FAULT */ 93 0 stevel CT_PR_EV_HWERR | CT_PR_EV_SIGNAL, 94 0 stevel /* METHOD_RESTART_ANY_FAULT */ 95 0 stevel CT_PR_EV_HWERR | CT_PR_EV_SIGNAL | CT_PR_EV_CORE 96 0 stevel }; 97 0 stevel 98 0 stevel /* 99 0 stevel * method_record_start(restarter_inst_t *) 100 0 stevel * Record a service start for rate limiting. Place the current time 101 0 stevel * in the circular array of instance starts. 102 0 stevel */ 103 0 stevel static void 104 0 stevel method_record_start(restarter_inst_t *inst) 105 0 stevel { 106 0 stevel int index = inst->ri_start_index++ % RINST_START_TIMES; 107 0 stevel 108 0 stevel inst->ri_start_time[index] = gethrtime(); 109 0 stevel } 110 0 stevel 111 0 stevel /* 112 0 stevel * method_rate_critical(restarter_inst_t *) 113 0 stevel * Return true if the average start interval is less than the permitted 114 0 stevel * interval. Implicit success if insufficient measurements for an 115 0 stevel * average exist. 116 0 stevel */ 117 0 stevel static int 118 0 stevel method_rate_critical(restarter_inst_t *inst) 119 0 stevel { 120 0 stevel uint_t n = inst->ri_start_index; 121 0 stevel hrtime_t avg_ns = 0; 122 0 stevel 123 0 stevel if (inst->ri_start_index < RINST_START_TIMES) 124 0 stevel return (0); 125 0 stevel 126 0 stevel avg_ns = 127 0 stevel (inst->ri_start_time[(n - 1) % RINST_START_TIMES] - 128 0 stevel inst->ri_start_time[n % RINST_START_TIMES]) / 129 0 stevel (RINST_START_TIMES - 1); 130 0 stevel 131 0 stevel return (avg_ns < RINST_FAILURE_RATE_NS); 132 0 stevel } 133 0 stevel 134 0 stevel /* 135 0 stevel * int method_is_transient() 136 0 stevel * Determine if the method for the given instance is transient, 137 0 stevel * from a contract perspective. Return 1 if it is, and 0 if it isn't. 
138 0 stevel */ 139 0 stevel static int 140 0 stevel method_is_transient(restarter_inst_t *inst, int type) 141 0 stevel { 142 0 stevel if (instance_is_transient_style(inst) || type != METHOD_START) 143 0 stevel return (1); 144 0 stevel else 145 0 stevel return (0); 146 0 stevel } 147 0 stevel 148 0 stevel /* 149 0 stevel * void method_store_contract() 150 0 stevel * Store the newly created contract id into local structures and 151 0 stevel * the repository. If the repository connection is broken it is rebound. 152 0 stevel */ 153 0 stevel static void 154 0 stevel method_store_contract(restarter_inst_t *inst, int type, ctid_t *cid) 155 0 stevel { 156 0 stevel int r; 157 0 stevel boolean_t primary; 158 0 stevel 159 0 stevel if (errno = contract_latest(cid)) 160 0 stevel uu_die("%s: Couldn't get new contract's id", inst->ri_i.i_fmri); 161 0 stevel 162 0 stevel primary = !method_is_transient(inst, type); 163 0 stevel 164 0 stevel if (!primary) { 165 0 stevel if (inst->ri_i.i_transient_ctid != 0) { 166 0 stevel log_framework(LOG_INFO, 167 0 stevel "%s: transient ctid expected to be 0 but " 168 0 stevel "was set to %ld\n", inst->ri_i.i_fmri, 169 0 stevel inst->ri_i.i_transient_ctid); 170 0 stevel } 171 0 stevel 172 0 stevel inst->ri_i.i_transient_ctid = *cid; 173 0 stevel } else { 174 0 stevel if (inst->ri_i.i_primary_ctid != 0) { 175 0 stevel /* 176 0 stevel * There was an old contract that we transferred. 177 0 stevel * Remove it. 
178 0 stevel */ 179 0 stevel method_remove_contract(inst, B_TRUE, B_FALSE); 180 0 stevel } 181 0 stevel 182 0 stevel if (inst->ri_i.i_primary_ctid != 0) { 183 0 stevel log_framework(LOG_INFO, 184 0 stevel "%s: primary ctid expected to be 0 but " 185 0 stevel "was set to %ld\n", inst->ri_i.i_fmri, 186 0 stevel inst->ri_i.i_primary_ctid); 187 0 stevel } 188 0 stevel 189 0 stevel inst->ri_i.i_primary_ctid = *cid; 190 0 stevel inst->ri_i.i_primary_ctid_stopped = 0; 191 4244 jeanm 192 4244 jeanm log_framework(LOG_DEBUG, "Storing primary contract %ld for " 193 4244 jeanm "%s.\n", *cid, inst->ri_i.i_fmri); 194 0 stevel 195 0 stevel contract_hash_store(*cid, inst->ri_id); 196 0 stevel } 197 0 stevel 198 0 stevel again: 199 0 stevel if (inst->ri_mi_deleted) 200 0 stevel return; 201 0 stevel 202 0 stevel r = restarter_store_contract(inst->ri_m_inst, *cid, primary ? 203 0 stevel RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT); 204 0 stevel switch (r) { 205 0 stevel case 0: 206 0 stevel break; 207 0 stevel 208 0 stevel case ECANCELED: 209 0 stevel inst->ri_mi_deleted = B_TRUE; 210 0 stevel break; 211 0 stevel 212 0 stevel case ECONNABORTED: 213 0 stevel libscf_handle_rebind(scf_instance_handle(inst->ri_m_inst)); 214 0 stevel /* FALLTHROUGH */ 215 0 stevel 216 0 stevel case EBADF: 217 0 stevel libscf_reget_instance(inst); 218 0 stevel goto again; 219 0 stevel 220 0 stevel case ENOMEM: 221 0 stevel case EPERM: 222 0 stevel case EACCES: 223 0 stevel case EROFS: 224 0 stevel uu_die("%s: Couldn't store contract id %ld", 225 0 stevel inst->ri_i.i_fmri, *cid); 226 0 stevel /* NOTREACHED */ 227 0 stevel 228 0 stevel case EINVAL: 229 0 stevel default: 230 0 stevel bad_error("restarter_store_contract", r); 231 0 stevel } 232 0 stevel } 233 0 stevel 234 0 stevel /* 235 0 stevel * void method_remove_contract() 236 0 stevel * Remove any non-permanent contracts from internal structures and 237 0 stevel * the repository, then abandon them. 
238 0 stevel * Returns 239 0 stevel * 0 - success 240 0 stevel * ECANCELED - inst was deleted from the repository 241 0 stevel * 242 0 stevel * If the repository connection was broken, it is rebound. 243 0 stevel */ 244 0 stevel void 245 0 stevel method_remove_contract(restarter_inst_t *inst, boolean_t primary, 246 0 stevel boolean_t abandon) 247 0 stevel { 248 0 stevel ctid_t * const ctidp = primary ? &inst->ri_i.i_primary_ctid : 249 0 stevel &inst->ri_i.i_transient_ctid; 250 0 stevel 251 0 stevel int r; 252 0 stevel 253 0 stevel assert(*ctidp != 0); 254 0 stevel 255 0 stevel log_framework(LOG_DEBUG, "Removing %s contract %lu for %s.\n", 256 0 stevel primary ? "primary" : "transient", *ctidp, inst->ri_i.i_fmri); 257 0 stevel 258 0 stevel if (abandon) 259 0 stevel contract_abandon(*ctidp); 260 0 stevel 261 0 stevel again: 262 0 stevel if (inst->ri_mi_deleted) { 263 0 stevel r = ECANCELED; 264 0 stevel goto out; 265 0 stevel } 266 0 stevel 267 0 stevel r = restarter_remove_contract(inst->ri_m_inst, *ctidp, primary ? 
268 0 stevel RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT); 269 0 stevel switch (r) { 270 0 stevel case 0: 271 0 stevel break; 272 0 stevel 273 0 stevel case ECANCELED: 274 0 stevel inst->ri_mi_deleted = B_TRUE; 275 0 stevel break; 276 0 stevel 277 0 stevel case ECONNABORTED: 278 0 stevel libscf_handle_rebind(scf_instance_handle(inst->ri_m_inst)); 279 0 stevel /* FALLTHROUGH */ 280 0 stevel 281 0 stevel case EBADF: 282 0 stevel libscf_reget_instance(inst); 283 0 stevel goto again; 284 0 stevel 285 0 stevel case ENOMEM: 286 0 stevel case EPERM: 287 0 stevel case EACCES: 288 0 stevel case EROFS: 289 0 stevel log_error(LOG_INFO, "%s: Couldn't remove contract id %ld: " 290 0 stevel "%s.\n", inst->ri_i.i_fmri, *ctidp, strerror(r)); 291 0 stevel break; 292 0 stevel 293 0 stevel case EINVAL: 294 0 stevel default: 295 0 stevel bad_error("restarter_remove_contract", r); 296 0 stevel } 297 0 stevel 298 0 stevel out: 299 0 stevel if (primary) 300 0 stevel contract_hash_remove(*ctidp); 301 0 stevel 302 0 stevel *ctidp = 0; 303 0 stevel } 304 0 stevel 305 6073 acruz static const char *method_names[] = { "start", "stop", "refresh" }; 306 6073 acruz 307 0 stevel /* 308 0 stevel * int method_ready_contract(restarter_inst_t *, int, method_restart_t, int) 309 0 stevel * 310 0 stevel * Activate a contract template for the type method of inst. type, 311 0 stevel * restart_on, and cte_mask dictate the critical events term of the contract. 
312 0 stevel * Returns 313 0 stevel * 0 - success 314 0 stevel * ECANCELED - inst has been deleted from the repository 315 0 stevel */ 316 0 stevel static int 317 0 stevel method_ready_contract(restarter_inst_t *inst, int type, 318 0 stevel method_restart_t restart_on, uint_t cte_mask) 319 0 stevel { 320 0 stevel int tmpl, err, istrans, iswait, ret; 321 0 stevel uint_t cevents, fevents; 322 0 stevel 323 0 stevel /* 324 0 stevel * Correctly supporting wait-style services is tricky without 325 0 stevel * rearchitecting startd to cope with multiple event sources 326 0 stevel * simultaneously trying to stop an instance. Until a better 327 0 stevel * solution is implemented, we avoid this problem for 328 0 stevel * wait-style services by making contract events fatal and 329 0 stevel * letting the wait code alone handle stopping the service. 330 0 stevel */ 331 0 stevel iswait = instance_is_wait_style(inst); 332 0 stevel istrans = method_is_transient(inst, type); 333 0 stevel 334 0 stevel tmpl = open64(CTFS_ROOT "/process/template", O_RDWR); 335 0 stevel if (tmpl == -1) 336 0 stevel uu_die("Could not create contract template"); 337 0 stevel 338 0 stevel /* 339 0 stevel * We assume non-login processes are unlikely to create 340 0 stevel * multiple process groups, and set CT_PR_PGRPONLY for all 341 0 stevel * wait-style services' contracts. 342 0 stevel */ 343 0 stevel err = ct_pr_tmpl_set_param(tmpl, CT_PR_INHERIT | CT_PR_REGENT | 344 0 stevel (iswait ? CT_PR_PGRPONLY : 0)); 345 0 stevel assert(err == 0); 346 0 stevel 347 0 stevel if (istrans) { 348 0 stevel cevents = 0; 349 0 stevel fevents = 0; 350 0 stevel } else { 351 0 stevel assert(restart_on >= 0); 352 0 stevel assert(restart_on <= METHOD_RESTART_ANY_FAULT); 353 0 stevel cevents = method_events[restart_on] & ~cte_mask; 354 0 stevel fevents = iswait ? 
355 0 stevel (method_events[restart_on] & ~cte_mask & CT_PR_ALLFATAL) : 356 0 stevel 0; 357 0 stevel } 358 0 stevel 359 0 stevel err = ct_tmpl_set_critical(tmpl, cevents); 360 0 stevel assert(err == 0); 361 0 stevel 362 0 stevel err = ct_tmpl_set_informative(tmpl, 0); 363 0 stevel assert(err == 0); 364 0 stevel err = ct_pr_tmpl_set_fatal(tmpl, fevents); 365 0 stevel assert(err == 0); 366 0 stevel 367 0 stevel err = ct_tmpl_set_cookie(tmpl, istrans ? METHOD_OTHER_COOKIE : 368 0 stevel METHOD_START_COOKIE); 369 0 stevel assert(err == 0); 370 0 stevel 371 0 stevel if (type == METHOD_START && inst->ri_i.i_primary_ctid != 0) { 372 0 stevel ret = ct_pr_tmpl_set_transfer(tmpl, inst->ri_i.i_primary_ctid); 373 0 stevel switch (ret) { 374 0 stevel case 0: 375 0 stevel break; 376 0 stevel 377 0 stevel case ENOTEMPTY: 378 0 stevel /* No contracts for you! */ 379 0 stevel method_remove_contract(inst, B_TRUE, B_TRUE); 380 0 stevel if (inst->ri_mi_deleted) { 381 0 stevel ret = ECANCELED; 382 0 stevel goto out; 383 0 stevel } 384 0 stevel break; 385 0 stevel 386 0 stevel case EINVAL: 387 0 stevel case ESRCH: 388 0 stevel case EACCES: 389 0 stevel default: 390 0 stevel bad_error("ct_pr_tmpl_set_transfer", ret); 391 0 stevel } 392 0 stevel } 393 0 stevel 394 6073 acruz err = ct_pr_tmpl_set_svc_fmri(tmpl, inst->ri_i.i_fmri); 395 6073 acruz assert(err == 0); 396 6073 acruz err = ct_pr_tmpl_set_svc_aux(tmpl, method_names[type]); 397 6073 acruz assert(err == 0); 398 6073 acruz 399 0 stevel err = ct_tmpl_activate(tmpl); 400 0 stevel assert(err == 0); 401 0 stevel 402 0 stevel ret = 0; 403 0 stevel 404 0 stevel out: 405 0 stevel err = close(tmpl); 406 0 stevel assert(err == 0); 407 0 stevel 408 0 stevel return (ret); 409 0 stevel } 410 0 stevel 411 0 stevel static void 412 0 stevel exec_method(const restarter_inst_t *inst, int type, const char *method, 413 0 stevel struct method_context *mcp, uint8_t need_session) 414 0 stevel { 415 0 stevel char *cmd; 416 0 stevel const char *errf; 417 0 
stevel char **nenv; 418 4816 acruz int rsmc_errno = 0; 419 0 stevel 420 0 stevel cmd = uu_msprintf("exec %s", method); 421 0 stevel 422 0 stevel if (inst->ri_utmpx_prefix[0] != '\0' && inst->ri_utmpx_prefix != NULL) 423 0 stevel (void) utmpx_mark_init(getpid(), inst->ri_utmpx_prefix); 424 0 stevel 425 0 stevel setlog(inst->ri_logstem); 426 5238 lianep log_instance(inst, B_FALSE, "Executing %s method (\"%s\").", 427 0 stevel method_names[type], method); 428 0 stevel 429 0 stevel if (need_session) 430 0 stevel (void) setpgrp(); 431 0 stevel 432 0 stevel /* Set credentials. */ 433 4816 acruz rsmc_errno = restarter_set_method_context(mcp, &errf); 434 4816 acruz if (rsmc_errno != 0) { 435 9263 Sean log_instance(inst, B_FALSE, 436 9263 Sean "svc.startd could not set context for method: "); 437 0 stevel 438 4816 acruz if (rsmc_errno == -1) { 439 0 stevel if (strcmp(errf, "core_set_process_path") == 0) { 440 9263 Sean log_instance(inst, B_FALSE, 441 9263 Sean "Could not set corefile path."); 442 0 stevel } else if (strcmp(errf, "setproject") == 0) { 443 9263 Sean log_instance(inst, B_FALSE, "%s: a resource " 444 9263 Sean "control assignment failed", errf); 445 0 stevel } else if (strcmp(errf, "pool_set_binding") == 0) { 446 9263 Sean log_instance(inst, B_FALSE, "%s: a system " 447 9263 Sean "error occurred", errf); 448 0 stevel } else { 449 0 stevel #ifndef NDEBUG 450 0 stevel uu_warn("%s:%d: Bad function name \"%s\" for " 451 0 stevel "error %d from " 452 0 stevel "restarter_set_method_context().\n", 453 4816 acruz __FILE__, __LINE__, errf, rsmc_errno); 454 0 stevel #endif 455 0 stevel abort(); 456 0 stevel } 457 0 stevel 458 0 stevel exit(1); 459 0 stevel } 460 0 stevel 461 0 stevel if (errf != NULL && strcmp(errf, "pool_set_binding") == 0) { 462 4816 acruz switch (rsmc_errno) { 463 0 stevel case ENOENT: 464 9263 Sean log_instance(inst, B_FALSE, "%s: the pool " 465 9263 Sean "could not be found", errf); 466 0 stevel break; 467 0 stevel 468 0 stevel case EBADF: 469 9263 
Sean log_instance(inst, B_FALSE, "%s: the " 470 9263 Sean "configuration is invalid", errf); 471 1712 rm88369 break; 472 1712 rm88369 473 1712 rm88369 case EINVAL: 474 9263 Sean log_instance(inst, B_FALSE, "%s: pool name " 475 9263 Sean "\"%s\" is invalid", errf, 476 9263 Sean mcp->resource_pool); 477 0 stevel break; 478 0 stevel 479 0 stevel default: 480 0 stevel #ifndef NDEBUG 481 0 stevel uu_warn("%s:%d: Bad error %d for function %s " 482 0 stevel "in restarter_set_method_context().\n", 483 4816 acruz __FILE__, __LINE__, rsmc_errno, errf); 484 0 stevel #endif 485 0 stevel abort(); 486 0 stevel } 487 0 stevel 488 0 stevel exit(SMF_EXIT_ERR_CONFIG); 489 0 stevel } 490 0 stevel 491 0 stevel if (errf != NULL) { 492 4816 acruz errno = rsmc_errno; 493 0 stevel perror(errf); 494 0 stevel 495 4816 acruz switch (rsmc_errno) { 496 0 stevel case EINVAL: 497 0 stevel case EPERM: 498 0 stevel case ENOENT: 499 0 stevel case ENAMETOOLONG: 500 0 stevel case ERANGE: 501 0 stevel case ESRCH: 502 0 stevel exit(SMF_EXIT_ERR_CONFIG); 503 0 stevel /* NOTREACHED */ 504 0 stevel 505 0 stevel default: 506 0 stevel exit(1); 507 0 stevel } 508 0 stevel } 509 0 stevel 510 4816 acruz switch (rsmc_errno) { 511 0 stevel case ENOMEM: 512 9263 Sean log_instance(inst, B_FALSE, "Out of memory."); 513 0 stevel exit(1); 514 0 stevel /* NOTREACHED */ 515 0 stevel 516 0 stevel case ENOENT: 517 9263 Sean log_instance(inst, B_FALSE, "Missing passwd entry for " 518 9263 Sean "user."); 519 0 stevel exit(SMF_EXIT_ERR_CONFIG); 520 0 stevel /* NOTREACHED */ 521 0 stevel 522 0 stevel default: 523 0 stevel #ifndef NDEBUG 524 0 stevel uu_warn("%s:%d: Bad miscellaneous error %d from " 525 0 stevel "restarter_set_method_context().\n", __FILE__, 526 4816 acruz __LINE__, rsmc_errno); 527 0 stevel #endif 528 0 stevel abort(); 529 0 stevel } 530 0 stevel } 531 0 stevel 532 5040 wesolows nenv = set_smf_env(mcp->env, mcp->env_sz, NULL, inst, 533 5040 wesolows method_names[type]); 534 0 stevel 535 0 stevel 
log_preexec(); 536 0 stevel 537 0 stevel (void) execle(SBIN_SH, SBIN_SH, "-c", cmd, NULL, nenv); 538 0 stevel 539 0 stevel exit(10); 540 0 stevel } 541 0 stevel 542 0 stevel static void 543 0 stevel write_status(restarter_inst_t *inst, const char *mname, int stat) 544 0 stevel { 545 0 stevel int r; 546 0 stevel 547 0 stevel again: 548 0 stevel if (inst->ri_mi_deleted) 549 0 stevel return; 550 0 stevel 551 0 stevel r = libscf_write_method_status(inst->ri_m_inst, mname, stat); 552 0 stevel switch (r) { 553 0 stevel case 0: 554 0 stevel break; 555 0 stevel 556 0 stevel case ECONNABORTED: 557 0 stevel libscf_reget_instance(inst); 558 0 stevel goto again; 559 0 stevel 560 0 stevel case ECANCELED: 561 0 stevel inst->ri_mi_deleted = 1; 562 0 stevel break; 563 0 stevel 564 0 stevel case EPERM: 565 0 stevel case EACCES: 566 0 stevel case EROFS: 567 0 stevel log_framework(LOG_INFO, "Could not write exit status " 568 0 stevel "for %s method of %s: %s.\n", mname, 569 0 stevel inst->ri_i.i_fmri, strerror(r)); 570 0 stevel break; 571 0 stevel 572 0 stevel case ENAMETOOLONG: 573 0 stevel default: 574 0 stevel bad_error("libscf_write_method_status", r); 575 0 stevel } 576 0 stevel } 577 0 stevel 578 0 stevel /* 579 0 stevel * int method_run() 580 0 stevel * Execute the type method of instp. If it requires a fork(), wait for it 581 0 stevel * to return and return its exit code in *exit_code. Otherwise set 582 0 stevel * *exit_code to 0 if the method succeeds & -1 if it fails. If the 583 0 stevel * repository connection is broken, it is rebound, but inst may not be 584 0 stevel * reset. 585 0 stevel * Returns 586 0 stevel * 0 - success 587 0 stevel * EINVAL - A correct method or method context couldn't be retrieved. 588 0 stevel * EIO - Contract kill failed. 589 0 stevel * EFAULT - Method couldn't be executed successfully. 590 0 stevel * ELOOP - Retry threshold exceeded. 
591 0 stevel * ECANCELED - inst was deleted from the repository before method was run 592 0 stevel * ERANGE - Timeout retry threshold exceeded. 593 0 stevel * EAGAIN - Failed due to external cause, retry. 594 0 stevel */ 595 0 stevel int 596 0 stevel method_run(restarter_inst_t **instp, int type, int *exit_code) 597 0 stevel { 598 0 stevel char *method; 599 0 stevel int ret_status; 600 0 stevel pid_t pid; 601 0 stevel method_restart_t restart_on; 602 0 stevel uint_t cte_mask; 603 0 stevel uint8_t need_session; 604 0 stevel scf_handle_t *h; 605 0 stevel scf_snapshot_t *snap; 606 0 stevel const char *mname; 607 9765 Sean mc_error_t *m_error; 608 0 stevel struct method_context *mcp; 609 0 stevel int result = 0, timeout_fired = 0; 610 0 stevel int sig, r; 611 0 stevel boolean_t transient; 612 0 stevel uint64_t timeout; 613 0 stevel uint8_t timeout_retry; 614 0 stevel ctid_t ctid; 615 0 stevel int ctfd = -1; 616 0 stevel restarter_inst_t *inst = *instp; 617 0 stevel int id = inst->ri_id; 618 119 sl108498 int forkerr; 619 0 stevel 620 0 stevel assert(PTHREAD_MUTEX_HELD(&inst->ri_lock)); 621 0 stevel assert(instance_in_transition(inst)); 622 0 stevel 623 0 stevel if (inst->ri_mi_deleted) 624 0 stevel return (ECANCELED); 625 0 stevel 626 0 stevel *exit_code = 0; 627 0 stevel 628 0 stevel assert(0 <= type && type <= 2); 629 0 stevel mname = method_names[type]; 630 0 stevel 631 0 stevel if (type == METHOD_START) 632 0 stevel inst->ri_pre_online_hook(); 633 0 stevel 634 0 stevel h = scf_instance_handle(inst->ri_m_inst); 635 0 stevel 636 0 stevel snap = scf_snapshot_create(h); 637 0 stevel if (snap == NULL || 638 0 stevel scf_instance_get_snapshot(inst->ri_m_inst, "running", snap) != 0) { 639 0 stevel log_framework(LOG_DEBUG, 640 0 stevel "Could not get running snapshot for %s. 
" 641 0 stevel "Using editing version to run method %s.\n", 642 0 stevel inst->ri_i.i_fmri, mname); 643 0 stevel scf_snapshot_destroy(snap); 644 0 stevel snap = NULL; 645 0 stevel } 646 0 stevel 647 0 stevel /* 648 0 stevel * After this point, we may be logging to the instance log. 649 0 stevel * Make sure we've noted where that log is as a property of 650 0 stevel * the instance. 651 0 stevel */ 652 0 stevel r = libscf_note_method_log(inst->ri_m_inst, st->st_log_prefix, 653 0 stevel inst->ri_logstem); 654 0 stevel if (r != 0) { 655 0 stevel log_framework(LOG_WARNING, 656 0 stevel "%s: couldn't note log location: %s\n", 657 0 stevel inst->ri_i.i_fmri, strerror(r)); 658 0 stevel } 659 0 stevel 660 0 stevel if ((method = libscf_get_method(h, type, inst, snap, &restart_on, 661 0 stevel &cte_mask, &need_session, &timeout, &timeout_retry)) == NULL) { 662 0 stevel if (errno == LIBSCF_PGROUP_ABSENT) { 663 0 stevel log_framework(LOG_DEBUG, 664 0 stevel "%s: instance has no method property group '%s'.\n", 665 0 stevel inst->ri_i.i_fmri, mname); 666 0 stevel if (type == METHOD_REFRESH) 667 0 stevel log_instance(inst, B_TRUE, "No '%s' method " 668 0 stevel "defined. 
Treating as :true.", mname); 669 0 stevel else 670 0 stevel log_instance(inst, B_TRUE, "Method property " 671 0 stevel "group '%s' is not present.", mname); 672 0 stevel scf_snapshot_destroy(snap); 673 0 stevel return (0); 674 0 stevel } else if (errno == LIBSCF_PROPERTY_ABSENT) { 675 0 stevel log_framework(LOG_DEBUG, 676 0 stevel "%s: instance has no '%s/exec' method property.\n", 677 0 stevel inst->ri_i.i_fmri, mname); 678 0 stevel log_instance(inst, B_TRUE, "Method property '%s/exec " 679 0 stevel "is not present.", mname); 680 0 stevel scf_snapshot_destroy(snap); 681 0 stevel return (0); 682 0 stevel } else { 683 0 stevel log_error(LOG_WARNING, 684 0 stevel "%s: instance libscf_get_method failed\n", 685 0 stevel inst->ri_i.i_fmri); 686 0 stevel scf_snapshot_destroy(snap); 687 0 stevel return (EINVAL); 688 0 stevel } 689 0 stevel } 690 0 stevel 691 0 stevel /* open service contract if stopping a non-transient service */ 692 0 stevel if (type == METHOD_STOP && (!instance_is_transient_style(inst))) { 693 0 stevel if (inst->ri_i.i_primary_ctid == 0) { 694 0 stevel /* service is not running, nothing to stop */ 695 0 stevel log_framework(LOG_DEBUG, "%s: instance has no primary " 696 0 stevel "contract, no service to stop.\n", 697 0 stevel inst->ri_i.i_fmri); 698 0 stevel scf_snapshot_destroy(snap); 699 0 stevel return (0); 700 0 stevel } 701 0 stevel if ((ctfd = contract_open(inst->ri_i.i_primary_ctid, "process", 702 0 stevel "events", O_RDONLY)) < 0) { 703 0 stevel result = EFAULT; 704 0 stevel log_instance(inst, B_TRUE, "Could not open service " 705 5238 lianep "contract %ld. 
Stop method not run.", 706 0 stevel inst->ri_i.i_primary_ctid); 707 0 stevel goto out; 708 0 stevel } 709 0 stevel } 710 0 stevel 711 0 stevel if (restarter_is_null_method(method)) { 712 0 stevel log_framework(LOG_DEBUG, "%s: null method succeeds\n", 713 0 stevel inst->ri_i.i_fmri); 714 0 stevel 715 5238 lianep log_instance(inst, B_TRUE, "Executing %s method (null).", 716 5238 lianep mname); 717 0 stevel 718 0 stevel if (type == METHOD_START) 719 0 stevel write_status(inst, mname, 0); 720 0 stevel goto out; 721 0 stevel } 722 0 stevel 723 0 stevel sig = restarter_is_kill_method(method); 724 0 stevel if (sig >= 0) { 725 0 stevel 726 0 stevel if (inst->ri_i.i_primary_ctid == 0) { 727 0 stevel log_error(LOG_ERR, "%s: :kill with no contract\n", 728 0 stevel inst->ri_i.i_fmri); 729 5238 lianep log_instance(inst, B_TRUE, "Invalid use of \":kill\" " 730 5238 lianep "as stop method for transient service."); 731 0 stevel result = EINVAL; 732 0 stevel goto out; 733 0 stevel } 734 0 stevel 735 0 stevel log_framework(LOG_DEBUG, 736 0 stevel "%s: :killing contract with signal %d\n", 737 0 stevel inst->ri_i.i_fmri, sig); 738 0 stevel 739 5238 lianep log_instance(inst, B_TRUE, "Executing %s method (:kill).", 740 0 stevel mname); 741 0 stevel 742 0 stevel if (contract_kill(inst->ri_i.i_primary_ctid, sig, 743 0 stevel inst->ri_i.i_fmri) != 0) { 744 0 stevel result = EIO; 745 0 stevel goto out; 746 0 stevel } else 747 0 stevel goto assured_kill; 748 0 stevel } 749 0 stevel 750 0 stevel log_framework(LOG_DEBUG, "%s: forking to run method %s\n", 751 0 stevel inst->ri_i.i_fmri, method); 752 0 stevel 753 9765 Sean m_error = restarter_get_method_context(RESTARTER_METHOD_CONTEXT_VERSION, 754 0 stevel inst->ri_m_inst, snap, mname, method, &mcp); 755 0 stevel 756 9765 Sean if (m_error != NULL) { 757 9765 Sean log_instance(inst, B_TRUE, "%s", m_error->msg); 758 9765 Sean restarter_mc_error_destroy(m_error); 759 0 stevel result = EINVAL; 760 0 stevel goto out; 761 0 stevel } 762 0 stevel 763 
0 stevel r = method_ready_contract(inst, type, restart_on, cte_mask); 764 0 stevel if (r != 0) { 765 0 stevel assert(r == ECANCELED); 766 0 stevel assert(inst->ri_mi_deleted); 767 0 stevel restarter_free_method_context(mcp); 768 0 stevel result = ECANCELED; 769 0 stevel goto out; 770 0 stevel } 771 0 stevel 772 0 stevel /* 773 0 stevel * Validate safety of method contexts, to save children work. 774 0 stevel */ 775 0 stevel if (!restarter_rm_libs_loadable()) 776 0 stevel log_framework(LOG_DEBUG, "%s: method contexts limited " 777 0 stevel "to root-accessible libraries\n", inst->ri_i.i_fmri); 778 0 stevel 779 0 stevel /* 780 0 stevel * If the service is restarting too quickly, send it to 781 0 stevel * maintenance. 782 0 stevel */ 783 0 stevel if (type == METHOD_START) { 784 0 stevel method_record_start(inst); 785 0 stevel if (method_rate_critical(inst)) { 786 0 stevel log_instance(inst, B_TRUE, "Restarting too quickly, " 787 5238 lianep "changing state to maintenance."); 788 0 stevel result = ELOOP; 789 3179 jeanm restarter_free_method_context(mcp); 790 0 stevel goto out; 791 0 stevel } 792 0 stevel } 793 0 stevel 794 4244 jeanm atomic_add_16(&storing_contract, 1); 795 119 sl108498 pid = startd_fork1(&forkerr); 796 0 stevel if (pid == 0) 797 0 stevel exec_method(inst, type, method, mcp, need_session); 798 0 stevel 799 0 stevel if (pid == -1) { 800 4244 jeanm atomic_add_16(&storing_contract, -1); 801 119 sl108498 if (forkerr == EAGAIN) 802 119 sl108498 result = EAGAIN; 803 119 sl108498 else 804 119 sl108498 result = EFAULT; 805 119 sl108498 806 0 stevel log_error(LOG_WARNING, 807 119 sl108498 "%s: Couldn't fork to execute method %s: %s\n", 808 119 sl108498 inst->ri_i.i_fmri, method, strerror(forkerr)); 809 119 sl108498 810 4244 jeanm restarter_free_method_context(mcp); 811 0 stevel goto out; 812 0 stevel } 813 0 stevel 814 0 stevel 815 0 stevel /* 816 0 stevel * Get the contract id, decide whether it is primary or transient, and 817 0 stevel * stash it in inst & the 
repository. 818 0 stevel */ 819 0 stevel method_store_contract(inst, type, &ctid); 820 4244 jeanm atomic_add_16(&storing_contract, -1); 821 4244 jeanm 822 4244 jeanm restarter_free_method_context(mcp); 823 0 stevel 824 0 stevel /* 825 0 stevel * Similarly for the start method PID. 826 0 stevel */ 827 0 stevel if (type == METHOD_START && !inst->ri_mi_deleted) 828 0 stevel (void) libscf_write_start_pid(inst->ri_m_inst, pid); 829 0 stevel 830 0 stevel if (instance_is_wait_style(inst) && type == METHOD_START) { 831 0 stevel /* Wait style instances don't get timeouts on start methods. */ 832 0 stevel if (wait_register(pid, inst->ri_i.i_fmri, 1, 0)) { 833 0 stevel log_error(LOG_WARNING, 834 0 stevel "%s: couldn't register %ld for wait\n", 835 0 stevel inst->ri_i.i_fmri, pid); 836 0 stevel result = EFAULT; 837 0 stevel goto contract_out; 838 0 stevel } 839 0 stevel write_status(inst, mname, 0); 840 0 stevel 841 0 stevel } else { 842 0 stevel int r, err; 843 0 stevel time_t start_time; 844 0 stevel time_t end_time; 845 0 stevel 846 0 stevel /* 847 0 stevel * Because on upgrade/live-upgrade we may have no chance 848 0 stevel * to override faulty timeout values on the way to 849 0 stevel * manifest import, all services on the path to manifest 850 0 stevel * import are treated the same as INFINITE timeout services. 851 0 stevel */ 852 0 stevel 853 0 stevel start_time = time(NULL); 854 0 stevel if (timeout != METHOD_TIMEOUT_INFINITE && !is_timeout_ovr(inst)) 855 0 stevel timeout_insert(inst, ctid, timeout); 856 0 stevel else 857 0 stevel timeout = METHOD_TIMEOUT_INFINITE; 858 0 stevel 859 0 stevel /* Unlock the instance while waiting for the method. */ 860 0 stevel MUTEX_UNLOCK(&inst->ri_lock); 861 0 stevel 862 4816 acruz do { 863 0 stevel r = waitpid(pid, &ret_status, NULL); 864 4816 acruz } while (r == -1 && errno == EINTR); 865 0 stevel if (r == -1) 866 0 stevel err = errno; 867 0 stevel 868 0 stevel /* Re-grab the lock. 
*/ 869 0 stevel inst = inst_lookup_by_id(id); 870 0 stevel 871 0 stevel /* 872 0 stevel * inst can't be removed, as the removal thread waits 873 0 stevel * for completion of this one. 874 0 stevel */ 875 0 stevel assert(inst != NULL); 876 0 stevel *instp = inst; 877 0 stevel 878 0 stevel if (inst->ri_timeout != NULL && inst->ri_timeout->te_fired) 879 0 stevel timeout_fired = 1; 880 0 stevel 881 0 stevel timeout_remove(inst, ctid); 882 0 stevel 883 0 stevel log_framework(LOG_DEBUG, 884 0 stevel "%s method for %s exited with status %d.\n", mname, 885 0 stevel inst->ri_i.i_fmri, WEXITSTATUS(ret_status)); 886 0 stevel 887 0 stevel if (r == -1) { 888 0 stevel log_error(LOG_WARNING, 889 0 stevel "Couldn't waitpid() for %s method of %s (%s).\n", 890 0 stevel mname, inst->ri_i.i_fmri, strerror(err)); 891 0 stevel result = EFAULT; 892 0 stevel goto contract_out; 893 0 stevel } 894 0 stevel 895 0 stevel if (type == METHOD_START) 896 0 stevel write_status(inst, mname, ret_status); 897 0 stevel 898 0 stevel /* return ERANGE if this service doesn't retry on timeout */ 899 0 stevel if (timeout_fired == 1 && timeout_retry == 0) { 900 0 stevel result = ERANGE; 901 0 stevel goto contract_out; 902 0 stevel } 903 0 stevel 904 0 stevel if (!WIFEXITED(ret_status)) { 905 0 stevel /* 906 0 stevel * If method didn't exit itself (it was killed by an 907 0 stevel * external entity, etc.), consider the entire 908 0 stevel * method_run as failed. 
909 0 stevel */ 910 0 stevel if (WIFSIGNALED(ret_status)) { 911 0 stevel char buf[SIG2STR_MAX]; 912 0 stevel (void) sig2str(WTERMSIG(ret_status), buf); 913 0 stevel 914 0 stevel log_error(LOG_WARNING, "%s: Method \"%s\" " 915 0 stevel "failed due to signal %s.\n", 916 0 stevel inst->ri_i.i_fmri, method, buf); 917 0 stevel log_instance(inst, B_TRUE, "Method \"%s\" " 918 5238 lianep "failed due to signal %s.", mname, buf); 919 0 stevel } else { 920 0 stevel log_error(LOG_WARNING, "%s: Method \"%s\" " 921 0 stevel "failed with exit status %d.\n", 922 0 stevel inst->ri_i.i_fmri, method, 923 0 stevel WEXITSTATUS(ret_status)); 924 0 stevel log_instance(inst, B_TRUE, "Method \"%s\" " 925 5238 lianep "failed with exit status %d.", mname, 926 0 stevel WEXITSTATUS(ret_status)); 927 0 stevel } 928 0 stevel result = EAGAIN; 929 0 stevel goto contract_out; 930 0 stevel } 931 0 stevel 932 0 stevel *exit_code = WEXITSTATUS(ret_status); 933 0 stevel if (*exit_code != 0) { 934 0 stevel log_error(LOG_WARNING, 935 0 stevel "%s: Method \"%s\" failed with exit status %d.\n", 936 0 stevel inst->ri_i.i_fmri, method, WEXITSTATUS(ret_status)); 937 0 stevel } 938 0 stevel 939 0 stevel log_instance(inst, B_TRUE, "Method \"%s\" exited with status " 940 5238 lianep "%d.", mname, *exit_code); 941 0 stevel 942 0 stevel if (*exit_code != 0) 943 0 stevel goto contract_out; 944 0 stevel 945 0 stevel end_time = time(NULL); 946 0 stevel 947 0 stevel /* Give service contract remaining seconds to empty */ 948 0 stevel if (timeout != METHOD_TIMEOUT_INFINITE) 949 0 stevel timeout -= (end_time - start_time); 950 0 stevel } 951 0 stevel 952 0 stevel assured_kill: 953 0 stevel /* 954 0 stevel * For stop methods, assure that the service contract has emptied 955 0 stevel * before returning. 
956 0 stevel */ 957 0 stevel if (type == METHOD_STOP && (!instance_is_transient_style(inst)) && 958 0 stevel !(contract_is_empty(inst->ri_i.i_primary_ctid))) { 959 8944 dp int times = 0; 960 0 stevel 961 0 stevel if (timeout != METHOD_TIMEOUT_INFINITE) 962 0 stevel timeout_insert(inst, inst->ri_i.i_primary_ctid, 963 0 stevel timeout); 964 0 stevel 965 0 stevel for (;;) { 966 8944 dp /* 967 8944 dp * Check frequently at first, then back off. This 968 8944 dp * keeps startd from idling while shutting down. 969 8944 dp */ 970 8944 dp if (times < 20) { 971 8944 dp (void) poll(NULL, 0, 5); 972 8944 dp times++; 973 8944 dp } else { 974 8944 dp (void) poll(NULL, 0, 100); 975 8944 dp } 976 4244 jeanm if (contract_is_empty(inst->ri_i.i_primary_ctid)) 977 0 stevel break; 978 0 stevel } 979 0 stevel 980 0 stevel if (timeout != METHOD_TIMEOUT_INFINITE) 981 0 stevel if (inst->ri_timeout->te_fired) 982 0 stevel result = EFAULT; 983 0 stevel 984 0 stevel timeout_remove(inst, inst->ri_i.i_primary_ctid); 985 0 stevel } 986 0 stevel 987 0 stevel contract_out: 988 0 stevel /* Abandon contracts for transient methods & methods that fail. */ 989 0 stevel transient = method_is_transient(inst, type); 990 0 stevel if ((transient || *exit_code != 0 || result != 0) && 991 0 stevel (restarter_is_kill_method(method) < 0)) 992 0 stevel method_remove_contract(inst, !transient, B_TRUE); 993 0 stevel 994 0 stevel out: 995 0 stevel if (ctfd >= 0) 996 0 stevel (void) close(ctfd); 997 0 stevel scf_snapshot_destroy(snap); 998 0 stevel free(method); 999 0 stevel return (result); 1000 0 stevel } 1001 0 stevel 1002 0 stevel /* 1003 0 stevel * The method thread executes a service method to effect a state transition. 1004 0 stevel * The next_state of info->sf_id should be non-_NONE on entrance, and it will 1005 0 stevel * be _NONE on exit (state will either be what next_state was (on success), or 1006 0 stevel * it will be _MAINT (on error)). 
1007 0 stevel * 1008 0 stevel * There are six classes of methods to consider: start & other (stop, refresh) 1009 0 stevel * for each of "normal" services, wait services, and transient services. For 1010 0 stevel * each, the method must be fetched from the repository & executed. fork()ed 1011 0 stevel * methods must be waited on, except for the start method of wait services 1012 0 stevel * (which must be registered with the wait subsystem via wait_register()). If 1013 0 stevel * the method succeeded (returned 0), then for start methods its contract 1014 0 stevel * should be recorded as the primary contract for the service. For other 1015 0 stevel * methods, it should be abandoned. If the method fails, then depending on 1016 0 stevel * the failure, either the method should be reexecuted or the service should 1017 0 stevel * be put into maintenance. Either way the contract should be abandoned. 1018 0 stevel */ 1019 0 stevel void * 1020 0 stevel method_thread(void *arg) 1021 0 stevel { 1022 0 stevel fork_info_t *info = arg; 1023 0 stevel restarter_inst_t *inst; 1024 0 stevel scf_handle_t *local_handle; 1025 0 stevel scf_instance_t *s_inst = NULL; 1026 0 stevel int r, exit_code; 1027 0 stevel boolean_t retryable; 1028 0 stevel const char *aux; 1029 0 stevel 1030 0 stevel assert(0 <= info->sf_method_type && info->sf_method_type <= 2); 1031 0 stevel 1032 0 stevel /* Get (and lock) the restarter_inst_t. */ 1033 0 stevel inst = inst_lookup_by_id(info->sf_id); 1034 0 stevel 1035 0 stevel assert(inst->ri_method_thread != 0); 1036 0 stevel assert(instance_in_transition(inst) == 1); 1037 0 stevel 1038 0 stevel /* 1039 0 stevel * We cannot leave this function with inst in transition, because 1040 0 stevel * protocol.c withholds messages for inst otherwise. 
1041 0 stevel */ 1042 0 stevel 1043 0 stevel log_framework(LOG_DEBUG, "method_thread() running %s method for %s.\n", 1044 0 stevel method_names[info->sf_method_type], inst->ri_i.i_fmri); 1045 0 stevel 1046 0 stevel local_handle = libscf_handle_create_bound_loop(); 1047 0 stevel 1048 0 stevel rebind_retry: 1049 0 stevel /* get scf_instance_t */ 1050 0 stevel switch (r = libscf_fmri_get_instance(local_handle, inst->ri_i.i_fmri, 1051 0 stevel &s_inst)) { 1052 0 stevel case 0: 1053 0 stevel break; 1054 0 stevel 1055 0 stevel case ECONNABORTED: 1056 0 stevel libscf_handle_rebind(local_handle); 1057 0 stevel goto rebind_retry; 1058 0 stevel 1059 0 stevel case ENOENT: 1060 0 stevel /* 1061 0 stevel * It's not there, but we need to call this so protocol.c 1062 0 stevel * doesn't think it's in transition anymore. 1063 0 stevel */ 1064 0 stevel (void) restarter_instance_update_states(local_handle, inst, 1065 0 stevel inst->ri_i.i_state, RESTARTER_STATE_NONE, RERR_NONE, 1066 0 stevel NULL); 1067 0 stevel goto out; 1068 0 stevel 1069 0 stevel case EINVAL: 1070 0 stevel case ENOTSUP: 1071 0 stevel default: 1072 0 stevel bad_error("libscf_fmri_get_instance", r); 1073 0 stevel } 1074 0 stevel 1075 0 stevel inst->ri_m_inst = s_inst; 1076 0 stevel inst->ri_mi_deleted = B_FALSE; 1077 0 stevel 1078 0 stevel retry: 1079 0 stevel if (info->sf_method_type == METHOD_START) 1080 0 stevel log_transition(inst, START_REQUESTED); 1081 0 stevel 1082 0 stevel r = method_run(&inst, info->sf_method_type, &exit_code); 1083 0 stevel 1084 0 stevel if (r == 0 && exit_code == 0) { 1085 0 stevel /* Success! */ 1086 0 stevel assert(inst->ri_i.i_next_state != RESTARTER_STATE_NONE); 1087 0 stevel 1088 0 stevel /* 1089 0 stevel * When a stop method succeeds, remove the primary contract of 1090 0 stevel * the service, unless we're going to offline, in which case 1091 0 stevel * retain the contract so we can transfer inherited contracts to 1092 0 stevel * the replacement service. 
1093 0 stevel */ 1094 0 stevel 1095 0 stevel if (info->sf_method_type == METHOD_STOP && 1096 0 stevel inst->ri_i.i_primary_ctid != 0) { 1097 0 stevel if (inst->ri_i.i_next_state == RESTARTER_STATE_OFFLINE) 1098 0 stevel inst->ri_i.i_primary_ctid_stopped = 1; 1099 0 stevel else 1100 0 stevel method_remove_contract(inst, B_TRUE, B_TRUE); 1101 0 stevel } 1102 0 stevel /* 1103 0 stevel * We don't care whether the handle was rebound because this is 1104 0 stevel * the last thing we do with it. 1105 0 stevel */ 1106 0 stevel (void) restarter_instance_update_states(local_handle, inst, 1107 0 stevel inst->ri_i.i_next_state, RESTARTER_STATE_NONE, 1108 0 stevel info->sf_event_type, NULL); 1109 0 stevel 1110 0 stevel (void) update_fault_count(inst, FAULT_COUNT_RESET); 1111 0 stevel 1112 0 stevel goto out; 1113 0 stevel } 1114 0 stevel 1115 0 stevel /* Failure. Retry or go to maintenance. */ 1116 0 stevel 1117 0 stevel if (r != 0 && r != EAGAIN) { 1118 0 stevel retryable = B_FALSE; 1119 0 stevel } else { 1120 0 stevel switch (exit_code) { 1121 0 stevel case SMF_EXIT_ERR_CONFIG: 1122 0 stevel case SMF_EXIT_ERR_NOSMF: 1123 0 stevel case SMF_EXIT_ERR_PERM: 1124 0 stevel case SMF_EXIT_ERR_FATAL: 1125 0 stevel retryable = B_FALSE; 1126 0 stevel break; 1127 0 stevel 1128 0 stevel default: 1129 0 stevel retryable = B_TRUE; 1130 0 stevel } 1131 0 stevel } 1132 0 stevel 1133 0 stevel if (retryable && update_fault_count(inst, FAULT_COUNT_INCR) != 1) 1134 0 stevel goto retry; 1135 0 stevel 1136 0 stevel /* maintenance */ 1137 0 stevel if (r == ELOOP) 1138 0 stevel log_transition(inst, START_FAILED_REPEATEDLY); 1139 0 stevel else if (r == ERANGE) 1140 0 stevel log_transition(inst, START_FAILED_TIMEOUT_FATAL); 1141 0 stevel else if (exit_code == SMF_EXIT_ERR_CONFIG) 1142 0 stevel log_transition(inst, START_FAILED_CONFIGURATION); 1143 0 stevel else if (exit_code == SMF_EXIT_ERR_FATAL) 1144 0 stevel log_transition(inst, START_FAILED_FATAL); 1145 0 stevel else 1146 0 stevel 
log_transition(inst, START_FAILED_OTHER); 1147 0 stevel 1148 0 stevel if (r == ELOOP) 1149 0 stevel aux = "restarting_too_quickly"; 1150 0 stevel else if (retryable) 1151 0 stevel aux = "fault_threshold_reached"; 1152 0 stevel else 1153 0 stevel aux = "method_failed"; 1154 0 stevel 1155 0 stevel (void) restarter_instance_update_states(local_handle, inst, 1156 0 stevel RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_FAULT, 1157 0 stevel (char *)aux); 1158 0 stevel 1159 0 stevel if (!method_is_transient(inst, info->sf_method_type) && 1160 0 stevel inst->ri_i.i_primary_ctid != 0) 1161 0 stevel method_remove_contract(inst, B_TRUE, B_TRUE); 1162 0 stevel 1163 0 stevel out: 1164 0 stevel inst->ri_method_thread = 0; 1165 6748 rm88369 1166 6748 rm88369 /* 1167 6748 rm88369 * Unlock the mutex after broadcasting to avoid a race condition 1168 6748 rm88369 * with restarter_delete_inst() when the 'inst' structure is freed. 1169 6748 rm88369 */ 1170 6748 rm88369 (void) pthread_cond_broadcast(&inst->ri_method_cv); 1171 0 stevel MUTEX_UNLOCK(&inst->ri_lock); 1172 0 stevel 1173 0 stevel scf_instance_destroy(s_inst); 1174 0 stevel scf_handle_destroy(local_handle); 1175 0 stevel startd_free(info, sizeof (fork_info_t)); 1176 0 stevel return (NULL); 1177 0 stevel } 1178