1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * DTrace - Dynamic Tracing for Solaris 29 * 30 * This is the implementation of the Solaris Dynamic Tracing framework 31 * (DTrace). The user-visible interface to DTrace is described at length in 32 * the "Solaris Dynamic Tracing Guide". The interfaces between the libdtrace 33 * library, the in-kernel DTrace framework, and the DTrace providers are 34 * described in the block comments in the <sys/dtrace.h> header file. The 35 * internal architecture of DTrace is described in the block comments in the 36 * <sys/dtrace_impl.h> header file. The comments contained within the DTrace 37 * implementation very much assume mastery of all of these sources; if one has 38 * an unanswered question about the implementation, one should consult them 39 * first. 
40 * 41 * The functions here are ordered roughly as follows: 42 * 43 * - Probe context functions 44 * - Probe hashing functions 45 * - Non-probe context utility functions 46 * - Matching functions 47 * - Provider-to-Framework API functions 48 * - Probe management functions 49 * - DIF object functions 50 * - Format functions 51 * - Predicate functions 52 * - ECB functions 53 * - Buffer functions 54 * - Enabling functions 55 * - DOF functions 56 * - Anonymous enabling functions 57 * - Consumer state functions 58 * - Helper functions 59 * - Hook functions 60 * - Driver cookbook functions 61 * 62 * Each group of functions begins with a block comment labelled the "DTrace 63 * [Group] Functions", allowing one to find each block by searching forward 64 * on capital-f functions. 65 */ 66 #include <sys/errno.h> 67 #include <sys/stat.h> 68 #include <sys/modctl.h> 69 #include <sys/conf.h> 70 #include <sys/systm.h> 71 #include <sys/ddi.h> 72 #include <sys/sunddi.h> 73 #include <sys/cpuvar.h> 74 #include <sys/kmem.h> 75 #include <sys/strsubr.h> 76 #include <sys/sysmacros.h> 77 #include <sys/dtrace_impl.h> 78 #include <sys/atomic.h> 79 #include <sys/cmn_err.h> 80 #include <sys/mutex_impl.h> 81 #include <sys/rwlock_impl.h> 82 #include <sys/ctf_api.h> 83 #include <sys/panic.h> 84 #include <sys/priv_impl.h> 85 #include <sys/policy.h> 86 #include <sys/cred_impl.h> 87 #include <sys/procfs_isa.h> 88 #include <sys/taskq.h> 89 #include <sys/mkdev.h> 90 #include <sys/kdi.h> 91 #include <sys/zone.h> 92 #include <sys/socket.h> 93 #include <netinet/in.h> 94 95 /* 96 * DTrace Tunable Variables 97 * 98 * The following variables may be tuned by adding a line to /etc/system that 99 * includes both the name of the DTrace module ("dtrace") and the name of the 100 * variable. 
/*
 * For example:
 *
 *	set dtrace:dtrace_destructive_disallow = 1
 *
 * In general, the only variables that one should be tuning this way are those
 * that affect system-wide DTrace behavior, and for which the default behavior
 * is undesirable.  Most of these variables are tunable on a per-consumer
 * basis using DTrace options, and need not be tuned on a system-wide basis.
 * When tuning these variables, avoid pathological values; while some attempt
 * is made to verify the integrity of these variables, they are not considered
 * part of the supported interface to DTrace, and they are therefore not
 * checked comprehensively.  Further, these variables should not be tuned
 * dynamically via "mdb -kw" or other means; they should only be tuned via
 * /etc/system.
 */
int		dtrace_destructive_disallow = 0;
dtrace_optval_t	dtrace_nonroot_maxsize = (16 * 1024 * 1024);	/* 16M */
size_t		dtrace_difo_maxsize = (256 * 1024);		/* 256K */
dtrace_optval_t	dtrace_dof_maxsize = (256 * 1024);		/* 256K */
size_t		dtrace_global_maxsize = (16 * 1024);		/* 16K */
size_t		dtrace_actions_max = (16 * 1024);
size_t		dtrace_retain_max = 1024;
dtrace_optval_t	dtrace_helper_actions_max = 32;
dtrace_optval_t	dtrace_helper_providers_max = 32;
dtrace_optval_t	dtrace_dstate_defsize = (1 * 1024 * 1024);	/* 1M */
size_t		dtrace_strsize_default = 256;
dtrace_optval_t	dtrace_cleanrate_default = 9900990;		/* 101 hz */
dtrace_optval_t	dtrace_cleanrate_min = 200000;			/* 5000 hz */
dtrace_optval_t	dtrace_cleanrate_max = (uint64_t)60 * NANOSEC;	/* 1/minute */
dtrace_optval_t	dtrace_aggrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t	dtrace_statusrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t	dtrace_statusrate_max = (hrtime_t)10 * NANOSEC;	/* 6/minute */
dtrace_optval_t	dtrace_switchrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t	dtrace_nspec_default = 1;
dtrace_optval_t	dtrace_specsize_default = 32 * 1024;		/* 32K */
dtrace_optval_t	dtrace_stackframes_default = 20;
dtrace_optval_t	dtrace_ustackframes_default = 20;
dtrace_optval_t	dtrace_jstackframes_default = 50;
dtrace_optval_t	dtrace_jstackstrsize_default = 512;
int		dtrace_msgdsize_max = 128;
hrtime_t	dtrace_chill_max = 500 * (NANOSEC / MILLISEC);	/* 500 ms */
hrtime_t	dtrace_chill_interval = NANOSEC;		/* 1000 ms */
int		dtrace_devdepth_max = 32;
int		dtrace_err_verbose;	/* no initializer: starts out zero */
hrtime_t	dtrace_deadman_interval = NANOSEC;		/* 1 sec */
hrtime_t	dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC; /* 10 sec */
hrtime_t	dtrace_deadman_user = (hrtime_t)30 * NANOSEC;	/* 30 sec */

/*
 * DTrace External Variables
 *
 * As dtrace(7D) is a kernel module, any DTrace variables are obviously
 * available to DTrace consumers via the backtick (`) syntax.  One of these,
 * dtrace_zero, is made deliberately so:  it is provided as a source of
 * well-known, zero-filled memory.  While this variable is not documented,
 * it is used by some translators as an implementation detail.
 */
/*
 * dtrace_zero: 256 bytes of well-known, zero-filled memory.  It is not
 * static, so it is visible outside this file.
 */
const char	dtrace_zero[256] = { 0 };	/* zero-filled memory */

/*
 * DTrace Internal Variables
 *
 * All of these are file-local (static).
 */
static dev_info_t	*dtrace_devi;		/* device info */
static vmem_t		*dtrace_arena;		/* probe ID arena */
static vmem_t		*dtrace_minor;		/* minor number arena */
static taskq_t		*dtrace_taskq;		/* task queue */
static dtrace_probe_t	**dtrace_probes;	/* array of all probes */
static int		dtrace_nprobes;		/* number of probes */
static dtrace_provider_t *dtrace_provider;	/* provider list */
static dtrace_meta_t	*dtrace_meta_pid;	/* user-land meta provider */
static int		dtrace_opens;		/* number of opens */
static int		dtrace_helpers;		/* number of helpers */
static void		*dtrace_softstate;	/* softstate pointer */
static dtrace_hash_t	*dtrace_bymod;		/* probes hashed by module */
static dtrace_hash_t	*dtrace_byfunc;		/* probes hashed by function */
static dtrace_hash_t	*dtrace_byname;		/* probes hashed by name */
static dtrace_toxrange_t *dtrace_toxrange;	/* toxic range array */
static int		dtrace_toxranges;	/* number of toxic ranges */
static int		dtrace_toxranges_max;	/* size of toxic range array */
static dtrace_anon_t	dtrace_anon;		/* anonymous enabling */
static kmem_cache_t	*dtrace_state_cache;	/* cache for dynamic state */
static uint64_t		dtrace_vtime_references; /* number of vtimestamp refs */
static kthread_t	*dtrace_panicked;	/* panicking thread */
static dtrace_ecb_t	*dtrace_ecb_create_cache; /* cached created ECB */
static dtrace_genid_t	dtrace_probegen;	/* current probe generation */
static dtrace_helpers_t *dtrace_deferred_pid;	/* deferred helper list */
static dtrace_enabling_t *dtrace_retained;	/* list of retained enablings */
static dtrace_genid_t	dtrace_retained_gen;	/* current retained enab gen */
static dtrace_dynvar_t	dtrace_dynhash_sink;	/* end of dynamic hash chains */

/*
 * DTrace Locking
 * DTrace is protected by three (relatively coarse-grained) locks:
 *
 * (1) dtrace_lock is required to manipulate essentially any DTrace state,
 *     including enabling state, probes, ECBs, consumer state, helper state,
 *     etc.  Importantly, dtrace_lock is _not_ required when in probe context;
 *     probe context is lock-free -- synchronization is handled via the
 *     dtrace_sync() cross call mechanism.
 *
 * (2) dtrace_provider_lock is required when manipulating provider state, or
 *     when provider state must be held constant.
 *
 * (3) dtrace_meta_lock is required when manipulating meta provider state, or
 *     when meta provider state must be held constant.
 *
 * The lock ordering between these three locks is dtrace_meta_lock before
 * dtrace_provider_lock before dtrace_lock.  (In particular, there are
 * several places where dtrace_provider_lock is held by the framework as it
 * calls into the providers -- which then call back into the framework,
 * grabbing dtrace_lock.)
 *
 * There are two other locks in the mix:  mod_lock and cpu_lock.  With respect
 * to dtrace_provider_lock and dtrace_lock, cpu_lock continues its historical
 * role as a coarse-grained lock; it is acquired before both of these locks.
 * With respect to dtrace_meta_lock, its behavior is stranger:  cpu_lock must
 * be acquired _between_ dtrace_meta_lock and any other DTrace locks.
 * mod_lock is similar with respect to dtrace_provider_lock in that it must be
 * acquired _between_ dtrace_provider_lock and dtrace_lock.
 */
static kmutex_t		dtrace_lock;		/* probe state lock */
static kmutex_t		dtrace_provider_lock;	/* provider state lock */
static kmutex_t		dtrace_meta_lock;	/* meta-provider state lock */

/*
 * DTrace Provider Variables
 *
 * These are the variables relating to DTrace as a provider (that is, the
 * provider of the BEGIN, END, and ERROR probes).
 */
/*
 * Attribute table, operations vector and probe identifiers used by DTrace
 * when it acts as a provider itself, followed by the helper tracing
 * variables.
 */
static dtrace_pattr_t	dtrace_provider_attr = {
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
};

/*
 * Do-nothing function.  It is cast to the various entry point signatures
 * in dtrace_provider_ops below.
 */
static void
dtrace_nullop(void)
{}

/*
 * Operations vector:  seven of the ten slots are dtrace_nullop cast to the
 * slot's signature, and the remaining three are NULL.
 * NOTE(review): the slot order must match the member order of dtrace_pops_t
 * in <sys/dtrace.h> -- confirm there before reordering anything here.
 */
static dtrace_pops_t	dtrace_provider_ops = {
	(void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop,
	(void (*)(void *, struct modctl *))dtrace_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	NULL,
	NULL,
	NULL,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop
};

static dtrace_id_t	dtrace_probeid_begin;	/* special BEGIN probe */
static dtrace_id_t	dtrace_probeid_end;	/* special END probe */
dtrace_id_t		dtrace_probeid_error;	/* special ERROR probe */

/*
 * DTrace Helper Tracing Variables
 */
uint32_t dtrace_helptrace_next = 0;
uint32_t dtrace_helptrace_nlocals;
char	*dtrace_helptrace_buffer;
int	dtrace_helptrace_bufsize = 512 * 1024;	/* 512K */

/* Helper tracing defaults to on for DEBUG builds and off otherwise. */
#ifdef DEBUG
int	dtrace_helptrace_enabled = 1;
#else
int	dtrace_helptrace_enabled = 0;
#endif

/*
 * DTrace Error Hashing
 *
 * On DEBUG kernels, DTrace will track the errors that it has seen in a hash
 * table.  This is very useful for checking coverage of tests that are
 * expected to induce DIF or DOF processing errors, and may be useful for
 * debugging problems in the DIF code generator or in DOF generation.  The
 * error hash may be examined with the ::dtrace_errhash MDB dcmd.
 */
/*
 * Error hash state.  These variables exist only when DEBUG is defined.
 */
#ifdef DEBUG
static dtrace_errhash_t	dtrace_errhash[DTRACE_ERRHASHSZ];
static const char *dtrace_errlast;
static kthread_t *dtrace_errthread;
static kmutex_t dtrace_errlock;
#endif

/*
 * DTrace Macros and Constants
 *
 * These are various macros that are useful in various spots in the
 * implementation, along with a few random constants that have no meaning
 * outside of the implementation.  There is no real structure to this cpp
 * mishmash -- but is there ever?
 */

/*
 * The DTRACE_HASH* macros locate a field inside a probe by adding a byte
 * offset stored in the hash (dth_stroffs, dth_nextoffs, dth_prevoffs) to the
 * probe's address.  DTRACE_HASHSTR hashes the string found at dth_stroffs;
 * DTRACE_HASHNEXT and DTRACE_HASHPREV yield the address of the probe's
 * next/previous link; DTRACE_HASHEQ compares the dth_stroffs strings of two
 * probes with strcmp().
 */
#define	DTRACE_HASHSTR(hash, probe)	\
	dtrace_hash_str(*((char **)((uintptr_t)(probe) + (hash)->dth_stroffs)))

#define	DTRACE_HASHNEXT(hash, probe)	\
	(dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_nextoffs)

#define	DTRACE_HASHPREV(hash, probe)	\
	(dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_prevoffs)

#define	DTRACE_HASHEQ(hash, lhs, rhs)	\
	(strcmp(*((char **)((uintptr_t)(lhs) + (hash)->dth_stroffs)), \
	    *((char **)((uintptr_t)(rhs) + (hash)->dth_stroffs))) == 0)

#define	DTRACE_AGGHASHSIZE_SLEW		17

/* Byte offset equal to the size of three 32-bit words (12 bytes). */
#define	DTRACE_V4MAPPED_OFFSET		(sizeof (uint32_t) * 3)

/*
 * The key for a thread-local variable consists of the lower 61 bits of the
 * t_did, plus the 3 bits of the highest active interrupt above LOCK_LEVEL.
 * We add DIF_VARIABLE_MAX to t_did to assure that the thread key is never
 * equal to a variable identifier.  This is necessary (but not sufficient) to
 * assure that global associative arrays never collide with thread-local
 * variables.  To guarantee that they cannot collide, we must also define the
 * order for keying dynamic variables.  That order is:
 *
 *   [ key0 ] ... [ keyn ] [ variable-key ] [ tls-key ]
 *
 * Because the variable-key and the tls-key are in orthogonal spaces, there is
 * no way for a global variable key signature to match a thread-local key
 * signature.
 */
/*
 * DTRACE_TLS_THRKEY(where) stores the thread-local key for the current
 * thread into 'where':  the low 61 bits are (t_did + DIF_VARIABLE_MAX), and
 * the top 3 bits are the number of right shifts needed to clear
 * (cpu_intr_actv >> (LOCK_LEVEL + 1)).  The macro expands to a brace-enclosed
 * block that declares its own locals 'intr' and 'actv'.
 */
#define	DTRACE_TLS_THRKEY(where) { \
	uint_t intr = 0; \
	uint_t actv = CPU->cpu_intr_actv >> (LOCK_LEVEL + 1); \
	for (; actv; actv >>= 1) \
		intr++; \
	ASSERT(intr < (1 << 3)); \
	(where) = ((curthread->t_did + DIF_VARIABLE_MAX) & \
	    (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \
}

/*
 * Byte swapping, built up from a one-byte mask.  The argument is evaluated
 * several times, so it must not have side effects; DT_BSWAP_64 shifts by 32
 * and therefore needs a 64-bit operand.
 */
#define	DT_BSWAP_8(x)	((x) & 0xff)
#define	DT_BSWAP_16(x)	((DT_BSWAP_8(x) << 8) | DT_BSWAP_8((x) >> 8))
#define	DT_BSWAP_32(x)	((DT_BSWAP_16(x) << 16) | DT_BSWAP_16((x) >> 16))
#define	DT_BSWAP_64(x)	((DT_BSWAP_32(x) << 32) | DT_BSWAP_32((x) >> 32))

#define	DT_MASK_LO 0x00000000FFFFFFFFULL

/*
 * Store 'what', converted to 'type', at byte offset 'offset' from 'tomax'.
 * Every parameter is parenthesized so that expression arguments bind as the
 * caller intended.  The expansion already ends in a semicolon (kept for
 * compatibility with existing uses), so it is not safe as the unbraced body
 * of an if that has an else.
 */
#define	DTRACE_STORE(type, tomax, offset, what) \
	*((type *)((uintptr_t)(tomax) + (uintptr_t)(offset))) = (type)(what);

/*
 * If 'addr' is not a multiple of 'size' (which must be a power of two), set
 * CPU_DTRACE_BADALIGN in *flags, record the address as the illegal value and
 * return 0 from the enclosing function.  Compiled out when __i386 is
 * defined.  Every parameter is parenthesized so that expression arguments
 * bind as the caller intended.
 */
#ifndef __i386
#define	DTRACE_ALIGNCHECK(addr, size, flags)				\
	if ((addr) & ((size) - 1)) {					\
		*(flags) |= CPU_DTRACE_BADALIGN;			\
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = (addr);	\
		return (0);						\
	}
#else
#define	DTRACE_ALIGNCHECK(addr, size, flags)
#endif

/*
 * Test whether a range of memory starting at testaddr of size testsz falls
 * within the range of memory described by addr, sz.  We take care to avoid
 * problems with overflow and underflow of the unsigned quantities, and
 * disallow all negative sizes.  Ranges of size 0 are allowed.
 *
 * The comparisons rely on unsigned wraparound, so all four arguments must
 * have unsigned types.
 */
#define	DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz) \
	((testaddr) - (baseaddr) < (basesz) && \
	(testaddr) + (testsz) - (baseaddr) <= (basesz) && \
	(testaddr) + (testsz) >= (testaddr))

/*
 * Test whether alloc_sz bytes will fit in the scratch region.  We isolate
 * alloc_sz on the righthand side of the comparison in order to avoid overflow
 * or underflow in the comparison with it.  This is simpler than the INRANGE
 * check above, because we know that the dtms_scratch_ptr is valid in the
 * range.  Allocations of size zero are allowed.
 */
/*
 * DTRACE_INSCRATCH(mstate, alloc_sz) is true when the space between
 * dtms_scratch_ptr and the end of the scratch region (dtms_scratch_base +
 * dtms_scratch_size) is at least alloc_sz bytes.
 */
#define	DTRACE_INSCRATCH(mstate, alloc_sz) \
	((mstate)->dtms_scratch_base + (mstate)->dtms_scratch_size - \
	(mstate)->dtms_scratch_ptr >= (alloc_sz))

/*
 * DTRACE_LOADFUNC(bits) defines dtrace_load<bits>(), which loads a
 * uint<bits>_t from 'addr' as follows:
 *
 *   - the alignment check comes first (see DTRACE_ALIGNCHECK);
 *   - if [addr, addr + size) overlaps any entry of dtrace_toxrange, then
 *     CPU_DTRACE_BADADDR is set, the address is recorded as the illegal
 *     value and 0 is returned without touching memory;
 *   - otherwise the load is issued with CPU_DTRACE_NOFAULT set, and the
 *     flag is cleared again immediately afterwards.
 *
 * The result is the loaded value, or 0 if CPU_DTRACE_FAULT is set in the
 * per-CPU flags once the load has been issued.  The order of the statements
 * around the load matters; do not rearrange them.
 */
#define	DTRACE_LOADFUNC(bits)						\
/*CSTYLED*/								\
uint##bits##_t								\
dtrace_load##bits(uintptr_t addr)					\
{									\
	size_t size = bits / NBBY;					\
	/*CSTYLED*/							\
	uint##bits##_t rval;						\
	int i;								\
	volatile uint16_t *flags = (volatile uint16_t *)		\
	    &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;			\
									\
	DTRACE_ALIGNCHECK(addr, size, flags);				\
									\
	for (i = 0; i < dtrace_toxranges; i++) {			\
		if (addr >= dtrace_toxrange[i].dtt_limit)		\
			continue;					\
									\
		if (addr + size <= dtrace_toxrange[i].dtt_base)	\
			continue;					\
									\
		/*							\
		 * This address falls within a toxic region; return 0.	\
		 */							\
		*flags |= CPU_DTRACE_BADADDR;				\
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr;	\
		return (0);						\
	}								\
									\
	*flags |= CPU_DTRACE_NOFAULT;					\
	/*CSTYLED*/							\
	rval = *((volatile uint##bits##_t *)addr);			\
	*flags &= ~CPU_DTRACE_NOFAULT;					\
									\
	return (!(*flags & CPU_DTRACE_FAULT) ? rval : 0);		\
}

/* Pointer-sized load:  64-bit when _LP64 is defined, 32-bit otherwise. */
#ifdef _LP64
#define	dtrace_loadptr	dtrace_load64
#else
#define	dtrace_loadptr	dtrace_load32
#endif

#define	DTRACE_DYNHASH_FREE	0
#define	DTRACE_DYNHASH_SINK	1
#define	DTRACE_DYNHASH_VALID	2

#define	DTRACE_MATCH_NEXT	0
#define	DTRACE_MATCH_DONE	1
/* True when the probe's function name (dtpr_func) is a non-empty string. */
#define	DTRACE_ANCHORED(probe)	((probe)->dtpr_func[0] != '\0')
#define	DTRACE_STATE_ALIGN	64

/*
 * Map the per-CPU fault flags to a DTRACEFLT_* code.  The flags are tested
 * in the order written, so when several are set the first one listed wins;
 * if none of the listed flags is set the result is DTRACEFLT_UNKNOWN.
 */
#define	DTRACE_FLAGS2FLT(flags)						\
	(((flags) & CPU_DTRACE_BADADDR) ? DTRACEFLT_BADADDR :		\
	((flags) & CPU_DTRACE_ILLOP) ? DTRACEFLT_ILLOP :		\
	((flags) & CPU_DTRACE_DIVZERO) ? DTRACEFLT_DIVZERO :		\
	((flags) & CPU_DTRACE_KPRIV) ? DTRACEFLT_KPRIV :		\
	((flags) & CPU_DTRACE_UPRIV) ? DTRACEFLT_UPRIV :		\
	((flags) & CPU_DTRACE_TUPOFLOW) ?  DTRACEFLT_TUPOFLOW :	\
	((flags) & CPU_DTRACE_BADALIGN) ?  DTRACEFLT_BADALIGN :	\
	((flags) & CPU_DTRACE_NOSCRATCH) ?  DTRACEFLT_NOSCRATCH :	\
	((flags) & CPU_DTRACE_BADSTACK) ?  DTRACEFLT_BADSTACK :	\
	DTRACEFLT_UNKNOWN)

/* True when the action is a DIF expression whose return type is a string. */
#define	DTRACEACT_ISSTRING(act)						\
	((act)->dta_kind == DTRACEACT_DIFEXPR &&			\
	(act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING)

/*
 * Forward declarations for static functions that are referenced before
 * they are defined.
 */
static size_t dtrace_strlen(const char *, size_t);
static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id);
static void dtrace_enabling_provide(dtrace_provider_t *);
static int dtrace_enabling_match(dtrace_enabling_t *, int *);
static void dtrace_enabling_matchall(void);
static dtrace_state_t *dtrace_anon_grab(void);
static uint64_t dtrace_helper(int, dtrace_mstate_t *,
    dtrace_state_t *, uint64_t, uint64_t);
static dtrace_helpers_t *dtrace_helpers_create(proc_t *);
static void dtrace_buffer_drop(dtrace_buffer_t *);
static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t,
    dtrace_state_t *, dtrace_mstate_t *);
static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
    dtrace_optval_t);
static int dtrace_ecb_create_enable(dtrace_probe_t *, void *);
static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);

/*
 * DTrace Probe Context Functions
 *
 * These functions are called from probe context.  Because probe context is
 * any context in which C may be called, arbitrary locks may be held,
 * interrupts may be disabled, we may be in arbitrary dispatched state, etc.
 * As a result, functions called from probe context may only call other DTrace
 * support functions -- they may not interact at all with the system at large.
 * (Note that the ASSERT macro is made probe-context safe by redefining it in
 * terms of dtrace_assfail(), a probe-context safe function.) If arbitrary
 * loads are to be performed from probe context, they _must_ be in terms of
 * the safe dtrace_load*() variants.
 */
479 * 480 * Some functions in this block are not actually called from probe context; 481 * for these functions, there will be a comment above the function reading 482 * "Note: not called from probe context." 483 */ 484 void 485 dtrace_panic(const char *format, ...) 486 { 487 va_list alist; 488 489 va_start(alist, format); 490 dtrace_vpanic(format, alist); 491 va_end(alist); 492 } 493 494 int 495 dtrace_assfail(const char *a, const char *f, int l) 496 { 497 dtrace_panic("assertion failed: %s, file: %s, line: %d", a, f, l); 498 499 /* 500 * We just need something here that even the most clever compiler 501 * cannot optimize away. 502 */ 503 return (a[(uintptr_t)f]); 504 } 505 506 /* 507 * Atomically increment a specified error counter from probe context. 508 */ 509 static void 510 dtrace_error(uint32_t *counter) 511 { 512 /* 513 * Most counters stored to in probe context are per-CPU counters. 514 * However, there are some error conditions that are sufficiently 515 * arcane that they don't merit per-CPU storage. If these counters 516 * are incremented concurrently on different CPUs, scalability will be 517 * adversely affected -- but we don't expect them to be white-hot in a 518 * correctly constructed enabling... 519 */ 520 uint32_t oval, nval; 521 522 do { 523 oval = *counter; 524 525 if ((nval = oval + 1) == 0) { 526 /* 527 * If the counter would wrap, set it to 1 -- assuring 528 * that the counter is never zero when we have seen 529 * errors. (The counter must be 32-bits because we 530 * aren't guaranteed a 64-bit compare&swap operation.) 531 * To save this code both the infamy of being fingered 532 * by a priggish news story and the indignity of being 533 * the target of a neo-puritan witch trial, we're 534 * carefully avoiding any colorful description of the 535 * likelihood of this condition -- but suffice it to 536 * say that it is only slightly more likely than the 537 * overflow of predicate cache IDs, as discussed in 538 * dtrace_predicate_create(). 
539 */ 540 nval = 1; 541 } 542 } while (dtrace_cas32(counter, oval, nval) != oval); 543 } 544 545 /* 546 * Use the DTRACE_LOADFUNC macro to define functions for each of loading a 547 * uint8_t, a uint16_t, a uint32_t and a uint64_t. 548 */ 549 DTRACE_LOADFUNC(8) 550 DTRACE_LOADFUNC(16) 551 DTRACE_LOADFUNC(32) 552 DTRACE_LOADFUNC(64) 553 554 static int 555 dtrace_inscratch(uintptr_t dest, size_t size, dtrace_mstate_t *mstate) 556 { 557 if (dest < mstate->dtms_scratch_base) 558 return (0); 559 560 if (dest + size < dest) 561 return (0); 562 563 if (dest + size > mstate->dtms_scratch_ptr) 564 return (0); 565 566 return (1); 567 } 568 569 static int 570 dtrace_canstore_statvar(uint64_t addr, size_t sz, 571 dtrace_statvar_t **svars, int nsvars) 572 { 573 int i; 574 575 for (i = 0; i < nsvars; i++) { 576 dtrace_statvar_t *svar = svars[i]; 577 578 if (svar == NULL || svar->dtsv_size == 0) 579 continue; 580 581 if (DTRACE_INRANGE(addr, sz, svar->dtsv_data, svar->dtsv_size)) 582 return (1); 583 } 584 585 return (0); 586 } 587 588 /* 589 * Check to see if the address is within a memory region to which a store may 590 * be issued. This includes the DTrace scratch areas, and any DTrace variable 591 * region. The caller of dtrace_canstore() is responsible for performing any 592 * alignment checks that are needed before stores are actually executed. 593 */ 594 static int 595 dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, 596 dtrace_vstate_t *vstate) 597 { 598 /* 599 * First, check to see if the address is in scratch space... 600 */ 601 if (DTRACE_INRANGE(addr, sz, mstate->dtms_scratch_base, 602 mstate->dtms_scratch_size)) 603 return (1); 604 605 /* 606 * Now check to see if it's a dynamic variable. This check will pick 607 * up both thread-local variables and any global dynamically-allocated 608 * variables. 
609 */ 610 if (DTRACE_INRANGE(addr, sz, (uintptr_t)vstate->dtvs_dynvars.dtds_base, 611 vstate->dtvs_dynvars.dtds_size)) { 612 dtrace_dstate_t *dstate = &vstate->dtvs_dynvars; 613 uintptr_t base = (uintptr_t)dstate->dtds_base + 614 (dstate->dtds_hashsize * sizeof (dtrace_dynhash_t)); 615 uintptr_t chunkoffs; 616 617 /* 618 * Before we assume that we can store here, we need to make 619 * sure that it isn't in our metadata -- storing to our 620 * dynamic variable metadata would corrupt our state. For 621 * the range to not include any dynamic variable metadata, 622 * it must: 623 * 624 * (1) Start above the hash table that is at the base of 625 * the dynamic variable space 626 * 627 * (2) Have a starting chunk offset that is beyond the 628 * dtrace_dynvar_t that is at the base of every chunk 629 * 630 * (3) Not span a chunk boundary 631 * 632 */ 633 if (addr < base) 634 return (0); 635 636 chunkoffs = (addr - base) % dstate->dtds_chunksize; 637 638 if (chunkoffs < sizeof (dtrace_dynvar_t)) 639 return (0); 640 641 if (chunkoffs + sz > dstate->dtds_chunksize) 642 return (0); 643 644 return (1); 645 } 646 647 /* 648 * Finally, check the static local and global variables. These checks 649 * take the longest, so we perform them last. 650 */ 651 if (dtrace_canstore_statvar(addr, sz, 652 vstate->dtvs_locals, vstate->dtvs_nlocals)) 653 return (1); 654 655 if (dtrace_canstore_statvar(addr, sz, 656 vstate->dtvs_globals, vstate->dtvs_nglobals)) 657 return (1); 658 659 return (0); 660 } 661 662 663 /* 664 * Convenience routine to check to see if the address is within a memory 665 * region in which a load may be issued given the user's privilege level; 666 * if not, it sets the appropriate error flags and loads 'addr' into the 667 * illegal value slot. 668 * 669 * DTrace subroutines (DIF_SUBR_*) should use this helper to implement 670 * appropriate memory access protection. 
671 */ 672 static int 673 dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, 674 dtrace_vstate_t *vstate) 675 { 676 volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval; 677 678 /* 679 * If we hold the privilege to read from kernel memory, then 680 * everything is readable. 681 */ 682 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) 683 return (1); 684 685 /* 686 * You can obviously read that which you can store. 687 */ 688 if (dtrace_canstore(addr, sz, mstate, vstate)) 689 return (1); 690 691 /* 692 * We're allowed to read from our own string table. 693 */ 694 if (DTRACE_INRANGE(addr, sz, (uintptr_t)mstate->dtms_difo->dtdo_strtab, 695 mstate->dtms_difo->dtdo_strlen)) 696 return (1); 697 698 DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV); 699 *illval = addr; 700 return (0); 701 } 702 703 /* 704 * Convenience routine to check to see if a given string is within a memory 705 * region in which a load may be issued given the user's privilege level; 706 * this exists so that we don't need to issue unnecessary dtrace_strlen() 707 * calls in the event that the user has all privileges. 708 */ 709 static int 710 dtrace_strcanload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, 711 dtrace_vstate_t *vstate) 712 { 713 size_t strsz; 714 715 /* 716 * If we hold the privilege to read from kernel memory, then 717 * everything is readable. 718 */ 719 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) 720 return (1); 721 722 strsz = 1 + dtrace_strlen((char *)(uintptr_t)addr, sz); 723 if (dtrace_canload(addr, strsz, mstate, vstate)) 724 return (1); 725 726 return (0); 727 } 728 729 /* 730 * Convenience routine to check to see if a given variable is within a memory 731 * region in which a load may be issued given the user's privilege level. 
732 */ 733 static int 734 dtrace_vcanload(void *src, dtrace_diftype_t *type, dtrace_mstate_t *mstate, 735 dtrace_vstate_t *vstate) 736 { 737 size_t sz; 738 ASSERT(type->dtdt_flags & DIF_TF_BYREF); 739 740 /* 741 * If we hold the privilege to read from kernel memory, then 742 * everything is readable. 743 */ 744 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) 745 return (1); 746 747 if (type->dtdt_kind == DIF_TYPE_STRING) 748 sz = dtrace_strlen(src, 749 vstate->dtvs_state->dts_options[DTRACEOPT_STRSIZE]) + 1; 750 else 751 sz = type->dtdt_size; 752 753 return (dtrace_canload((uintptr_t)src, sz, mstate, vstate)); 754 } 755 756 /* 757 * Compare two strings using safe loads. 758 */ 759 static int 760 dtrace_strncmp(char *s1, char *s2, size_t limit) 761 { 762 uint8_t c1, c2; 763 volatile uint16_t *flags; 764 765 if (s1 == s2 || limit == 0) 766 return (0); 767 768 flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags; 769 770 do { 771 if (s1 == NULL) { 772 c1 = '\0'; 773 } else { 774 c1 = dtrace_load8((uintptr_t)s1++); 775 } 776 777 if (s2 == NULL) { 778 c2 = '\0'; 779 } else { 780 c2 = dtrace_load8((uintptr_t)s2++); 781 } 782 783 if (c1 != c2) 784 return (c1 - c2); 785 } while (--limit && c1 != '\0' && !(*flags & CPU_DTRACE_FAULT)); 786 787 return (0); 788 } 789 790 /* 791 * Compute strlen(s) for a string using safe memory accesses. The additional 792 * len parameter is used to specify a maximum length to ensure completion. 793 */ 794 static size_t 795 dtrace_strlen(const char *s, size_t lim) 796 { 797 uint_t len; 798 799 for (len = 0; len != lim; len++) { 800 if (dtrace_load8((uintptr_t)s++) == '\0') 801 break; 802 } 803 804 return (len); 805 } 806 807 /* 808 * Check if an address falls within a toxic region. 
809 */ 810 static int 811 dtrace_istoxic(uintptr_t kaddr, size_t size) 812 { 813 uintptr_t taddr, tsize; 814 int i; 815 816 for (i = 0; i < dtrace_toxranges; i++) { 817 taddr = dtrace_toxrange[i].dtt_base; 818 tsize = dtrace_toxrange[i].dtt_limit - taddr; 819 820 if (kaddr - taddr < tsize) { 821 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); 822 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = kaddr; 823 return (1); 824 } 825 826 if (taddr - kaddr < size) { 827 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); 828 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = taddr; 829 return (1); 830 } 831 } 832 833 return (0); 834 } 835 836 /* 837 * Copy src to dst using safe memory accesses. The src is assumed to be unsafe 838 * memory specified by the DIF program. The dst is assumed to be safe memory 839 * that we can store to directly because it is managed by DTrace. As with 840 * standard bcopy, overlapping copies are handled properly. 841 */ 842 static void 843 dtrace_bcopy(const void *src, void *dst, size_t len) 844 { 845 if (len != 0) { 846 uint8_t *s1 = dst; 847 const uint8_t *s2 = src; 848 849 if (s1 <= s2) { 850 do { 851 *s1++ = dtrace_load8((uintptr_t)s2++); 852 } while (--len != 0); 853 } else { 854 s2 += len; 855 s1 += len; 856 857 do { 858 *--s1 = dtrace_load8((uintptr_t)--s2); 859 } while (--len != 0); 860 } 861 } 862 } 863 864 /* 865 * Copy src to dst using safe memory accesses, up to either the specified 866 * length, or the point that a nul byte is encountered. The src is assumed to 867 * be unsafe memory specified by the DIF program. The dst is assumed to be 868 * safe memory that we can store to directly because it is managed by DTrace. 869 * Unlike dtrace_bcopy(), overlapping regions are not handled. 
870 */ 871 static void 872 dtrace_strcpy(const void *src, void *dst, size_t len) 873 { 874 if (len != 0) { 875 uint8_t *s1 = dst, c; 876 const uint8_t *s2 = src; 877 878 do { 879 *s1++ = c = dtrace_load8((uintptr_t)s2++); 880 } while (--len != 0 && c != '\0'); 881 } 882 } 883 884 /* 885 * Copy src to dst, deriving the size and type from the specified (BYREF) 886 * variable type. The src is assumed to be unsafe memory specified by the DIF 887 * program. The dst is assumed to be DTrace variable memory that is of the 888 * specified type; we assume that we can store to directly. 889 */ 890 static void 891 dtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type) 892 { 893 ASSERT(type->dtdt_flags & DIF_TF_BYREF); 894 895 if (type->dtdt_kind == DIF_TYPE_STRING) { 896 dtrace_strcpy(src, dst, type->dtdt_size); 897 } else { 898 dtrace_bcopy(src, dst, type->dtdt_size); 899 } 900 } 901 902 /* 903 * Compare s1 to s2 using safe memory accesses. The s1 data is assumed to be 904 * unsafe memory specified by the DIF program. The s2 data is assumed to be 905 * safe memory that we can access directly because it is managed by DTrace. 906 */ 907 static int 908 dtrace_bcmp(const void *s1, const void *s2, size_t len) 909 { 910 volatile uint16_t *flags; 911 912 flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags; 913 914 if (s1 == s2) 915 return (0); 916 917 if (s1 == NULL || s2 == NULL) 918 return (1); 919 920 if (s1 != s2 && len != 0) { 921 const uint8_t *ps1 = s1; 922 const uint8_t *ps2 = s2; 923 924 do { 925 if (dtrace_load8((uintptr_t)ps1++) != *ps2++) 926 return (1); 927 } while (--len != 0 && !(*flags & CPU_DTRACE_FAULT)); 928 } 929 return (0); 930 } 931 932 /* 933 * Zero the specified region using a simple byte-by-byte loop. Note that this 934 * is for safe DTrace-managed memory only. 
/*
 * dtrace_bzero() writes len zero bytes starting at dst, one byte at a time.
 * It stores directly, without any safe-access checks.
 */
static void
dtrace_bzero(void *dst, size_t len)
{
	uint8_t *cp;

	for (cp = dst; len != 0; len--)
		*cp++ = 0;
}

/*
 * 128-bit addition.  Each operand is a two-element array holding the low 64
 * bits in element 0 and the high 64 bits in element 1.  The result is built
 * in a local before it is stored, so sum may alias either addend.  A carry
 * out of the high word is discarded.
 */
static void
dtrace_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum)
{
	uint64_t result[2];

	result[0] = addend1[0] + addend2[0];

	/* The low word wrapped exactly when it is smaller than an addend. */
	result[1] = addend1[1] + addend2[1] +
	    (result[0] < addend1[0] || result[0] < addend2[0] ? 1 : 0);

	sum[0] = result[0];
	sum[1] = result[1];
}

/*
 * Shift the 128-bit value in a by b.  If b is positive, shift left.
 * If b is negative, shift right.
 *
 * a[0] holds the low 64 bits and a[1] the high 64 bits.  The right shift is
 * logical (zero-filling).  A shift of 128 bits or more in either direction
 * yields zero; this is tested before b is negated, so that the most negative
 * int is handled as well.
 */
static void
dtrace_shift_128(uint64_t *a, int b)
{
	if (b == 0)
		return;

	if (b >= 128 || b <= -128) {
		a[0] = 0;
		a[1] = 0;
		return;
	}

	if (b < 0) {
		b = -b;
		if (b >= 64) {
			a[0] = a[1] >> (b - 64);
			a[1] = 0;
		} else {
			/*
			 * The low b bits of the high word become the top b
			 * bits of the low word.  The left shift already
			 * discards every other bit, so no mask is needed
			 * (and a mask of the low 64 - b bits would drop bits
			 * whenever b exceeds 32).
			 */
			a[0] = (a[0] >> b) | (a[1] << (64 - b));
			a[1] >>= b;
		}
	} else {
		if (b >= 64) {
			a[1] = a[0] << (b - 64);
			a[0] = 0;
		} else {
			a[1] = (a[1] << b) | (a[0] >> (64 - b));
			a[0] <<= b;
		}
	}
}

/*
 * The basic idea is to break the 2 64-bit values into 4 32-bit values,
 * use native multiplication on those, and then re-combine into the
 * resulting 128-bit value.
 */
/*
 * dtrace_multiply_128() stores the full 128-bit product of factor1 and
 * factor2 into product[], with the low 64 bits in product[0] and the high 64
 * bits in product[1].  With each factor split into 32-bit halves:
 *
 *	(hi1 << 32 + lo1) * (hi2 << 32 + lo2) =
 *	hi1 * hi2 << 64 +
 *	hi1 * lo2 << 32 +
 *	hi2 * lo1 << 32 +
 *	lo1 * lo2
 */
static void
dtrace_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product)
{
	uint64_t hi1, hi2, lo1, lo2;
	uint64_t tmp[2];

	hi1 = factor1 >> 32;
	hi2 = factor2 >> 32;

	lo1 = factor1 & DT_MASK_LO;
	lo2 = factor2 & DT_MASK_LO;

	/* The lo*lo and hi*hi terms drop straight into the two words. */
	product[0] = lo1 * lo2;
	product[1] = hi1 * hi2;

	/* Each cross term is shifted left by 32 as a 128-bit value, then added. */
	tmp[0] = hi1 * lo2;
	tmp[1] = 0;
	dtrace_shift_128(tmp, 32);
	dtrace_add_128(product, tmp, product);

	tmp[0] = hi2 * lo1;
	tmp[1] = 0;
	dtrace_shift_128(tmp, 32);
	dtrace_add_128(product, tmp, product);
}

/*
 * This privilege check should be used by actions and subroutines to
 * verify that the user credentials of the process that enabled the
 * invoking ECB match the target credentials
 */
static int
dtrace_priv_proc_common_user(dtrace_state_t *state)
{
	cred_t *cr, *s_cr = state->dts_cred.dcr_cred;

	/*
	 * We should always have a non-NULL state cred here, since if cred
	 * is null (anonymous tracing), we fast-path bypass this routine.
	 */
	ASSERT(s_cr != NULL);

	if ((cr = CRED()) != NULL &&
	    s_cr->cr_uid == cr->cr_uid &&
	    s_cr->cr_uid == cr->cr_ruid &&
	    s_cr->cr_uid ==