1 0 stevel /* 2 0 stevel * CDDL HEADER START 3 0 stevel * 4 0 stevel * The contents of this file are subject to the terms of the 5 3446 mrj * Common Development and Distribution License (the "License"). 6 3446 mrj * You may not use this file except in compliance with the License. 7 0 stevel * 8 0 stevel * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 0 stevel * or http://www.opensolaris.org/os/licensing. 10 0 stevel * See the License for the specific language governing permissions 11 0 stevel * and limitations under the License. 12 0 stevel * 13 0 stevel * When distributing Covered Code, include this CDDL HEADER in each 14 0 stevel * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 0 stevel * If applicable, add the following below this CDDL HEADER, with the 16 0 stevel * fields enclosed by brackets "[]" replaced with your own identifying 17 0 stevel * information: Portions Copyright [yyyy] [name of copyright owner] 18 0 stevel * 19 0 stevel * CDDL HEADER END 20 0 stevel */ 21 7563 Prasad 22 0 stevel /* 23 9889 Larry * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 0 stevel * Use is subject to license terms. 
25 0 stevel */ 26 0 stevel 27 0 stevel #include <sys/types.h> 28 0 stevel #include <sys/param.h> 29 0 stevel #include <sys/systm.h> 30 0 stevel #include <sys/vm.h> 31 0 stevel #include <sys/proc.h> 32 0 stevel #include <sys/file.h> 33 0 stevel #include <sys/conf.h> 34 0 stevel #include <sys/kmem.h> 35 0 stevel #include <sys/mem.h> 36 0 stevel #include <sys/mman.h> 37 0 stevel #include <sys/vnode.h> 38 0 stevel #include <sys/errno.h> 39 0 stevel #include <sys/memlist.h> 40 0 stevel #include <sys/dumphdr.h> 41 0 stevel #include <sys/dumpadm.h> 42 0 stevel #include <sys/ksyms.h> 43 0 stevel #include <sys/compress.h> 44 0 stevel #include <sys/stream.h> 45 0 stevel #include <sys/strsun.h> 46 0 stevel #include <sys/cmn_err.h> 47 0 stevel #include <sys/bitmap.h> 48 0 stevel #include <sys/modctl.h> 49 0 stevel #include <sys/utsname.h> 50 0 stevel #include <sys/systeminfo.h> 51 0 stevel #include <sys/vmem.h> 52 0 stevel #include <sys/log.h> 53 0 stevel #include <sys/var.h> 54 0 stevel #include <sys/debug.h> 55 0 stevel #include <sys/sunddi.h> 56 0 stevel #include <fs/fs_subr.h> 57 0 stevel #include <sys/fs/snode.h> 58 0 stevel #include <sys/ontrap.h> 59 0 stevel #include <sys/panic.h> 60 0 stevel #include <sys/dkio.h> 61 0 stevel #include <sys/vtoc.h> 62 0 stevel #include <sys/errorq.h> 63 0 stevel #include <sys/fm/util.h> 64 6423 gw25295 #include <sys/fs/zfs.h> 65 0 stevel 66 0 stevel #include <vm/hat.h> 67 0 stevel #include <vm/as.h> 68 0 stevel #include <vm/page.h> 69 10843 Dave #include <vm/pvn.h> 70 0 stevel #include <vm/seg.h> 71 0 stevel #include <vm/seg_kmem.h> 72 11066 rafael #include <sys/clock_impl.h> 73 0 stevel 74 10843 Dave #include <bzip2/bzlib.h> 75 0 stevel 76 0 stevel /* 77 10843 Dave * Crash dump time is dominated by disk write time. To reduce this, 78 10843 Dave * the stronger compression method bzip2 is applied to reduce the dump 79 10843 Dave * size and hence reduce I/O time. 
However, bzip2 is much more 80 10843 Dave * computationally expensive than the existing lzjb algorithm, so to 81 10843 Dave * avoid increasing compression time, CPUs that are otherwise idle 82 10843 Dave * during panic are employed to parallelize the compression task. 83 10843 Dave * Many helper CPUs are needed to prevent bzip2 from being a 84 10843 Dave * bottleneck, and on systems with too few CPUs, the lzjb algorithm is 85 10843 Dave * parallelized instead. Lastly, I/O and compression are performed by 86 10843 Dave * different CPUs, and are hence overlapped in time, unlike the older 87 10843 Dave * serial code. 88 10843 Dave * 89 10843 Dave * Another important consideration is the speed of the dump 90 10843 Dave * device. Faster disks need less CPUs in order to benefit from 91 10843 Dave * parallel lzjb versus parallel bzip2. Therefore, the CPU count 92 10843 Dave * threshold for switching from parallel lzjb to paralled bzip2 is 93 10843 Dave * elevated for faster disks. The dump device speed is adduced from 94 10843 Dave * the setting for dumpbuf.iosize, see dump_update_clevel. 95 0 stevel */ 96 10843 Dave 97 10843 Dave /* 98 10843 Dave * exported vars 99 10843 Dave */ 100 10843 Dave kmutex_t dump_lock; /* lock for dump configuration */ 101 10843 Dave dumphdr_t *dumphdr; /* dump header */ 102 10843 Dave int dump_conflags = DUMP_KERNEL; /* dump configuration flags */ 103 10843 Dave vnode_t *dumpvp; /* dump device vnode pointer */ 104 10843 Dave u_offset_t dumpvp_size; /* size of dump device, in bytes */ 105 10843 Dave char *dumppath; /* pathname of dump device */ 106 10843 Dave int dump_timeout = 120; /* timeout for dumping pages */ 107 10843 Dave int dump_timeleft; /* portion of dump_timeout remaining */ 108 10843 Dave int dump_ioerr; /* dump i/o error */ 109 10843 Dave int dump_check_used; /* enable check for used pages */ 110 10843 Dave 111 10843 Dave /* 112 10843 Dave * Tunables for dump compression and parallelism. 
These can be set via 113 10843 Dave * /etc/system. 114 10843 Dave * 115 10843 Dave * dump_ncpu_low number of helpers for parallel lzjb 116 10843 Dave * This is also the minimum configuration. 117 10843 Dave * 118 10843 Dave * dump_bzip2_level bzip2 compression level: 1-9 119 10843 Dave * Higher numbers give greater compression, but take more memory 120 10843 Dave * and time. Memory used per helper is ~(dump_bzip2_level * 1MB). 121 10843 Dave * 122 10843 Dave * dump_plat_mincpu the cross-over limit for using bzip2 (per platform): 123 10843 Dave * if dump_plat_mincpu == 0, then always do single threaded dump 124 10843 Dave * if ncpu >= dump_plat_mincpu then try to use bzip2 125 10843 Dave * 126 10843 Dave * dump_metrics_on if set, metrics are collected in the kernel, passed 127 10843 Dave * to savecore via the dump file, and recorded by savecore in 128 10843 Dave * METRICS.txt. 129 10843 Dave */ 130 10843 Dave uint_t dump_ncpu_low = 4; /* minimum config for parallel lzjb */ 131 10843 Dave uint_t dump_bzip2_level = 1; /* bzip2 level (1-9) */ 132 11178 Dave 133 11178 Dave /* tunables for pre-reserved heap */ 134 11178 Dave uint_t dump_kmem_permap = 1024; 135 11178 Dave uint_t dump_kmem_pages = 8; 136 10843 Dave 137 10843 Dave /* Define multiple buffers per helper to avoid stalling */ 138 10843 Dave #define NCBUF_PER_HELPER 2 139 10843 Dave #define NCMAP_PER_HELPER 4 140 10843 Dave 141 10843 Dave /* minimum number of helpers configured */ 142 10843 Dave #define MINHELPERS (dump_ncpu_low) 143 10843 Dave #define MINCBUFS (MINHELPERS * NCBUF_PER_HELPER) 144 10843 Dave 145 10843 Dave /* 146 10843 Dave * Define constant parameters. 
147 10843 Dave * 148 10843 Dave * CBUF_SIZE size of an output buffer 149 10843 Dave * 150 10843 Dave * CBUF_MAPSIZE size of virtual range for mapping pages 151 10843 Dave * 152 10843 Dave * CBUF_MAPNP size of virtual range in pages 153 10843 Dave * 154 10843 Dave */ 155 10843 Dave #define DUMP_1KB ((size_t)1 << 10) 156 10843 Dave #define DUMP_1MB ((size_t)1 << 20) 157 10843 Dave #define CBUF_SIZE ((size_t)1 << 17) 158 10843 Dave #define CBUF_MAPSHIFT (22) 159 10843 Dave #define CBUF_MAPSIZE ((size_t)1 << CBUF_MAPSHIFT) 160 10843 Dave #define CBUF_MAPNP ((size_t)1 << (CBUF_MAPSHIFT - PAGESHIFT)) 161 10843 Dave 162 10843 Dave /* 163 10843 Dave * Compression metrics are accumulated nano-second subtotals. The 164 10843 Dave * results are normalized by the number of pages dumped. A report is 165 10843 Dave * generated when dumpsys() completes and is saved in the dump image 166 10843 Dave * after the trailing dump header. 167 10843 Dave * 168 10843 Dave * Metrics are always collected. Set the variable dump_metrics_on to 169 10843 Dave * cause metrics to be saved in the crash file, where savecore will 170 10843 Dave * save it in the file METRICS.txt. 171 10843 Dave */ 172 10843 Dave #define PERPAGES \ 173 10843 Dave PERPAGE(bitmap) PERPAGE(map) PERPAGE(unmap) \ 174 10843 Dave PERPAGE(copy) PERPAGE(compress) \ 175 10843 Dave PERPAGE(write) \ 176 10843 Dave PERPAGE(inwait) PERPAGE(outwait) 177 10843 Dave 178 10843 Dave typedef struct perpage { 179 10843 Dave #define PERPAGE(x) hrtime_t x; 180 10843 Dave PERPAGES 181 10843 Dave #undef PERPAGE 182 10843 Dave } perpage_t; 183 10843 Dave 184 10843 Dave /* 185 10843 Dave * This macro controls the code generation for collecting dump 186 10843 Dave * performance information. By default, the code is generated, but 187 10843 Dave * automatic saving of the information is disabled. 
If dump_metrics_on 188 10843 Dave * is set to 1, the timing information is passed to savecore via the 189 10843 Dave * crash file, where it is appended to the file dump-dir/METRICS.txt. 190 10843 Dave */ 191 10843 Dave #define COLLECT_METRICS 192 10843 Dave 193 10843 Dave #ifdef COLLECT_METRICS 194 10843 Dave uint_t dump_metrics_on = 0; /* set to 1 to enable recording metrics */ 195 10843 Dave 196 10843 Dave #define HRSTART(v, m) v##ts.m = gethrtime() 197 10843 Dave #define HRSTOP(v, m) v.m += gethrtime() - v##ts.m 198 10843 Dave #define HRBEGIN(v, m, s) v##ts.m = gethrtime(); v.size += s 199 10843 Dave #define HREND(v, m) v.m += gethrtime() - v##ts.m 200 10843 Dave #define HRNORM(v, m, n) v.m /= (n) 201 10843 Dave 202 10843 Dave #else 203 10843 Dave #define HRSTART(v, m) 204 10843 Dave #define HRSTOP(v, m) 205 10843 Dave #define HRBEGIN(v, m, s) 206 10843 Dave #define HREND(v, m) 207 10843 Dave #define HRNORM(v, m, n) 208 10843 Dave #endif /* COLLECT_METRICS */ 209 10843 Dave 210 10843 Dave /* 211 10843 Dave * Buffers for copying and compressing memory pages. 212 10843 Dave * 213 10843 Dave * cbuf_t buffer controllers: used for both input and output. 214 10843 Dave * 215 10843 Dave * The buffer state indicates how it is being used: 216 10843 Dave * 217 10843 Dave * CBUF_FREEMAP: CBUF_MAPSIZE virtual address range is available for 218 10843 Dave * mapping input pages. 219 10843 Dave * 220 10843 Dave * CBUF_INREADY: input pages are mapped and ready for compression by a 221 10843 Dave * helper. 222 10843 Dave * 223 10843 Dave * CBUF_USEDMAP: mapping has been consumed by a helper. Needs unmap. 224 10843 Dave * 225 10843 Dave * CBUF_FREEBUF: CBUF_SIZE output buffer, which is available. 226 10843 Dave * 227 10843 Dave * CBUF_WRITE: CBUF_SIZE block of compressed pages from a helper, 228 10843 Dave * ready to write out. 229 10843 Dave * 230 10843 Dave * CBUF_ERRMSG: CBUF_SIZE block of error messages from a helper 231 10843 Dave * (reports UE errors.) 
232 10843 Dave */ 233 10843 Dave 234 10843 Dave typedef enum cbufstate { 235 10843 Dave CBUF_FREEMAP, 236 10843 Dave CBUF_INREADY, 237 10843 Dave CBUF_USEDMAP, 238 10843 Dave CBUF_FREEBUF, 239 10843 Dave CBUF_WRITE, 240 10843 Dave CBUF_ERRMSG 241 10843 Dave } cbufstate_t; 242 10843 Dave 243 10843 Dave typedef struct cbuf cbuf_t; 244 10843 Dave 245 10843 Dave struct cbuf { 246 10843 Dave cbuf_t *next; /* next in list */ 247 10843 Dave cbufstate_t state; /* processing state */ 248 10843 Dave size_t used; /* amount used */ 249 10843 Dave size_t size; /* mem size */ 250 10843 Dave char *buf; /* kmem or vmem */ 251 10843 Dave pgcnt_t pagenum; /* index to pfn map */ 252 10843 Dave pgcnt_t bitnum; /* first set bitnum */ 253 10843 Dave pfn_t pfn; /* first pfn in mapped range */ 254 10843 Dave int off; /* byte offset to first pfn */ 255 10843 Dave }; 256 10843 Dave 257 10843 Dave /* 258 10843 Dave * cqueue_t queues: a uni-directional channel for communication 259 10843 Dave * from the master to helper tasks or vice-versa using put and 260 10843 Dave * get primitives. Both mappings and data buffers are passed via 261 10843 Dave * queues. Producers close a queue when done. The number of 262 10843 Dave * active producers is reference counted so the consumer can 263 10843 Dave * detect end of data. Concurrent access is mediated by atomic 264 10843 Dave * operations for panic dump, or mutex/cv for live dump. 
265 10843 Dave * 266 10843 Dave * There a four queues, used as follows: 267 10843 Dave * 268 10843 Dave * Queue Dataflow NewState 269 10843 Dave * -------------------------------------------------- 270 10843 Dave * mainq master -> master FREEMAP 271 10843 Dave * master has initialized or unmapped an input buffer 272 10843 Dave * -------------------------------------------------- 273 10843 Dave * helperq master -> helper INREADY 274 10843 Dave * master has mapped input for use by helper 275 10843 Dave * -------------------------------------------------- 276 10843 Dave * mainq master <- helper USEDMAP 277 10843 Dave * helper is done with input 278 10843 Dave * -------------------------------------------------- 279 10843 Dave * freebufq master -> helper FREEBUF 280 10843 Dave * master has initialized or written an output buffer 281 10843 Dave * -------------------------------------------------- 282 10843 Dave * mainq master <- helper WRITE 283 10843 Dave * block of compressed pages from a helper 284 10843 Dave * -------------------------------------------------- 285 10843 Dave * mainq master <- helper ERRMSG 286 10843 Dave * error messages from a helper (memory error case) 287 10843 Dave * -------------------------------------------------- 288 10843 Dave * writerq master <- master WRITE 289 10843 Dave * non-blocking queue of blocks to write 290 10843 Dave * -------------------------------------------------- 291 10843 Dave */ 292 10843 Dave typedef struct cqueue { 293 10843 Dave cbuf_t *volatile first; /* first in list */ 294 10843 Dave cbuf_t *last; /* last in list */ 295 10843 Dave hrtime_t ts; /* timestamp */ 296 10843 Dave hrtime_t empty; /* total time empty */ 297 10843 Dave kmutex_t mutex; /* live state lock */ 298 10843 Dave kcondvar_t cv; /* live wait var */ 299 10843 Dave lock_t spinlock; /* panic mode spin lock */ 300 10843 Dave volatile uint_t open; /* producer ref count */ 301 10843 Dave } cqueue_t; 302 10843 Dave 303 10843 Dave /* 304 10843 Dave * 
Convenience macros for using the cqueue functions
 * Note that the caller must have defined "dumpsync_t *ds"
 */
#define	CQ_IS_EMPTY(q)				\
	(ds->q.first == NULL)

#define	CQ_OPEN(q)				\
	atomic_inc_uint(&ds->q.open)

#define	CQ_CLOSE(q)				\
	dumpsys_close_cq(&ds->q, ds->live)

#define	CQ_PUT(q, cp, st)			\
	dumpsys_put_cq(&ds->q, cp, st, ds->live)

#define	CQ_GET(q)				\
	dumpsys_get_cq(&ds->q, ds->live)

/*
 * Dynamic state when dumpsys() is running.
 */
typedef struct dumpsync {
	pgcnt_t		npages;		/* subtotal of pages dumped */
	pgcnt_t		pages_mapped;	/* subtotal of pages mapped */
	pgcnt_t		pages_used;	/* subtotal of pages used per map */
	size_t		nwrite;		/* subtotal of bytes written */
	uint_t		live;		/* running live dump */
	uint_t		neednl;		/* will need to print a newline */
	uint_t		percent;	/* dump progress */
	uint_t		percent_done;	/* dump progress reported */
	cqueue_t	freebufq;	/* free kmem bufs for writing */
	cqueue_t	mainq;		/* input for main task */
	cqueue_t	helperq;	/* input for helpers */
	cqueue_t	writerq;	/* input for writer */
	hrtime_t	start;		/* start time */
	hrtime_t	elapsed;	/* elapsed time when completed */
	hrtime_t	iotime;		/* time spent writing nwrite bytes */
	hrtime_t	iowait;		/* time spent waiting for output */
	hrtime_t	iowaitts;	/* iowait timestamp */
	perpage_t	perpage;	/* metrics */
	perpage_t	perpagets;	/* timestamps paired with perpage */
	int		dumpcpu;	/* master cpu */
} dumpsync_t;

10843 Dave static dumpsync_t dumpsync; /* synchronization vars */ 349 10843 Dave 350 10843 Dave /* 351 10843 Dave * helper_t helpers: contains the context for a stream. CPUs run in 352 10843 Dave * parallel at dump time; each CPU creates a single stream of 353 10843 Dave * compression data. Stream data is divided into CBUF_SIZE blocks. 354 10843 Dave * The blocks are written in order within a stream. But, blocks from 355 10843 Dave * multiple streams can be interleaved. Each stream is identified by a 356 10843 Dave * unique tag. 357 10843 Dave */ 358 10843 Dave typedef struct helper { 359 10843 Dave int helper; /* bound helper id */ 360 10843 Dave int tag; /* compression stream tag */ 361 10843 Dave perpage_t perpage; /* per page metrics */ 362 10843 Dave perpage_t perpagets; /* per page metrics (timestamps) */ 363 10843 Dave taskqid_t taskqid; /* live dump task ptr */ 364 10843 Dave int in, out; /* buffer offsets */ 365 10843 Dave cbuf_t *cpin, *cpout, *cperr; /* cbuf objects in process */ 366 10843 Dave dumpsync_t *ds; /* pointer to sync vars */ 367 10843 Dave size_t used; /* counts input consumed */ 368 10843 Dave char *page; /* buffer for page copy */ 369 10843 Dave char *lzbuf; /* lzjb output */ 370 10843 Dave bz_stream bzstream; /* bzip2 state */ 371 10843 Dave } helper_t; 372 10843 Dave 373 10843 Dave #define MAINHELPER (-1) /* helper is also the main task */ 374 10843 Dave #define FREEHELPER (-2) /* unbound helper */ 375 10843 Dave #define DONEHELPER (-3) /* helper finished */ 376 10843 Dave 377 10843 Dave /* 378 10843 Dave * configuration vars for dumpsys 379 10843 Dave */ 380 10843 Dave typedef struct dumpcfg { 381 10843 Dave int threshold; /* ncpu threshold for bzip2 */ 382 10843 Dave int nhelper; /* number of helpers */ 383 10843 Dave int nhelper_used; /* actual number of helpers used */ 384 10843 Dave int ncmap; /* number VA pages for compression */ 385 10843 Dave int ncbuf; /* number of bufs for compression */ 386 10843 Dave int ncbuf_used; /* number 
of bufs in use */ 387 10843 Dave uint_t clevel; /* dump compression level */ 388 10843 Dave helper_t *helper; /* array of helpers */ 389 10843 Dave cbuf_t *cmap; /* array of input (map) buffers */ 390 10843 Dave cbuf_t *cbuf; /* array of output buffers */ 391 10843 Dave ulong_t *helpermap; /* set of dumpsys helper CPU ids */ 392 10843 Dave ulong_t *bitmap; /* bitmap for marking pages to dump */ 393 10843 Dave ulong_t *rbitmap; /* bitmap for used CBUF_MAPSIZE ranges */ 394 10843 Dave pgcnt_t bitmapsize; /* size of bitmap */ 395 10843 Dave pgcnt_t rbitmapsize; /* size of bitmap for ranges */ 396 10843 Dave pgcnt_t found4m; /* number ranges allocated by dump */ 397 10843 Dave pgcnt_t foundsm; /* number small pages allocated by dump */ 398 10843 Dave pid_t *pids; /* list of process IDs at dump time */ 399 10843 Dave size_t maxsize; /* memory size needed at dump time */ 400 10843 Dave size_t maxvmsize; /* size of reserved VM */ 401 10843 Dave char *maxvm; /* reserved VM for spare pages */ 402 10843 Dave lock_t helper_lock; /* protect helper state */ 403 10843 Dave char helpers_wanted; /* flag to enable parallelism */ 404 10843 Dave } dumpcfg_t; 405 10843 Dave 406 10843 Dave static dumpcfg_t dumpcfg; /* config vars */ 407 10843 Dave 408 10843 Dave /* 409 10843 Dave * The dump I/O buffer. 410 10843 Dave * 411 10843 Dave * There is one I/O buffer used by dumpvp_write and dumvp_flush. It is 412 10843 Dave * sized according to the optimum device transfer speed. 
413 10843 Dave */ 414 10843 Dave typedef struct dumpbuf { 415 10843 Dave vnode_t *cdev_vp; /* VCHR open of the dump device */ 416 10843 Dave len_t vp_limit; /* maximum write offset */ 417 10843 Dave offset_t vp_off; /* current dump device offset */ 418 10843 Dave char *cur; /* dump write pointer */ 419 10843 Dave char *start; /* dump buffer address */ 420 10843 Dave char *end; /* dump buffer end */ 421 10843 Dave size_t size; /* size of dumpbuf in bytes */ 422 10843 Dave size_t iosize; /* best transfer size for device */ 423 10843 Dave } dumpbuf_t; 424 10843 Dave 425 10843 Dave dumpbuf_t dumpbuf; /* I/O buffer */ 426 10843 Dave 427 10843 Dave /* 428 10843 Dave * The dump I/O buffer must be at least one page, at most xfer_size 429 10843 Dave * bytes, and should scale with physmem in between. The transfer size 430 10843 Dave * passed in will either represent a global default (maxphys) or the 431 10843 Dave * best size for the device. The size of the dumpbuf I/O buffer is 432 10843 Dave * limited by dumpbuf_limit (8MB by default) because the dump 433 10843 Dave * performance saturates beyond a certain size. The default is to 434 10843 Dave * select 1/4096 of the memory. 
435 10843 Dave */ 436 10843 Dave static int dumpbuf_fraction = 12; /* memory size scale factor */ 437 10843 Dave static size_t dumpbuf_limit = 8 * DUMP_1MB; /* max I/O buf size */ 438 10843 Dave 439 0 stevel static size_t 440 0 stevel dumpbuf_iosize(size_t xfer_size) 441 0 stevel { 442 10843 Dave size_t iosize = ptob(physmem >> dumpbuf_fraction); 443 0 stevel 444 10843 Dave if (iosize < PAGESIZE) 445 10843 Dave iosize = PAGESIZE; 446 10843 Dave else if (iosize > xfer_size) 447 10843 Dave iosize = xfer_size; 448 10843 Dave if (iosize > dumpbuf_limit) 449 10843 Dave iosize = dumpbuf_limit; 450 10843 Dave return (iosize & PAGEMASK); 451 0 stevel } 452 0 stevel 453 10843 Dave /* 454 10843 Dave * resize the I/O buffer 455 10843 Dave */ 456 0 stevel static void 457 0 stevel dumpbuf_resize(void) 458 0 stevel { 459 10843 Dave char *old_buf = dumpbuf.start; 460 10843 Dave size_t old_size = dumpbuf.size; 461 0 stevel char *new_buf; 462 0 stevel size_t new_size; 463 0 stevel 464 0 stevel ASSERT(MUTEX_HELD(&dump_lock)); 465 0 stevel 466 10843 Dave new_size = dumpbuf_iosize(MAX(dumpbuf.iosize, maxphys)); 467 10843 Dave if (new_size <= old_size) 468 0 stevel return; /* no need to reallocate buffer */ 469 0 stevel 470 0 stevel new_buf = kmem_alloc(new_size, KM_SLEEP); 471 10843 Dave dumpbuf.size = new_size; 472 10843 Dave dumpbuf.start = new_buf; 473 10843 Dave dumpbuf.end = new_buf + new_size; 474 0 stevel kmem_free(old_buf, old_size); 475 10843 Dave } 476 10843 Dave 477 10843 Dave /* 478 10843 Dave * dump_update_clevel is called when dumpadm configures the dump device. 479 10843 Dave * Calculate number of helpers and buffers. 480 10843 Dave * Allocate the minimum configuration for now. 481 10843 Dave * 482 10843 Dave * When the dump file is configured we reserve a minimum amount of 483 10843 Dave * memory for use at crash time. But we reserve VA for all the memory 484 10843 Dave * we really want in order to do the fastest dump possible. 
The VA is 485 10843 Dave * backed by pages not being dumped, according to the bitmap. If 486 10843 Dave * there is insufficient spare memory, however, we fall back to the 487 10843 Dave * minimum. 488 10843 Dave * 489 10843 Dave * Live dump (savecore -L) always uses the minimum config. 490 10843 Dave * 491 10843 Dave * clevel 0 is single threaded lzjb 492 10843 Dave * clevel 1 is parallel lzjb 493 10843 Dave * clevel 2 is parallel bzip2 494 10843 Dave * 495 10843 Dave * The ncpu threshold is selected with dump_plat_mincpu. 496 10843 Dave * On OPL, set_platform_defaults() overrides the sun4u setting. 497 10843 Dave * The actual values are defined via DUMP_PLAT_*_MINCPU macros. 498 10843 Dave * 499 10843 Dave * Architecture Threshold Algorithm 500 10843 Dave * sun4u < 51 parallel lzjb 501 10843 Dave * sun4u >= 51 parallel bzip2(*) 502 10843 Dave * sun4u OPL < 8 parallel lzjb 503 10843 Dave * sun4u OPL >= 8 parallel bzip2(*) 504 10843 Dave * sun4v < 128 parallel lzjb 505 10843 Dave * sun4v >= 128 parallel bzip2(*) 506 10843 Dave * x86 < 11 parallel lzjb 507 10843 Dave * x86 >= 11 parallel bzip2(*) 508 10843 Dave * 32-bit N/A single-threaded lzjb 509 10843 Dave * 510 10843 Dave * (*) bzip2 is only chosen if there is sufficient available 511 10843 Dave * memory for buffers at dump time. See dumpsys_get_maxmem(). 512 10843 Dave * 513 10843 Dave * Faster dump devices have larger I/O buffers. The threshold value is 514 10843 Dave * increased according to the size of the dump I/O buffer, because 515 10843 Dave * parallel lzjb performs better with faster disks. For buffers >= 1MB 516 10843 Dave * the threshold is 3X; for buffers >= 256K threshold is 2X. 517 10843 Dave * 518 10843 Dave * For parallel dumps, the number of helpers is ncpu-1. The CPU 519 10843 Dave * running panic runs the main task. For single-threaded dumps, the 520 10843 Dave * panic CPU does lzjb compression (it is tagged as MAINHELPER.) 
521 10843 Dave * 522 10843 Dave * Need multiple buffers per helper so that they do not block waiting 523 10843 Dave * for the main task. 524 10843 Dave * parallel single-threaded 525 10843 Dave * Number of output buffers: nhelper*2 1 526 10843 Dave * Number of mapping buffers: nhelper*4 1 527 10843 Dave * 528 10843 Dave */ 529 10843 Dave static void 530 10843 Dave dump_update_clevel() 531 10843 Dave { 532 10843 Dave int tag; 533 10843 Dave size_t bz2size; 534 10843 Dave helper_t *hp, *hpend; 535 10843 Dave cbuf_t *cp, *cpend; 536 10843 Dave dumpcfg_t *old = &dumpcfg; 537 10843 Dave dumpcfg_t newcfg = *old; 538 10843 Dave dumpcfg_t *new = &newcfg; 539 10843 Dave 540 10843 Dave ASSERT(MUTEX_HELD(&dump_lock)); 541 10843 Dave 542 10843 Dave /* 543 10843 Dave * Free the previously allocated bufs and VM. 544 10843 Dave */ 545 10843 Dave if (old->helper != NULL) { 546 10843 Dave 547 10843 Dave /* helpers */ 548 10843 Dave hpend = &old->helper[old->nhelper]; 549 10843 Dave for (hp = old->helper; hp != hpend; hp++) { 550 10843 Dave if (hp->lzbuf != NULL) 551 10843 Dave kmem_free(hp->lzbuf, PAGESIZE); 552 10843 Dave if (hp->page != NULL) 553 10843 Dave kmem_free(hp->page, PAGESIZE); 554 10843 Dave } 555 10843 Dave kmem_free(old->helper, old->nhelper * sizeof (helper_t)); 556 10843 Dave 557 10843 Dave /* VM space for mapping pages */ 558 10843 Dave cpend = &old->cmap[old->ncmap]; 559 10843 Dave for (cp = old->cmap; cp != cpend; cp++) 560 10843 Dave vmem_xfree(heap_arena, cp->buf, CBUF_MAPSIZE); 561 10843 Dave kmem_free(old->cmap, old->ncmap * sizeof (cbuf_t)); 562 10843 Dave 563 10843 Dave /* output bufs */ 564 10843 Dave cpend = &old->cbuf[old->ncbuf]; 565 10843 Dave for (cp = old->cbuf; cp != cpend; cp++) 566 10843 Dave if (cp->buf != NULL) 567 10843 Dave kmem_free(cp->buf, cp->size); 568 10843 Dave kmem_free(old->cbuf, old->ncbuf * sizeof (cbuf_t)); 569 10843 Dave 570 10843 Dave /* reserved VM for dumpsys_get_maxmem */ 571 10843 Dave if (old->maxvmsize > 0) 572 10843 Dave 
vmem_xfree(heap_arena, old->maxvm, old->maxvmsize); 573 10843 Dave } 574 10843 Dave 575 10843 Dave /* 576 10843 Dave * Allocate memory and VM. 577 10843 Dave * One CPU runs dumpsys, the rest are helpers. 578 10843 Dave */ 579 10843 Dave new->nhelper = ncpus - 1; 580 10843 Dave if (new->nhelper < 1) 581 10843 Dave new->nhelper = 1; 582 10843 Dave 583 10843 Dave if (new->nhelper > DUMP_MAX_NHELPER) 584 10843 Dave new->nhelper = DUMP_MAX_NHELPER; 585 10843 Dave 586 10843 Dave /* increase threshold for faster disks */ 587 10843 Dave new->threshold = dump_plat_mincpu; 588 10843 Dave if (dumpbuf.iosize >= DUMP_1MB) 589 10843 Dave new->threshold *= 3; 590 10843 Dave else if (dumpbuf.iosize >= (256 * DUMP_1KB)) 591 10843 Dave new->threshold *= 2; 592 10843 Dave 593 10843 Dave /* figure compression level based upon the computed threshold. */ 594 10843 Dave if (dump_plat_mincpu == 0 || new->nhelper < 2) { 595 10843 Dave new->clevel = 0; 596 10843 Dave new->nhelper = 1; 597 10843 Dave } else if ((new->nhelper + 1) >= new->threshold) { 598 10843 Dave new->clevel = DUMP_CLEVEL_BZIP2; 599 10843 Dave } else { 600 10843 Dave new->clevel = DUMP_CLEVEL_LZJB; 601 10843 Dave } 602 10843 Dave 603 10843 Dave if (new->clevel == 0) { 604 10843 Dave new->ncbuf = 1; 605 10843 Dave new->ncmap = 1; 606 10843 Dave } else { 607 10843 Dave new->ncbuf = NCBUF_PER_HELPER * new->nhelper; 608 10843 Dave new->ncmap = NCMAP_PER_HELPER * new->nhelper; 609 10843 Dave } 610 10843 Dave 611 10843 Dave /* 612 10843 Dave * Allocate new data structures and buffers for MINHELPERS, 613 10843 Dave * and also figure the max desired size. 
614 10843 Dave */ 615 10843 Dave bz2size = BZ2_bzCompressInitSize(dump_bzip2_level); 616 10843 Dave new->maxsize = 0; 617 10843 Dave new->maxvmsize = 0; 618 10843 Dave new->maxvm = NULL; 619 10843 Dave tag = 1; 620 10843 Dave new->helper = kmem_zalloc(new->nhelper * sizeof (helper_t), KM_SLEEP); 621 10843 Dave hpend = &new->helper[new->nhelper]; 622 10843 Dave for (hp = new->helper; hp != hpend; hp++) { 623 10843 Dave hp->tag = tag++; 624 10843 Dave if (hp < &new->helper[MINHELPERS]) { 625 10843 Dave hp->lzbuf = kmem_alloc(PAGESIZE, KM_SLEEP); 626 10843 Dave hp->page = kmem_alloc(PAGESIZE, KM_SLEEP); 627 10843 Dave } else if (new->clevel < DUMP_CLEVEL_BZIP2) { 628 10843 Dave new->maxsize += 2 * PAGESIZE; 629 10843 Dave } else { 630 10843 Dave new->maxsize += PAGESIZE; 631 10843 Dave } 632 10843 Dave if (new->clevel >= DUMP_CLEVEL_BZIP2) 633 10843 Dave new->maxsize += bz2size; 634 10843 Dave } 635 10843 Dave 636 10843 Dave new->cbuf = kmem_zalloc(new->ncbuf * sizeof (cbuf_t), KM_SLEEP); 637 10843 Dave cpend = &new->cbuf[new->ncbuf]; 638 10843 Dave for (cp = new->cbuf; cp != cpend; cp++) { 639 10843 Dave cp->state = CBUF_FREEBUF; 640 10843 Dave cp->size = CBUF_SIZE; 641 10843 Dave if (cp < &new->cbuf[MINCBUFS]) 642 10843 Dave cp->buf = kmem_alloc(cp->size, KM_SLEEP); 643 10843 Dave else 644 10843 Dave new->maxsize += cp->size; 645 10843 Dave } 646 10843 Dave 647 10843 Dave new->cmap = kmem_zalloc(new->ncmap * sizeof (cbuf_t), KM_SLEEP); 648 10843 Dave cpend = &new->cmap[new->ncmap]; 649 10843 Dave for (cp = new->cmap; cp != cpend; cp++) { 650 10843 Dave cp->state = CBUF_FREEMAP; 651 10843 Dave cp->size = CBUF_MAPSIZE; 652 10843 Dave cp->buf = vmem_xalloc(heap_arena, CBUF_MAPSIZE, CBUF_MAPSIZE, 653 10843 Dave 0, 0, NULL, NULL, VM_SLEEP); 654 10843 Dave } 655 10843 Dave 656 10843 Dave /* reserve VA to be backed with spare pages at crash time */ 657 10843 Dave if (new->maxsize > 0) { 658 10843 Dave new->maxsize = P2ROUNDUP(new->maxsize, PAGESIZE); 659 10843 Dave 
new->maxvmsize = P2ROUNDUP(new->maxsize, CBUF_MAPSIZE); 660 10843 Dave new->maxvm = vmem_xalloc(heap_arena, new->maxvmsize, 661 10843 Dave CBUF_MAPSIZE, 0, 0, NULL, NULL, VM_SLEEP); 662 10843 Dave } 663 11178 Dave 664 11178 Dave /* 665 11178 Dave * Reserve memory for kmem allocation calls made during crash 666 11178 Dave * dump. The hat layer allocates memory for each mapping 667 11178 Dave * created, and the I/O path allocates buffers and data structs. 668 11178 Dave * Add a few pages for safety. 669 11178 Dave */ 670 11178 Dave kmem_dump_init((new->ncmap * dump_kmem_permap) + 671 11178 Dave (dump_kmem_pages * PAGESIZE)); 672 10843 Dave 673 10843 Dave /* set new config pointers */ 674 10843 Dave *old = *new; 675 10843 Dave } 676 10843 Dave 677 10843 Dave /* 678 10843 Dave * Define a struct memlist walker to optimize bitnum to pfn 679 10843 Dave * lookup. The walker maintains the state of the list traversal. 680 10843 Dave */ 681 10843 Dave typedef struct dumpmlw { 682 10843 Dave struct memlist *mp; /* current memlist */ 683 10843 Dave pgcnt_t basenum; /* bitnum base offset */ 684 10843 Dave pgcnt_t mppages; /* current memlist size */ 685 10843 Dave pgcnt_t mpleft; /* size to end of current memlist */ 686 10843 Dave pfn_t mpaddr; /* first pfn in memlist */ 687 10843 Dave } dumpmlw_t; 688 10843 Dave 689 10843 Dave /* initialize the walker */ 690 10843 Dave static inline void 691 10843 Dave dump_init_memlist_walker(dumpmlw_t *pw) 692 10843 Dave { 693 10843 Dave pw->mp = phys_install; 694 10843 Dave pw->basenum = 0; 695 10843 Dave pw->mppages = pw->mp->size >> PAGESHIFT; 696 10843 Dave pw->mpleft = pw->mppages; 697 10843 Dave pw->mpaddr = pw->mp->address >> PAGESHIFT; 698 10843 Dave } 699 10843 Dave 700 10843 Dave /* 701 10843 Dave * Lookup pfn given bitnum. The memlist can be quite long on some 702 10843 Dave * systems (e.g.: one per board). To optimize sequential lookups, the 703 10843 Dave * caller initializes and presents a memlist walker. 
704 10843 Dave */ 705 10843 Dave static pfn_t 706 10843 Dave dump_bitnum_to_pfn(pgcnt_t bitnum, dumpmlw_t *pw) 707 10843 Dave { 708 10843 Dave bitnum -= pw->basenum; 709 10843 Dave while (pw->mp != NULL) { 710 10843 Dave if (bitnum < pw->mppages) { 711 10843 Dave pw->mpleft = pw->mppages - bitnum; 712 10843 Dave return (pw->mpaddr + bitnum); 713 10843 Dave } 714 10843 Dave bitnum -= pw->mppages; 715 10843 Dave pw->basenum += pw->mppages; 716 10843 Dave pw->mp = pw->mp->next; 717 10843 Dave if (pw->mp != NULL) { 718 10843 Dave pw->mppages = pw->mp->size >> PAGESHIFT; 719 10843 Dave pw->mpleft = pw->mppages; 720 10843 Dave pw->mpaddr = pw->mp->address >> PAGESHIFT; 721 10843 Dave } 722 10843 Dave } 723 10843 Dave return (PFN_INVALID); 724 10843 Dave } 725 10843 Dave 726 10843 Dave static pgcnt_t 727 10843 Dave dump_pfn_to_bitnum(pfn_t pfn) 728 10843 Dave { 729 10843 Dave struct memlist *mp; 730 10843 Dave pgcnt_t bitnum = 0; 731 10843 Dave 732 10843 Dave for (mp = phys_install; mp != NULL; mp = mp->next) { 733 10843 Dave if (pfn >= (mp->address >> PAGESHIFT) && 734 10843 Dave pfn < ((mp->address + mp->size) >> PAGESHIFT)) 735 10843 Dave return (bitnum + pfn - (mp->address >> PAGESHIFT)); 736 10843 Dave bitnum += mp->size >> PAGESHIFT; 737 10843 Dave } 738 10843 Dave return ((pgcnt_t)-1); 739 10843 Dave } 740 10843 Dave 741 10843 Dave /* 742 10843 Dave * Set/test bitmap for a CBUF_MAPSIZE range which includes pfn. The 743 10843 Dave * mapping of pfn to range index is imperfect because pfn and bitnum 744 10843 Dave * do not have the same phase. To make sure a CBUF_MAPSIZE range is 745 10843 Dave * covered, call this for both ends: 746 10843 Dave * dump_set_used(base) 747 10843 Dave * dump_set_used(base+CBUF_MAPNP-1) 748 10843 Dave * 749 10843 Dave * This is used during a panic dump to mark pages allocated by 750 10843 Dave * dumpsys_get_maxmem(). 
The macro IS_DUMP_PAGE(pp) is used by 751 10843 Dave * page_get_mnode_freelist() to make sure pages used by dump are never 752 10843 Dave * allocated. 753 10843 Dave */ 754 10843 Dave #define CBUF_MAPP2R(pfn) ((pfn) >> (CBUF_MAPSHIFT - PAGESHIFT)) 755 10843 Dave 756 10843 Dave static void 757 10843 Dave dump_set_used(pfn_t pfn) 758 10843 Dave { 759 10843 Dave 760 10843 Dave pgcnt_t bitnum, rbitnum; 761 10843 Dave 762 10843 Dave bitnum = dump_pfn_to_bitnum(pfn); 763 10843 Dave ASSERT(bitnum != (pgcnt_t)-1); 764 10843 Dave 765 10843 Dave rbitnum = CBUF_MAPP2R(bitnum); 766 10843 Dave ASSERT(rbitnum < dumpcfg.rbitmapsize); 767 10843 Dave 768 10843 Dave BT_SET(dumpcfg.rbitmap, rbitnum); 769 10843 Dave } 770 10843 Dave 771 10843 Dave int 772 10843 Dave dump_test_used(pfn_t pfn) 773 10843 Dave { 774 10843 Dave pgcnt_t bitnum, rbitnum; 775 10843 Dave 776 10843 Dave bitnum = dump_pfn_to_bitnum(pfn); 777 10843 Dave ASSERT(bitnum != (pgcnt_t)-1); 778 10843 Dave 779 10843 Dave rbitnum = CBUF_MAPP2R(bitnum); 780 10843 Dave ASSERT(rbitnum < dumpcfg.rbitmapsize); 781 10843 Dave 782 10843 Dave return (BT_TEST(dumpcfg.rbitmap, rbitnum)); 783 10843 Dave } 784 10843 Dave 785 10843 Dave /* 786 10843 Dave * dumpbzalloc and dumpbzfree are callbacks from the bzip2 library. 787 10843 Dave * dumpsys_get_maxmem() uses them for BZ2_bzCompressInit(). 
788 10843 Dave */ 789 10843 Dave static void * 790 10843 Dave dumpbzalloc(void *opaque, int items, int size) 791 10843 Dave { 792 10843 Dave size_t *sz; 793 10843 Dave char *ret; 794 10843 Dave 795 10843 Dave ASSERT(opaque != NULL); 796 10843 Dave sz = opaque; 797 10843 Dave ret = dumpcfg.maxvm + *sz; 798 10843 Dave *sz += items * size; 799 10843 Dave *sz = P2ROUNDUP(*sz, BZ2_BZALLOC_ALIGN); 800 10843 Dave ASSERT(*sz <= dumpcfg.maxvmsize); 801 10843 Dave return (ret); 802 10843 Dave } 803 10843 Dave 804 10843 Dave /*ARGSUSED*/ 805 10843 Dave static void 806 10843 Dave dumpbzfree(void *opaque, void *addr) 807 10843 Dave { 808 10843 Dave } 809 10843 Dave 810 10843 Dave /* 811 10843 Dave * Perform additional checks on the page to see if we can really use 812 10843 Dave * it. The kernel (kas) pages are always set in the bitmap. However, 813 10843 Dave * boot memory pages (prom_ppages or P_BOOTPAGES) are not in the 814 10843 Dave * bitmap. So we check for them. 815 10843 Dave */ 816 10843 Dave static inline int 817 10843 Dave dump_pfn_check(pfn_t pfn) 818 10843 Dave { 819 10843 Dave page_t *pp = page_numtopp_nolock(pfn); 820 10843 Dave if (pp == NULL || pp->p_pagenum != pfn || 821 10843 Dave #if defined(__sparc) 822 11185 Sean pp->p_vnode == &promvp || 823 10843 Dave #else 824 10843 Dave PP_ISBOOTPAGES(pp) || 825 10843 Dave #endif 826 10843 Dave pp->p_toxic != 0) 827 10843 Dave return (0); 828 10843 Dave return (1); 829 10843 Dave } 830 10843 Dave 831 10843 Dave /* 832 10843 Dave * Check a range to see if all contained pages are available and 833 10843 Dave * return non-zero if the range can be used. 
834 10843 Dave */ 835 10843 Dave static inline int 836 10843 Dave dump_range_check(pgcnt_t start, pgcnt_t end, pfn_t pfn) 837 10843 Dave { 838 10843 Dave for (; start < end; start++, pfn++) { 839 10843 Dave if (BT_TEST(dumpcfg.bitmap, start)) 840 10843 Dave return (0); 841 10843 Dave if (!dump_pfn_check(pfn)) 842 10843 Dave return (0); 843 10843 Dave } 844 10843 Dave return (1); 845 10843 Dave } 846 10843 Dave 847 10843 Dave /* 848 10843 Dave * dumpsys_get_maxmem() is called during panic. Find unused ranges 849 10843 Dave * and use them for buffers. If we find enough memory switch to 850 10843 Dave * parallel bzip2, otherwise use parallel lzjb. 851 10843 Dave * 852 10843 Dave * It searches the dump bitmap in 2 passes. The first time it looks 853 10843 Dave * for CBUF_MAPSIZE ranges. On the second pass it uses small pages. 854 10843 Dave */ 855 10843 Dave static void 856 10843 Dave dumpsys_get_maxmem() 857 10843 Dave { 858 10843 Dave dumpcfg_t *cfg = &dumpcfg; 859 10843 Dave cbuf_t *endcp = &cfg->cbuf[cfg->ncbuf]; 860 10843 Dave helper_t *endhp = &cfg->helper[cfg->nhelper]; 861 10843 Dave pgcnt_t bitnum, end; 862 10843 Dave size_t sz, endsz, bz2size; 863 10843 Dave pfn_t pfn, off; 864 10843 Dave cbuf_t *cp; 865 10843 Dave helper_t *hp, *ohp; 866 10843 Dave dumpmlw_t mlw; 867 10843 Dave int k; 868 10843 Dave 869 10843 Dave if (cfg->maxsize == 0 || cfg->clevel < DUMP_CLEVEL_LZJB || 870 10843 Dave (dump_conflags & DUMP_ALL) != 0) 871 10843 Dave return; 872 10843 Dave 873 10843 Dave sz = 0; 874 10843 Dave cfg->found4m = 0; 875 10843 Dave cfg->foundsm = 0; 876 10843 Dave 877 10843 Dave /* bitmap of ranges used to estimate which pfns are being used */ 878 10843 Dave bzero(dumpcfg.rbitmap, BT_SIZEOFMAP(dumpcfg.rbitmapsize)); 879 10843 Dave 880 10843 Dave /* find ranges that are not being dumped to use for buffers */ 881 10843 Dave dump_init_memlist_walker(&mlw); 882 10843 Dave for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum = end) { 883 10843 Dave dump_timeleft = 
dump_timeout; 884 10843 Dave end = bitnum + CBUF_MAPNP; 885 10843 Dave pfn = dump_bitnum_to_pfn(bitnum, &mlw); 886 10843 Dave ASSERT(pfn != PFN_INVALID); 887 10843 Dave 888 10843 Dave /* skip partial range at end of mem segment */ 889 10843 Dave if (mlw.mpleft < CBUF_MAPNP) { 890 10843 Dave end = bitnum + mlw.mpleft; 891 10843 Dave continue; 892 10843 Dave } 893 10843 Dave 894 10843 Dave /* skip non aligned pages */ 895 10843 Dave off = P2PHASE(pfn, CBUF_MAPNP); 896 10843 Dave if (off != 0) { 897 10843 Dave end -= off; 898 10843 Dave continue; 899 10843 Dave } 900 10843 Dave 901 10843 Dave if (!dump_range_check(bitnum, end, pfn)) 902 10843 Dave continue; 903 10843 Dave 904 10843 Dave ASSERT((sz + CBUF_MAPSIZE) <= cfg->maxvmsize); 905 10843 Dave hat_devload(kas.a_hat, cfg->maxvm + sz, CBUF_MAPSIZE, pfn, 906 10843 Dave PROT_READ | PROT_WRITE, HAT_LOAD_NOCONSIST); 907 10843 Dave sz += CBUF_MAPSIZE; 908 10843 Dave cfg->found4m++; 909 10843 Dave 910 10843 Dave /* set the bitmap for both ends to be sure to cover the range */ 911 10843 Dave dump_set_used(pfn); 912 10843 Dave dump_set_used(pfn + CBUF_MAPNP - 1); 913 10843 Dave 914 10843 Dave if (sz >= cfg->maxsize) 915 10843 Dave goto foundmax; 916 10843 Dave } 917 10843 Dave 918 10843 Dave /* Add small pages if we can't find enough large pages. */ 919 10843 Dave dump_init_memlist_walker(&mlw); 920 10843 Dave for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum = end) { 921 10843 Dave dump_timeleft = dump_timeout; 922 10843 Dave end = bitnum + CBUF_MAPNP; 923 10843 Dave pfn = dump_bitnum_to_pfn(bitnum, &mlw); 924 10843 Dave ASSERT(pfn != PFN_INVALID); 925 10843 Dave 926 10843 Dave /* Find any non-aligned pages at start and end of segment. 
*/ 927 10843 Dave off = P2PHASE(pfn, CBUF_MAPNP); 928 10843 Dave if (mlw.mpleft < CBUF_MAPNP) { 929 10843 Dave end = bitnum + mlw.mpleft; 930 10843 Dave } else if (off != 0) { 931 10843 Dave end -= off; 932 10843 Dave } else if (cfg->found4m && dump_test_used(pfn)) { 933 10843 Dave continue; 934 10843 Dave } 935 10843 Dave 936 10843 Dave for (; bitnum < end; bitnum++, pfn++) { 937 10843 Dave dump_timeleft = dump_timeout; 938 10843 Dave if (BT_TEST(dumpcfg.bitmap, bitnum)) 939 10843 Dave continue; 940 10843 Dave if (!dump_pfn_check(pfn)) 941 10843 Dave continue; 942 10843 Dave ASSERT((sz + PAGESIZE) <= cfg->maxvmsize); 943 10843 Dave hat_devload(kas.a_hat, cfg->maxvm + sz, PAGESIZE, pfn, 944 10843 Dave PROT_READ | PROT_WRITE, HAT_LOAD_NOCONSIST); 945 10843 Dave sz += PAGESIZE; 946 10843 Dave cfg->foundsm++; 947 10843 Dave dump_set_used(pfn); 948 10843 Dave if (sz >= cfg->maxsize) 949 10843 Dave goto foundmax; 950 10843 Dave } 951 10843 Dave } 952 10843 Dave 953 10843 Dave /* Fall back to lzjb if we did not get enough memory for bzip2. */ 954 10843 Dave endsz = (cfg->maxsize * cfg->threshold) / cfg->nhelper; 955 10843 Dave if (sz < endsz) { 956 10843 Dave cfg->clevel = DUMP_CLEVEL_LZJB; 957 10843 Dave } 958 10843 Dave 959 10843 Dave /* Allocate memory for as many helpers as we can. */ 960 10843 Dave foundmax: 961 10843 Dave 962 10843 Dave /* Byte offsets into memory found and mapped above */ 963 10843 Dave endsz = sz; 964 10843 Dave sz = 0; 965 10843 Dave 966 10843 Dave /* Set the size for bzip2 state. Only bzip2 needs it. */ 967 10843 Dave bz2size = BZ2_bzCompressInitSize(dump_bzip2_level); 968 10843 Dave 969 10843 Dave /* Skip the preallocate output buffers. */ 970 10843 Dave cp = &cfg->cbuf[MINCBUFS]; 971 10843 Dave 972 10843 Dave /* Use this to move memory up from the preallocated helpers. */ 973 10843 Dave ohp = cfg->helper; 974 10843 Dave 975 10843 Dave /* Loop over all helpers and allocate memory. 
*/ 976 10843 Dave for (hp = cfg->helper; hp < endhp; hp++) { 977 10843 Dave 978 10843 Dave /* Skip preallocated helpers by checking hp->page. */ 979 10843 Dave if (hp->page == NULL) { 980 10843 Dave if (cfg->clevel <= DUMP_CLEVEL_LZJB) { 981 10843 Dave /* lzjb needs 2 1-page buffers */ 982 10843 Dave if ((sz + (2 * PAGESIZE)) > endsz) 983 10843 Dave break; 984 10843 Dave hp->page = cfg->maxvm + sz; 985 10843 Dave sz += PAGESIZE; 986 10843 Dave hp->lzbuf = cfg->maxvm + sz; 987 10843 Dave sz += PAGESIZE; 988 10843 Dave 989 10843 Dave } else if (ohp->lzbuf != NULL) { 990 10843 Dave /* re-use the preallocted lzjb page for bzip2 */ 991 10843 Dave hp->page = ohp->lzbuf; 992 10843 Dave ohp->lzbuf = NULL; 993 10843 Dave ++ohp; 994 10843 Dave 995 10843 Dave } else { 996 10843 Dave /* bzip2 needs a 1-page buffer */ 997 10843 Dave if ((sz + PAGESIZE) > endsz) 998 10843 Dave break; 999 10843 Dave hp->page = cfg->maxvm + sz; 1000 10843 Dave sz += PAGESIZE; 1001 10843 Dave } 1002 10843 Dave } 1003 10843 Dave 1004 10843 Dave /* 1005 10843 Dave * Add output buffers per helper. The number of 1006 10843 Dave * buffers per helper is determined by the ratio of 1007 10843 Dave * ncbuf to nhelper. 1008 10843 Dave */ 1009 10843 Dave for (k = 0; cp < endcp && (sz + CBUF_SIZE) <= endsz && 1010 10843 Dave k < NCBUF_PER_HELPER; k++) { 1011 10843 Dave cp->state = CBUF_FREEBUF; 1012 10843 Dave cp->size = CBUF_SIZE; 1013 10843 Dave cp->buf = cfg->maxvm + sz; 1014 10843 Dave sz += CBUF_SIZE; 1015 10843 Dave ++cp; 1016 10843 Dave } 1017 10843 Dave 1018 10843 Dave /* 1019 10843 Dave * bzip2 needs compression state. Use the dumpbzalloc 1020 10843 Dave * and dumpbzfree callbacks to allocate the memory. 1021 10843 Dave * bzip2 does allocation only at init time. 
1022 10843 Dave */ 1023 10843 Dave if (cfg->clevel >= DUMP_CLEVEL_BZIP2) { 1024 10843 Dave if ((sz + bz2size) > endsz) { 1025 10843 Dave hp->page = NULL; 1026 10843 Dave break; 1027 10843 Dave } else { 1028 10843 Dave hp->bzstream.opaque = &sz; 1029 10843 Dave hp->bzstream.bzalloc = dumpbzalloc; 1030 10843 Dave hp->bzstream.bzfree = dumpbzfree; 1031 10843 Dave (void) BZ2_bzCompressInit(&hp->bzstream, 1032 10843 Dave dump_bzip2_level, 0, 0); 1033 10843 Dave hp->bzstream.opaque = NULL; 1034 10843 Dave } 1035 10843 Dave } 1036 10843 Dave } 1037 10843 Dave 1038 10843 Dave /* Finish allocating output buffers */ 1039 10843 Dave for (; cp < endcp && (sz + CBUF_SIZE) <= endsz; cp++) { 1040 10843 Dave cp->state = CBUF_FREEBUF; 1041 10843 Dave cp->size = CBUF_SIZE; 1042 10843 Dave cp->buf = cfg->maxvm + sz; 1043 10843 Dave sz += CBUF_SIZE; 1044 10843 Dave } 1045 10843 Dave 1046 10843 Dave /* Enable IS_DUMP_PAGE macro, which checks for pages we took. */ 1047 10843 Dave if (cfg->found4m || cfg->foundsm) 1048 10843 Dave dump_check_used = 1; 1049 10843 Dave 1050 10843 Dave ASSERT(sz <= endsz); 1051 0 stevel } 1052 0 stevel 1053 0 stevel static void 1054 0 stevel dumphdr_init(void) 1055 0 stevel { 1056 0 stevel pgcnt_t npages = 0; 1057 0 stevel 1058 0 stevel ASSERT(MUTEX_HELD(&dump_lock)); 1059 0 stevel 1060 0 stevel if (dumphdr == NULL) { 1061 0 stevel dumphdr = kmem_zalloc(sizeof (dumphdr_t), KM_SLEEP); 1062 0 stevel dumphdr->dump_magic = DUMP_MAGIC; 1063 0 stevel dumphdr->dump_version = DUMP_VERSION; 1064 0 stevel dumphdr->dump_wordsize = DUMP_WORDSIZE; 1065 0 stevel dumphdr->dump_pageshift = PAGESHIFT; 1066 0 stevel dumphdr->dump_pagesize = PAGESIZE; 1067 0 stevel dumphdr->dump_utsname = utsname; 1068 0 stevel (void) strcpy(dumphdr->dump_platform, platform); 1069 10843 Dave dumpbuf.size = dumpbuf_iosize(maxphys); 1070 10843 Dave dumpbuf.start = kmem_alloc(dumpbuf.size, KM_SLEEP); 1071 10843 Dave dumpbuf.end = dumpbuf.start + dumpbuf.size; 1072 10843 Dave dumpcfg.pids = 
kmem_alloc(v.v_proc * sizeof (pid_t), KM_SLEEP); 1073 10843 Dave dumpcfg.helpermap = kmem_zalloc(BT_SIZEOFMAP(NCPU), KM_SLEEP); 1074 10843 Dave LOCK_INIT_HELD(&dumpcfg.helper_lock); 1075 0 stevel } 1076 0 stevel 1077 5084 johnlev npages = num_phys_pages(); 1078 0 stevel 1079 10843 Dave if (dumpcfg.bitmapsize != npages) { 1080 10843 Dave size_t rlen = CBUF_MAPP2R(P2ROUNDUP(npages, CBUF_MAPNP)); 1081 0 stevel void *map = kmem_alloc(BT_SIZEOFMAP(npages), KM_SLEEP); 1082 10843 Dave void *rmap = kmem_alloc(BT_SIZEOFMAP(rlen), KM_SLEEP); 1083 10843 Dave 1084 10843 Dave if (dumpcfg.bitmap != NULL) 1085 10843 Dave kmem_free(dumpcfg.bitmap, BT_SIZEOFMAP(dumpcfg. 1086 10843 Dave bitmapsize)); 1087 10843 Dave if (dumpcfg.rbitmap != NULL) 1088 10843 Dave kmem_free(dumpcfg.rbitmap, BT_SIZEOFMAP(dumpcfg. 1089 10843 Dave rbitmapsize)); 1090 10843 Dave dumpcfg.bitmap = map; 1091 10843 Dave dumpcfg.bitmapsize = npages; 1092 10843 Dave dumpcfg.rbitmap = rmap; 1093 10843 Dave dumpcfg.rbitmapsize = rlen; 1094 0 stevel } 1095 0 stevel } 1096 0 stevel 1097 0 stevel /* 1098 0 stevel * Establish a new dump device. 1099 0 stevel */ 1100 0 stevel int 1101 0 stevel dumpinit(vnode_t *vp, char *name, int justchecking) 1102 0 stevel { 1103 0 stevel vnode_t *cvp; 1104 0 stevel vattr_t vattr; 1105 0 stevel vnode_t *cdev_vp; 1106 0 stevel int error = 0; 1107 0 stevel 1108 0 stevel ASSERT(MUTEX_HELD(&dump_lock)); 1109 0 stevel 1110 0 stevel dumphdr_init(); 1111 0 stevel 1112 0 stevel cvp = common_specvp(vp); 1113 0 stevel if (cvp == dumpvp) 1114 0 stevel return (0); 1115 0 stevel 1116 0 stevel /* 1117 0 stevel * Determine whether this is a plausible dump device. We want either: 1118 0 stevel * (1) a real device that's not mounted and has a cb_dump routine, or 1119 0 stevel * (2) a swapfile on some filesystem that has a vop_dump routine. 
1120 0 stevel */ 1121 5331 amw if ((error = VOP_OPEN(&cvp, FREAD | FWRITE, kcred, NULL)) != 0) 1122 0 stevel return (error); 1123 0 stevel 1124 0 stevel vattr.va_mask = AT_SIZE | AT_TYPE | AT_RDEV; 1125 5331 amw if ((error = VOP_GETATTR(cvp, &vattr, 0, kcred, NULL)) == 0) { 1126 0 stevel if (vattr.va_type == VBLK || vattr.va_type == VCHR) { 1127 0 stevel if (devopsp[getmajor(vattr.va_rdev)]-> 1128 0 stevel devo_cb_ops->cb_dump == nodev) 1129 0 stevel error = ENOTSUP; 1130 0 stevel else if (vfs_devismounted(vattr.va_rdev)) 1131 0 stevel error = EBUSY; 1132 10588 Eric if (strcmp(ddi_driver_name(VTOS(cvp)->s_dip), 1133 10588 Eric ZFS_DRIVER) == 0 && 1134 10588 Eric IS_SWAPVP(common_specvp(cvp))) 1135 10588 Eric error = EBUSY; 1136 0 stevel } else { 1137 0 stevel if (vn_matchopval(cvp, VOPNAME_DUMP, fs_nosys) || 1138 0 stevel !IS_SWAPVP(cvp)) 1139 0 stevel error = ENOTSUP; 1140 0 stevel } 1141 0 stevel } 1142 0 stevel 1143 0 stevel if (error == 0 && vattr.va_size < 2 * DUMP_LOGSIZE + DUMP_ERPTSIZE) 1144 0 stevel error = ENOSPC; 1145 0 stevel 1146 0 stevel if (error || justchecking) { 1147 5331 amw (void) VOP_CLOSE(cvp, FREAD | FWRITE, 1, (offset_t)0, 1148 5331 amw kcred, NULL); 1149 0 stevel return (error); 1150 0 stevel } 1151 0 stevel 1152 0 stevel VN_HOLD(cvp); 1153 0 stevel 1154 0 stevel if (dumpvp != NULL) 1155 0 stevel dumpfini(); /* unconfigure the old dump device */ 1156 0 stevel 1157 0 stevel dumpvp = cvp; 1158 0 stevel dumpvp_size = vattr.va_size & -DUMP_OFFSET; 1159 0 stevel dumppath = kmem_alloc(strlen(name) + 1, KM_SLEEP); 1160 0 stevel (void) strcpy(dumppath, name); 1161 10843 Dave dumpbuf.iosize = 0; 1162 0 stevel 1163 0 stevel /* 1164 0 stevel * If the dump device is a block device, attempt to open up the 1165 0 stevel * corresponding character device and determine its maximum transfer 1166 0 stevel * size. We use this information to potentially resize dumpbuf to a 1167 0 stevel * larger and more optimal size for performing i/o to the dump device. 
1168 0 stevel */ 1169 0 stevel if (cvp->v_type == VBLK && 1170 0 stevel (cdev_vp = makespecvp(VTOS(cvp)->s_dev, VCHR)) != NULL) { 1171 5331 amw if (VOP_OPEN(&cdev_vp, FREAD | FWRITE, kcred, NULL) == 0) { 1172 0 stevel size_t blk_size; 1173 0 stevel struct dk_cinfo dki; 1174 9889 Larry struct dk_minfo minf; 1175 0 stevel 1176 9889 Larry if (VOP_IOCTL(cdev_vp, DKIOCGMEDIAINFO, 1177 9889 Larry (intptr_t)&minf, FKIOCTL, kcred, NULL, NULL) 1178 9889 Larry == 0 && minf.dki_lbsize != 0) 1179 9889 Larry blk_size = minf.dki_lbsize; 1180 0 stevel else 1181 0 stevel blk_size = DEV_BSIZE; 1182 0 stevel 1183 0 stevel if (VOP_IOCTL(cdev_vp, DKIOCINFO, (intptr_t)&dki, 1184 5331 amw FKIOCTL, kcred, NULL, NULL) == 0) { 1185 10843 Dave dumpbuf.iosize = dki.dki_maxtransfer * blk_size; 1186 0 stevel dumpbuf_resize(); 1187 0 stevel } 1188 6423 gw25295 /* 1189 10588 Eric * If we are working with a zvol then dumpify it 1190 10588 Eric * if it's not being used as swap. 1191 6423 gw25295 */ 1192 6423 gw25295 if (strcmp(dki.dki_dname, ZVOL_DRIVER) == 0) { 1193 10588 Eric if (IS_SWAPVP(common_specvp(cvp))) 1194 10588 Eric error = EBUSY; 1195 10588 Eric else if ((error = VOP_IOCTL(cdev_vp, 1196 6423 gw25295 DKIOCDUMPINIT, NULL, FKIOCTL, kcred, 1197 10588 Eric NULL, NULL)) != 0) 1198 6423 gw25295 dumpfini(); 1199 6423 gw25295 } 1200 0 stevel 1201 5331 amw (void) VOP_CLOSE(cdev_vp, FREAD | FWRITE, 1, 0, 1202 5331 amw kcred, NULL); 1203 0 stevel } 1204 0 stevel 1205 0 stevel VN_RELE(cdev_vp); 1206 0 stevel } 1207 0 stevel 1208 0 stevel cmn_err(CE_CONT, "?dump on %s size %llu MB\n", name, dumpvp_size >> 20); 1209 10843 Dave 1210 10843 Dave dump_update_clevel(); 1211 0 stevel 1212 6423 gw25295 return (error); 1213 0 stevel } 1214 0 stevel 1215 0 stevel void 1216 0 stevel dumpfini(void) 1217 0 stevel { 1218 6423 gw25295 vattr_t vattr; 1219 6423 gw25295 boolean_t is_zfs = B_FALSE; 1220 6423 gw25295 vnode_t *cdev_vp; 1221 0 stevel ASSERT(MUTEX_HELD(&dump_lock)); 1222 0 stevel 1223 0 stevel 
kmem_free(dumppath, strlen(dumppath) + 1); 1224 6423 gw25295 1225 6423 gw25295 /* 1226 6423 gw25295 * Determine if we are using zvols for our dump device 1227 6423 gw25295 */ 1228 6423 gw25295 vattr.va_mask = AT_RDEV; 1229 6423 gw25295 if (VOP_GETATTR(dumpvp, &vattr, 0, kcred, NULL) == 0) { 1230 6423 gw25295 is_zfs = (getmajor(vattr.va_rdev) == 1231 6423 gw25295 ddi_name_to_major(ZFS_DRIVER)) ? B_TRUE : B_FALSE; 1232 6423 gw25295 } 1233 6423 gw25295 1234 6423 gw25295 /* 1235 6423 gw25295 * If we have a zvol dump device then we call into zfs so 1236 6423 gw25295 * that it may have a chance to cleanup. 1237 6423 gw25295 */ 1238 6423 gw25295 if (is_zfs && 1239 6423 gw25295 (cdev_vp = makespecvp(VTOS(dumpvp)->s_dev, VCHR)) != NULL) { 1240 6423 gw25295 if (VOP_OPEN(&cdev_vp, FREAD | FWRITE, kcred, NULL) == 0) { 1241 6423 gw25295 (void) VOP_IOCTL(cdev_vp, DKIOCDUMPFINI, NULL, FKIOCTL, 1242 6423 gw25295 kcred, NULL, NULL); 1243 6423 gw25295 (void) VOP_CLOSE(cdev_vp, FREAD | FWRITE, 1, 0, 1244 6423 gw25295 kcred, NULL); 1245 6423 gw25295 } 1246 6423 gw25295 VN_RELE(cdev_vp); 1247 6423 gw25295 } 1248 0 stevel 1249 5331 amw (void) VOP_CLOSE(dumpvp, FREAD | FWRITE, 1, (offset_t)0, kcred, NULL); 1250 0 stevel 1251 0 stevel VN_RELE(dumpvp); 1252 0 stevel 1253 0 stevel dumpvp = NULL; 1254 0 stevel dumpvp_size = 0; 1255 0 stevel dumppath = NULL; 1256 0 stevel } 1257 0 stevel 1258 0 stevel static offset_t 1259 0 stevel dumpvp_flush(void) 1260 0 stevel { 1261 10843 Dave size_t size = P2ROUNDUP(dumpbuf.cur - dumpbuf.start, PAGESIZE); 1262 10843 Dave hrtime_t iotime; 1263 0 stevel int err; 1264 0 stevel 1265 10843 Dave if (dumpbuf.vp_off + size > dumpbuf.vp_limit) { 1266 0 stevel dump_ioerr = ENOSPC; 1267 10843 Dave dumpbuf.vp_off = dumpbuf.vp_limit; 1268 0 stevel } else if (size != 0) { 1269 10843 Dave iotime = gethrtime(); 1270 10843 Dave dumpsync.iowait += iotime - dumpsync.iowaitts; 1271 0 stevel if (panicstr) 1272 10843 Dave err = VOP_DUMP(dumpvp, dumpbuf.start, 1273 10843 Dave 
lbtodb(dumpbuf.vp_off), btod(size), NULL); 1274 0 stevel else 1275 10843 Dave err = vn_rdwr(UIO_WRITE, dumpbuf.cdev_vp != NULL ? 1276 10843 Dave dumpbuf.cdev_vp : dumpvp, dumpbuf.start, size, 1277 10843 Dave dumpbuf.vp_off, UIO_SYSSPACE, 0, dumpbuf.vp_limit, 1278 0 stevel kcred, 0); 1279 0 stevel if (err && dump_ioerr == 0) 1280 0 stevel dump_ioerr = err; 1281 10843 Dave dumpsync.iowaitts = gethrtime(); 1282 10843 Dave dumpsync.iotime += dumpsync.iowaitts - iotime; 1283 10843 Dave dumpsync.nwrite += size; 1284 10843 Dave dumpbuf.vp_off += size; 1285 0 stevel } 1286 10843 Dave dumpbuf.cur = dumpbuf.start; 1287 0 stevel dump_timeleft = dump_timeout; 1288 10843 Dave return (dumpbuf.vp_off); 1289 0 stevel } 1290 0 stevel 1291 10843 Dave /* maximize write speed by keeping seek offset aligned with size */ 1292 0 stevel void 1293 0 stevel dumpvp_write(const void *va, size_t size) 1294 0 stevel { 1295 10843 Dave size_t len, off, sz; 1296 10843 Dave 1297 0 stevel while (size != 0) { 1298 10843 Dave len = MIN(size, dumpbuf.end - dumpbuf.cur); 1299 0 stevel if (len == 0) { 1300 10843 Dave off = P2PHASE(dumpbuf.vp_off, dumpbuf.size); 1301 10843 Dave if (off == 0 || !ISP2(dumpbuf.size)) { 1302 10843 Dave (void) dumpvp_flush(); 1303 10843 Dave } else { 1304 10843 Dave sz = dumpbuf.size - off; 1305 10843 Dave dumpbuf.cur = dumpbuf.start + sz; 1306 10843 Dave (void) dumpvp_flush(); 1307 10843 Dave ovbcopy(dumpbuf.start + sz, dumpbuf.start, off); 1308 10843 Dave dumpbuf.cur += off; 1309 10843 Dave } 1310 0 stevel } else { 1311 10843 Dave bcopy(va, dumpbuf.cur, len); 1312 0 stevel va = (char *)va + len; 1313 10843 Dave dumpbuf.cur += len; 1314 0 stevel size -= len; 1315 0 stevel } 1316 0 stevel } 1317 0 stevel } 1318 0 stevel 1319 0 stevel /*ARGSUSED*/ 1320 0 stevel static void 1321 0 stevel dumpvp_ksyms_write(const void *src, void *dst, size_t size) 1322 0 stevel { 1323 0 stevel dumpvp_write(src, size); 1324 0 stevel } 1325 0 stevel 1326 0 stevel /* 1327 0 stevel * Mark 'pfn' in 
the bitmap and dump its translation table entry. 1328 0 stevel */ 1329 0 stevel void 1330 0 stevel dump_addpage(struct as *as, void *va, pfn_t pfn) 1331 0 stevel { 1332 0 stevel mem_vtop_t mem_vtop; 1333 0 stevel pgcnt_t bitnum; 1334 0 stevel 1335 0 stevel if ((bitnum = dump_pfn_to_bitnum(pfn)) != (pgcnt_t)-1) { 1336 10843 Dave if (!BT_TEST(dumpcfg.bitmap, bitnum)) { 1337 0 stevel dumphdr->dump_npages++; 1338 10843 Dave BT_SET(dumpcfg.bitmap, bitnum); 1339 0 stevel } 1340 0 stevel dumphdr->dump_nvtop++; 1341 0 stevel mem_vtop.m_as = as; 1342 0 stevel mem_vtop.m_va = va; 1343 0 stevel mem_vtop.m_pfn = pfn; 1344 0 stevel dumpvp_write(&mem_vtop, sizeof (mem_vtop_t)); 1345 0 stevel } 1346 0 stevel dump_timeleft = dump_timeout; 1347 0 stevel } 1348 0 stevel 1349 0 stevel /* 1350 0 stevel * Mark 'pfn' in the bitmap 1351 0 stevel */ 1352 0 stevel void 1353 0 stevel dump_page(pfn_t pfn) 1354 0 stevel { 1355 0 stevel pgcnt_t bitnum; 1356 0 stevel 1357 0 stevel if ((bitnum = dump_pfn_to_bitnum(pfn)) != (pgcnt_t)-1) { 1358 10843 Dave if (!BT_TEST(dumpcfg.bitmap, bitnum)) { 1359 0 stevel dumphdr->dump_npages++; 1360 10843 Dave BT_SET(dumpcfg.bitmap, bitnum); 1361 0 stevel } 1362 0 stevel } 1363 0 stevel dump_timeleft = dump_timeout; 1364 0 stevel } 1365 0 stevel 1366 0 stevel /* 1367 0 stevel * Dump the <as, va, pfn> information for a given address space. 1368 0 stevel * SEGOP_DUMP() will call dump_addpage() for each page in the segment. 
1369 0 stevel */ 1370 0 stevel static void 1371 0 stevel dump_as(struct as *as) 1372 0 stevel { 1373 0 stevel struct seg *seg; 1374 0 stevel 1375 0 stevel AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 1376 0 stevel for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) { 1377 0 stevel if (seg->s_as != as) 1378 0 stevel break; 1379 0 stevel if (seg->s_ops == NULL) 1380 0 stevel continue; 1381 0 stevel SEGOP_DUMP(seg); 1382 0 stevel } 1383 0 stevel AS_LOCK_EXIT(as, &as->a_lock); 1384 0 stevel 1385 0 stevel if (seg != NULL) 1386 0 stevel cmn_err(CE_WARN, "invalid segment %p in address space %p", 1387 0 stevel (void *)seg, (void *)as); 1388 0 stevel } 1389 0 stevel 1390 0 stevel static int 1391 0 stevel dump_process(pid_t pid) 1392 0 stevel { 1393 0 stevel proc_t *p = sprlock(pid); 1394 0 stevel 1395 0 stevel if (p == NULL) 1396 0 stevel return (-1); 1397 0 stevel if (p->p_as != &kas) { 1398 0 stevel mutex_exit(&p->p_lock); 1399 0 stevel dump_as(p->p_as); 1400 0 stevel mutex_enter(&p->p_lock); 1401 0 stevel } 1402 0 stevel 1403 0 stevel sprunlock(p); 1404 0 stevel 1405 0 stevel return (0); 1406 0 stevel } 1407 0 stevel 1408 0 stevel void 1409 0 stevel dump_ereports(void) 1410 0 stevel { 1411 0 stevel u_offset_t dumpvp_start; 1412 0 stevel erpt_dump_t ed; 1413 0 stevel 1414 0 stevel if (dumpvp == NULL || dumphdr == NULL) 1415 0 stevel return; 1416 0 stevel 1417 10843 Dave dumpbuf.cur = dumpbuf.start; 1418 10843 Dave dumpbuf.vp_limit = dumpvp_size - (DUMP_OFFSET + DUMP_LOGSIZE); 1419 10843 Dave dumpvp_start = dumpbuf.vp_limit - DUMP_ERPTSIZE; 1420 10843 Dave dumpbuf.vp_off = dumpvp_start; 1421 0 stevel 1422 0 stevel fm_ereport_dump(); 1423 0 stevel if (panicstr) 1424 0 stevel errorq_dump(); 1425 0 stevel 1426 0 stevel bzero(&ed, sizeof (ed)); /* indicate end of ereports */ 1427 0 stevel dumpvp_write(&ed, sizeof (ed)); 1428 0 stevel (void) dumpvp_flush(); 1429 0 stevel 1430 0 stevel if (!panicstr) { 1431 0 stevel (void) VOP_PUTPAGE(dumpvp, dumpvp_start, 1432 10843 Dave 
(size_t)(dumpbuf.vp_off - dumpvp_start), 1433 5331 amw B_INVAL | B_FORCE, kcred, NULL); 1434 0 stevel } 1435 0 stevel } 1436 0 stevel 1437 0 stevel void 1438 0 stevel dump_messages(void) 1439 0 stevel { 1440 0 stevel log_dump_t ld; 1441 0 stevel mblk_t *mctl, *mdata; 1442 0 stevel queue_t *q, *qlast; 1443 0 stevel u_offset_t dumpvp_start; 1444 0 stevel 1445 0 stevel if (dumpvp == NULL || dumphdr == NULL || log_consq == NULL) 1446 0 stevel return; 1447 0 stevel 1448 10843 Dave dumpbuf.cur = dumpbuf.start; 1449 10843 Dave dumpbuf.vp_limit = dumpvp_size - DUMP_OFFSET; 1450 10843 Dave dumpvp_start = dumpbuf.vp_limit - DUMP_LOGSIZE; 1451 10843 Dave dumpbuf.vp_off = dumpvp_start; 1452 0 stevel 1453 0 stevel qlast = NULL; 1454 0 stevel do { 1455 0 stevel for (q = log_consq; q->q_next != qlast; q = q->q_next) 1456 0 stevel continue; 1457 0 stevel for (mctl = q->q_first; mctl != NULL; mctl = mctl->b_next) { 1458 0 stevel dump_timeleft = dump_timeout; 1459 0 stevel mdata = mctl->b_cont; 1460 0 stevel ld.ld_magic = LOG_MAGIC; 1461 0 stevel ld.ld_msgsize = MBLKL(mctl->b_cont); 1462 0 stevel ld.ld_csum = checksum32(mctl->b_rptr, MBLKL(mctl)); 1463 0 stevel ld.ld_msum = checksum32(mdata->b_rptr, MBLKL(mdata)); 1464 0 stevel dumpvp_write(&ld, sizeof (ld)); 1465 0 stevel dumpvp_write(mctl->b_rptr, MBLKL(mctl)); 1466 0 stevel dumpvp_write(mdata->b_rptr, MBLKL(mdata)); 1467 0 stevel } 1468 0 stevel } while ((qlast = q) != log_consq); 1469 0 stevel 1470 0 stevel ld.ld_magic = 0; /* indicate end of messages */ 1471 0 stevel dumpvp_write(&ld, sizeof (ld)); 1472 0 stevel (void) dumpvp_flush(); 1473 0 stevel if (!panicstr) { 1474 0 stevel (void) VOP_PUTPAGE(dumpvp, dumpvp_start, 1475 10843 Dave (size_t)(dumpbuf.vp_off - dumpvp_start), 1476 5331 amw B_INVAL | B_FORCE, kcred, NULL); 1477 0 stevel } 1478 0 stevel } 1479 0 stevel 1480 10843 Dave /* 1481 10843 Dave * The following functions are called on multiple CPUs during dump. 
* They must not use most kernel services, because all cross-calls are
 * disabled during panic. Therefore, blocking locks and cache flushes
 * will not work.
 */

/*
 * Copy pages, trapping ECC errors. Also, for robustness, trap data
 * access in case something goes wrong in the hat layer and the
 * mapping is broken.
 *
 * One page (PAGESIZE bytes) is copied from src to dst, one long at a
 * time. When a trap is caught, the destination word is filled with a
 * marker pattern ("bad ECC" or "bad address") instead of source data and
 * the copy carries on with the following word.
 *
 * Returns -1 if no trap was taken, otherwise the byte offset within the
 * page of the first word that trapped.
 */
static int
dump_pagecopy(void *src, void *dst)
{
	long *wsrc = (long *)src;
	long *wdst = (long *)dst;
	const ulong_t ncopies = PAGESIZE / sizeof (long);
	/*
	 * w (current word index) and ueoff (first error offset) are read in
	 * the on_trap() branch after the copy loop has modified them.
	 * NOTE(review): presumably they are volatile because on_trap()
	 * returns a second time, setjmp-style, when a trap is taken --
	 * confirm against sys/ontrap.h.
	 */
	volatile int w = 0;
	volatile int ueoff = -1;
	on_trap_data_t otd;

	if (on_trap(&otd, OT_DATA_EC | OT_DATA_ACCESS)) {
		/* Only the first failing offset is reported to the caller. */
		if (ueoff == -1)
			ueoff = w * sizeof (long);
		/* report "bad ECC" or "bad address" */
#ifdef _LP64
		if (otd.ot_trap & OT_DATA_EC)
			wdst[w++] = 0x00badecc00badecc;
		else
			wdst[w++] = 0x00badadd00badadd;
#else
		if (otd.ot_trap & OT_DATA_EC)
			wdst[w++] = 0x00badecc;
		else
			wdst[w++] = 0x00badadd;
#endif
	}
	/* Copy (or resume copying) from word w to the end of the page. */
	while (w < ncopies) {
		wdst[w] = wsrc[w];
		w++;
	}
	no_trap();
	return (ueoff);
}

/*
 * Drop one reference on the queue's open count. For a live dump the
 * decrement is done under cq->mutex and cq->cv is signalled; otherwise
 * only the atomic decrement is performed.
 */
static void
dumpsys_close_cq(cqueue_t *cq, int live)
{
	if (live) {
		mutex_enter(&cq->mutex);
		atomic_dec_uint(&cq->open);
		cv_signal(&cq->cv);
		mutex_exit(&cq->mutex);
	} else {
		atomic_dec_uint(&cq->open);
	}
}

/*
 * Acquire the spin lock lp, delaying between attempts. The delay comes
 * from mutex_lock_backoff(); after ncpus consecutive failed attempts the
 * backoff is restarted from mutex_lock_backoff(0).
 */
static inline void
dumpsys_spinlock(lock_t *lp)
{
	uint_t backoff = 0;
	int loop_count = 0;

	while (LOCK_HELD(lp) || !lock_spin_try(lp)) {
		if (++loop_count >= ncpus) {
			backoff = mutex_lock_backoff(0);
			loop_count = 0;
		} else {
			backoff = mutex_lock_backoff(backoff);
		}
		mutex_lock_delay(backoff);
	}
}

/*
 * Release a lock taken with dumpsys_spinlock().
 */
static inline void
dumpsys_spinunlock(lock_t *lp)
{
	lock_clear(lp);
}

/*
 * Lock a queue: the mutex for a live dump, the spin lock otherwise.
 */
static inline void
dumpsys_lock(cqueue_t *cq, int live)
{
	if (live)
		mutex_enter(&cq->mutex);
	else
		dumpsys_spinlock(&cq->spinlock);
}

/*
 * Unlock a queue locked with dumpsys_lock(). For a live dump, cq->cv is
 * signalled first when signal is non-zero; the signal argument is not
 * used in the spin lock case.
 */
static inline void
dumpsys_unlock(cqueue_t *cq, int live, int signal)
{
	if (live) {
		if (signal)
			cv_signal(&cq->cv);
		mutex_exit(&cq->mutex);
	} else {
		dumpsys_spinunlock(&cq->spinlock);
	}
}

/*
 * Wait for a queue to change. Called with the queue locked (see
 * dumpsys_get_cq()) and returns with it locked. A live dump sleeps on
 * cq->cv. Otherwise the spin lock is dropped and the CPU polls until the
 * queue has a first entry or is no longer open, then re-takes the lock.
 *
 * NOTE(review): the polling loop relies on cq->open and cq->first being
 * re-read on every iteration (e.g. declared volatile) -- confirm in the
 * cqueue_t definition.
 */
static void
dumpsys_wait_cq(cqueue_t *cq, int live)
{
	if (live) {
		cv_wait(&cq->cv, &cq->mutex);
	} else {
		dumpsys_spinunlock(&cq->spinlock);
		while (cq->open)
			if (cq->first)
				break;
		dumpsys_spinlock(&cq->spinlock);
	}
}

/*
 * Append buffer cp to the tail of queue cq after setting its state to
 * newstate. A NULL cp is ignored.
 */
static void
dumpsys_put_cq(cqueue_t *cq, cbuf_t *cp, int newstate, int live)
{
	if (cp == NULL)
		return;

	dumpsys_lock(cq, live);

	/*
	 * cq->ts is set by dumpsys_get_cq() when the queue drains; add the
	 * time since then to the queue's accumulated "empty" time.
	 */
	if (cq->ts != 0) {
		cq->empty += gethrtime() - cq->ts;
		cq->ts = 0;
	}

	cp->state = newstate;
	cp->next = NULL;
	if (cq->last == NULL)
		cq->first = cp;
	else
		cq->last->next = cp;
	cq->last = cp;

	dumpsys_unlock(cq, live, 1);
}

/*
 * Remove and return the buffer at the head of queue cq. If the queue is
 * empty but still open, wait for it to change. Returns NULL once the
 * queue is both empty and closed (cq->open == 0).
 */
static cbuf_t *
dumpsys_get_cq(cqueue_t *cq, int live)
{
	cbuf_t *cp;
	hrtime_t now = gethrtime();

	dumpsys_lock(cq, live);

	/* CONSTCOND */
	while (1) {
		cp = (cbuf_t *)cq->first;
		if (cp == NULL) {
			if (cq->open == 0)
				break;
			dumpsys_wait_cq(cq, live);
			continue;
		}
		cq->first = cp->next;
		if (cq->first == NULL) {
			/* Queue just drained: note when it became empty. */
			cq->last = NULL;
			cq->ts = now;
		}
		break;
	}

	/* Signal when entries remain or when the queue has been closed. */
	dumpsys_unlock(cq, live, cq->first != NULL || cq->open == 0);
	return (cp);
}

/*
 * Send an error message to the console. If the main task is running
 * just write the message via uprintf. If a helper is running the
 * message has to be put on a queue for the main task. Setting fmt to
 * NULL means flush the error message buffer. If fmt is not NULL, just
 * add the text to the existing buffer.
1656 10843 Dave */ 1657 10843 Dave static void 1658 10843 Dave dumpsys_errmsg(helper_t *hp, const char *fmt, ...) 1659 10843 Dave { 1660 10843 Dave dumpsync_t *ds = hp->ds; 1661 10843 Dave cbuf_t *cp = hp->cperr; 1662 10843 Dave va_list adx; 1663 10843 Dave 1664 10843 Dave if (hp->helper == MAINHELPER) { 1665 10843 Dave if (fmt != NULL) { 1666 10843 Dave if (ds->neednl) { 1667 10843 Dave uprintf("\n"); 1668 10843 Dave ds->neednl = 0; 1669 10843 Dave } 1670 10843 Dave va_start(adx, fmt); 1671 10843 Dave vuprintf(fmt, adx); 1672 10843 Dave va_end(adx); 1673 10843 Dave } 1674 10843 Dave } else if (fmt == NULL) { 1675 10843 Dave if (cp != NULL) { 1676 10843 Dave CQ_PUT(mainq, cp, CBUF_ERRMSG); 1677 10843 Dave hp->cperr = NULL; 1678 10843 Dave } 1679 10843 Dave } else { 1680 10843 Dave if (hp->cperr == NULL) { 1681 10843 Dave cp = CQ_GET(freebufq); 1682 10843 Dave hp->cperr = cp; 1683 10843 Dave cp->used = 0; 1684 10843 Dave } 1685 10843 Dave va_start(adx, fmt); 1686 10843 Dave cp->used += vsnprintf(cp->buf + cp->used, cp->size - cp->used, 1687 10843 Dave fmt, adx); 1688 10843 Dave va_end(adx); 1689 10843 Dave if ((cp->used + LOG_MSGSIZE) > cp->size) { 1690 10843 Dave CQ_PUT(mainq, cp, CBUF_ERRMSG); 1691 10843 Dave hp->cperr = NULL; 1692 10843 Dave } 1693 10843 Dave } 1694 10843 Dave } 1695 10843 Dave 1696 10843 Dave /* 1697 10843 Dave * Write an output buffer to the dump file. If the main task is 1698 10843 Dave * running just write the data. If a helper is running the output is 1699 10843 Dave * placed on a queue for the main task. 
1700 10843 Dave */ 1701 10843 Dave static void 1702 10843 Dave dumpsys_swrite(helper_t *hp, cbuf_t *cp, size_t used) 1703 10843 Dave { 1704 10843 Dave dumpsync_t *ds = hp->ds; 1705 10843 Dave 1706 10843 Dave if (hp->helper == MAINHELPER) { 1707 10843 Dave HRSTART(ds->perpage, write); 1708 10843 Dave dumpvp_write(cp->buf, used); 1709 10843 Dave HRSTOP(ds->perpage, write); 1710 10843 Dave CQ_PUT(freebufq, cp, CBUF_FREEBUF); 1711 10843 Dave } else { 1712 10843 Dave cp->used = used; 1713 10843 Dave CQ_PUT(mainq, cp, CBUF_WRITE); 1714 10843 Dave } 1715 10843 Dave } 1716 10843 Dave 1717 10843 Dave /* 1718 10843 Dave * Copy one page within the mapped range. The offset starts at 0 and 1719 10843 Dave * is relative to the first pfn. cp->buf + cp->off is the address of 1720 10843 Dave * the first pfn. If dump_pagecopy returns a UE offset, create an 1721 10843 Dave * error message. Returns the offset to the next pfn in the range 1722 10843 Dave * selected by the bitmap. 1723 10843 Dave */ 1724 10843 Dave static int 1725 10843 Dave dumpsys_copy_page(helper_t *hp, int offset) 1726 10843 Dave { 1727 10843 Dave cbuf_t *cp = hp->cpin; 1728 10843 Dave int ueoff; 1729 10843 Dave 1730 10843 Dave ASSERT(cp->off + offset + PAGESIZE <= cp->size); 1731 10843 Dave ASSERT(BT_TEST(dumpcfg.bitmap, cp->bitnum)); 1732 10843 Dave 1733 10843 Dave ueoff = dump_pagecopy(cp->buf + cp->off + offset, hp->page); 1734 10843 Dave 1735 10843 Dave /* ueoff is the offset in the page to a UE error */ 1736 10843 Dave if (ueoff != -1) { 1737 10843 Dave uint64_t pa = ptob(cp->pfn) + offset + ueoff; 1738 10843 Dave 1739 11178 Dave dumpsys_errmsg(hp, "cpu %d: memory error at PA 0x%08x.%08x\n", 1740 11178 Dave CPU->cpu_id, (uint32_t)(pa >> 32), (uint32_t)pa); 1741 10843 Dave } 1742 10843 Dave 1743 10843 Dave /* 1744 10843 Dave * Advance bitnum and offset to the next input page for the 1745 10843 Dave * next call to this function. 
1746 10843 Dave */ 1747 10843 Dave offset += PAGESIZE; 1748 10843 Dave cp->bitnum++; 1749 10843 Dave while (cp->off + offset < cp->size) { 1750 10843 Dave if (BT_TEST(dumpcfg.bitmap, cp->bitnum)) 1751 10843 Dave break; 1752 10843 Dave offset += PAGESIZE; 1753 10843 Dave cp->bitnum++; 1754 10843 Dave } 1755 10843 Dave 1756 10843 Dave return (offset); 1757 10843 Dave } 1758 10843 Dave 1759 10843 Dave /* 1760 10843 Dave * Read the helper queue, and copy one mapped page. Return 0 when 1761 10843 Dave * done. Return 1 when a page has been copied into hp->page. 1762 10843 Dave */ 1763 10843 Dave static int 1764 10843 Dave dumpsys_sread(helper_t *hp) 1765 10843 Dave { 1766 10843 Dave dumpsync_t *ds = hp->ds; 1767 10843 Dave 1768 10843 Dave /* CONSTCOND */ 1769 10843 Dave while (1) { 1770 10843 Dave 1771 10843 Dave /* Find the next input buffer. */ 1772 10843 Dave if (hp->cpin == NULL) { 1773 10843 Dave HRSTART(hp->perpage, inwait); 1774 10843 Dave 1775 10843 Dave /* CONSTCOND */ 1776 10843 Dave while (1) { 1777 10843 Dave hp->cpin = CQ_GET(helperq); 1778 10843 Dave dump_timeleft = dump_timeout; 1779 10843 Dave 1780 10843 Dave /* 1781 10843 Dave * NULL return means the helper queue 1782 10843 Dave * is closed and empty. 1783 10843 Dave */ 1784 10843 Dave if (hp->cpin == NULL) 1785 10843 Dave break; 1786 10843 Dave 1787 10843 Dave /* Have input, check for dump I/O error. */ 1788 10843 Dave if (!dump_ioerr) 1789 10843 Dave break; 1790 10843 Dave 1791 10843 Dave /* 1792 10843 Dave * If an I/O error occurs, stay in the 1793 10843 Dave * loop in order to empty the helper 1794 10843 Dave * queue. Return the buffers to the 1795 10843 Dave * main task to unmap and free it. 1796 10843 Dave */ 1797 10843 Dave hp->cpin->used = 0; 1798 10843 Dave CQ_PUT(mainq, hp->cpin, CBUF_USEDMAP); 1799 10843 Dave } 1800 10843 Dave HRSTOP(hp->perpage, inwait); 1801 10843 Dave 1802 10843 Dave /* Stop here when the helper queue is closed. 
*/ 1803 10843 Dave if (hp->cpin == NULL) 1804 10843 Dave break; 1805 10843 Dave 1806 10843 Dave /* Set the offset=0 to get the first pfn. */ 1807 10843 Dave hp->in = 0; 1808 10843 Dave 1809 10843 Dave /* Set the total processed to 0 */ 1810 10843 Dave hp->used = 0; 1811 10843 Dave } 1812 10843 Dave 1813 10843 Dave /* Process the next page. */ 1814 10843 Dave if (hp->used < hp->cpin->used) { 1815 10843 Dave 1816 10843 Dave /* 1817 10843 Dave * Get the next page from the input buffer and 1818 10843 Dave * return a copy. 1819 10843 Dave */ 1820 10843 Dave ASSERT(hp->in != -1); 1821 10843 Dave HRSTART(hp->perpage, copy); 1822 10843 Dave hp->in = dumpsys_copy_page(hp, hp->in); 1823 10843 Dave hp->used += PAGESIZE; 1824 10843 Dave HRSTOP(hp->perpage, copy); 1825 10843 Dave break; 1826 10843 Dave 1827 10843 Dave } else { 1828 10843 Dave 1829 10843 Dave /* 1830 10843 Dave * Done with the input. Flush the VM and 1831 10843 Dave * return the buffer to the main task. 1832 10843 Dave */ 1833 10843 Dave if (panicstr && hp->helper != MAINHELPER) 1834 10843 Dave hat_flush_range(kas.a_hat, 1835 10843 Dave hp->cpin->buf, hp->cpin->size); 1836 10843 Dave dumpsys_errmsg(hp, NULL); 1837 10843 Dave CQ_PUT(mainq, hp->cpin, CBUF_USEDMAP); 1838 10843 Dave hp->cpin = NULL; 1839 10843 Dave } 1840 10843 Dave } 1841 10843 Dave 1842 10843 Dave return (hp->cpin != NULL); 1843 10843 Dave } 1844 10843 Dave 1845 10843 Dave /* 1846 10843 Dave * Compress size bytes starting at buf with bzip2 1847 10843 Dave * mode: 1848 10843 Dave * BZ_RUN add one more compressed page 1849 10843 Dave * BZ_FINISH no more input, flush the state 1850 10843 Dave */ 1851 10843 Dave static void 1852 10843 Dave dumpsys_bzrun(helper_t *hp, void *buf, size_t size, int mode) 1853 10843 Dave { 1854 10843 Dave dumpsync_t *ds = hp->ds; 1855 10843 Dave const int CSIZE = sizeof (dumpcsize_t); 1856 10843 Dave bz_stream *ps = &hp->bzstream; 1857 10843 Dave int rc = 0; 1858 10843 Dave uint32_t csize; 1859 10843 Dave dumpcsize_t cs; 
1860 10843 Dave 1861 10843 Dave /* Set input pointers to new input page */ 1862 10843 Dave if (size > 0) { 1863 10843 Dave ps->avail_in = size; 1864 10843 Dave ps->next_in = buf; 1865 10843 Dave } 1866 10843 Dave 1867 10843 Dave /* CONSTCOND */ 1868 10843 Dave while (1) { 1869 10843 Dave 1870 10843 Dave /* Quit when all input has been consumed */ 1871 10843 Dave if (ps->avail_in == 0 && mode == BZ_RUN) 1872 10843 Dave break; 1873 10843 Dave 1874 10843 Dave /* Get a new output buffer */ 1875 10843 Dave if (hp->cpout == NULL) { 1876 10843 Dave HRSTART(hp->perpage, outwait); 1877 10843 Dave hp->cpout = CQ_GET(freebufq); 1878 10843 Dave HRSTOP(hp->perpage, outwait); 1879 10843 Dave ps->avail_out = hp->cpout->size - CSIZE; 1880 10843 Dave ps->next_out = hp->cpout->buf + CSIZE; 1881 10843 Dave } 1882 10843 Dave 1883 10843 Dave /* Compress input, or finalize */ 1884 10843 Dave HRSTART(hp->perpage, compress); 1885 10843 Dave rc = BZ2_bzCompress(ps, mode); 1886 10843 Dave HRSTOP(hp->perpage, compress); 1887 10843 Dave 1888 10843 Dave /* Check for error */ 1889 10843 Dave if (mode == BZ_RUN && rc != BZ_RUN_OK) { 1890 10843 Dave dumpsys_errmsg(hp, "%d: BZ_RUN error %s at page %lx\n", 1891 10843 Dave hp->helper, BZ2_bzErrorString(rc), 1892 10843 Dave hp->cpin->pagenum); 1893 10843 Dave break; 1894 10843 Dave } 1895 10843 Dave 1896 10843 Dave /* Write the buffer if it is full, or we are flushing */ 1897 10843 Dave if (ps->avail_out == 0 || mode == BZ_FINISH) { 1898 10843 Dave csize = hp->cpout->size - CSIZE - ps->avail_out; 1899 10843 Dave cs = DUMP_SET_TAG(csize, hp->tag); 1900 10843 Dave if (csize > 0) { 1901 10843 Dave (void) memcpy(hp->cpout->buf, &cs, CSIZE); 1902 10843 Dave dumpsys_swrite(hp, hp->cpout, csize + CSIZE); 1903 10843 Dave hp->cpout = NULL; 1904 10843 Dave } 1905 10843 Dave } 1906 10843 Dave 1907 10843 Dave /* Check for final complete */ 1908 10843 Dave if (mode == BZ_FINISH) { 1909 10843 Dave if (rc == BZ_STREAM_END) 1910 10843 Dave break; 1911 10843 Dave if 
(rc != BZ_FINISH_OK) { 1912 10843 Dave dumpsys_errmsg(hp, "%d: BZ_FINISH error %s\n", 1913 10843 Dave hp->helper, BZ2_bzErrorString(rc)); 1914 10843 Dave break; 1915 10843 Dave } 1916 10843 Dave } 1917 10843 Dave } 1918 10843 Dave 1919 10843 Dave /* Cleanup state and buffers */ 1920 10843 Dave if (mode == BZ_FINISH) { 1921 10843 Dave 1922 10843 Dave /* Reset state so that it is re-usable. */ 1923 10843 Dave (void) BZ2_bzCompressReset(&hp->bzstream); 1924 10843 Dave 1925 10843 Dave /* Give any unused outout buffer to the main task */ 1926 10843 Dave if (hp->cpout != NULL) { 1927 10843 Dave hp->cpout->used = 0; 1928 10843 Dave CQ_PUT(mainq, hp->cpout, CBUF_ERRMSG); 1929 10843 Dave hp->cpout = NULL; 1930 10843 Dave } 1931 10843 Dave } 1932 10843 Dave } 1933 10843 Dave 1934 10843 Dave static void 1935 10843 Dave dumpsys_bz2compress(helper_t *hp) 1936 10843 Dave { 1937 10843 Dave dumpsync_t *ds = hp->ds; 1938 10843 Dave dumpstreamhdr_t sh; 1939 10843 Dave 1940 10843 Dave (void) strcpy(sh.stream_magic, DUMP_STREAM_MAGIC); 1941 10843 Dave sh.stream_pagenum = (pgcnt_t)-1; 1942 10843 Dave sh.stream_npages = 0; 1943 10843 Dave hp->cpin = NULL; 1944 10843 Dave hp->cpout = NULL; 1945 10843 Dave hp->cperr = NULL; 1946 10843 Dave hp->in = 0; 1947 10843 Dave hp->out = 0; 1948 10843 Dave hp->bzstream.avail_in = 0; 1949 10843 Dave 1950 10843 Dave /* Bump reference to mainq while we are running */ 1951 10843 Dave CQ_OPEN(mainq); 1952 10843 Dave 1953 10843 Dave /* Get one page at a time */ 1954 10843 Dave while (dumpsys_sread(hp)) { 1955 10843 Dave if (sh.stream_pagenum != hp->cpin->pagenum) { 1956 10843 Dave sh.stream_pagenum = hp->cpin->pagenum; 1957 10843 Dave sh.stream_npages = btop(hp->cpin->used); 1958 10843 Dave dumpsys_bzrun(hp, &sh, sizeof (sh), BZ_RUN); 1959 10843 Dave } 1960 10843 Dave dumpsys_bzrun(hp, hp->page, PAGESIZE, 0); 1961 10843 Dave } 1962 10843 Dave 1963 10843 Dave /* Done with input, flush any partial buffer */ 1964 10843 Dave if (sh.stream_pagenum != 
(pgcnt_t)-1) { 1965 10843 Dave dumpsys_bzrun(hp, NULL, 0, BZ_FINISH); 1966 10843 Dave dumpsys_errmsg(hp, NULL); 1967 10843 Dave } 1968 10843 Dave 1969 10843 Dave ASSERT(hp->cpin == NULL && hp->cpout == NULL && hp->cperr == NULL); 1970 10843 Dave 1971 10843 Dave /* Decrement main queue count, we are done */ 1972 10843 Dave CQ_CLOSE(mainq); 1973 10843 Dave } 1974 10843 Dave 1975 10843 Dave /* 1976 10843 Dave * Compress with lzjb 1977 10843 Dave * write stream block if full or size==0 1978 10843 Dave * if csize==0 write stream header, else write <csize, data> 1979 10843 Dave * size==0 is a call to flush a buffer 1980 10843 Dave * hp->cpout is the buffer we are flushing or filling 1981 10843 Dave * hp->out is the next index to fill data 1982 10843 Dave * osize is either csize+data, or the size of a stream header 1983 10843 Dave */ 1984 10843 Dave static void 1985 10843 Dave dumpsys_lzjbrun(helper_t *hp, size_t csize, void *buf, size_t size) 1986 10843 Dave { 1987 10843 Dave dumpsync_t *ds = hp->ds; 1988 10843 Dave const int CSIZE = sizeof (dumpcsize_t); 1989 10843 Dave dumpcsize_t cs; 1990 10843 Dave size_t osize = csize > 0 ? CSIZE + size : size; 1991 10843 Dave 1992 10843 Dave /* If flush, and there is no buffer, just return */ 1993 10843 Dave if (size == 0 && hp->cpout == NULL) 1994 10843 Dave return; 1995 10843 Dave 1996 10843 Dave /* If flush, or cpout is full, write it out */ 1997 10843 Dave if (size == 0 || 1998 10843 Dave hp->cpout != NULL && hp->out + osize > hp->cpout->size) { 1999 10843 Dave 2000 10843 Dave /* Set tag+size word at the front of the stream block. */ 2001 10843 Dave cs = DUMP_SET_TAG(hp->out - CSIZE, hp->tag); 2002 10843 Dave (void) memcpy(hp->cpout->buf, &cs, CSIZE); 2003 10843 Dave 2004 10843 Dave /* Write block to dump file. 
*/ 2005 10843 Dave dumpsys_swrite(hp, hp->cpout, hp->out); 2006 10843 Dave 2007 10843 Dave /* Clear pointer to indicate we need a new buffer */ 2008 10843 Dave hp->cpout = NULL; 2009 10843 Dave 2010 10843 Dave /* flushing, we are done */ 2011 10843 Dave if (size == 0) 2012 10843 Dave return; 2013 10843 Dave } 2014 10843 Dave 2015 10843 Dave /* Get an output buffer if we dont have one. */ 2016 10843 Dave if (hp->cpout == NULL) { 2017 10843 Dave HRSTART(hp->perpage, outwait); 2018 10843 Dave hp->cpout = CQ_GET(freebufq); 2019 10843 Dave HRSTOP(hp->perpage, outwait); 2020 10843 Dave hp->out = CSIZE; 2021 10843 Dave } 2022 10843 Dave 2023 10843 Dave /* Store csize word. This is the size of compressed data. */ 2024 10843 Dave if (csize > 0) { 2025 10843 Dave cs = DUMP_SET_TAG(csize, 0); 2026 10843 Dave (void) memcpy(hp->cpout->buf + hp->out, &cs, CSIZE); 2027 10843 Dave hp->out += CSIZE; 2028 10843 Dave } 2029 10843 Dave 2030 10843 Dave /* Store the data. */ 2031 10843 Dave (void) memcpy(hp->cpout->buf + hp->out, buf, size); 2032 10843 Dave hp->out += size; 2033 10843 Dave } 2034 10843 Dave 2035 10843 Dave static void 2036 10843 Dave dumpsys_lzjbcompress(helper_t *hp) 2037 10843 Dave { 2038 10843 Dave dumpsync_t *ds = hp->ds; 2039 10843 Dave size_t csize; 2040 10843 Dave dumpstreamhdr_t sh; 2041 10843 Dave 2042 10843 Dave (void) strcpy(sh.stream_magic, DUMP_STREAM_MAGIC); 2043 10843 Dave sh.stream_pagenum = (pfn_t)-1; 2044 10843 Dave sh.stream_npages = 0; 2045 10843 Dave hp->cpin = NULL; 2046 10843 Dave hp->cpout = NULL; 2047 10843 Dave hp->cperr = NULL; 2048 10843 Dave hp->in = 0; 2049 10843 Dave hp->out = 0; 2050 10843 Dave 2051 10843 Dave /* Bump reference to mainq while we are running */ 2052 10843 Dave CQ_OPEN(mainq); 2053 10843 Dave 2054 10843 Dave /* Get one page at a time */ 2055 10843 Dave while (dumpsys_sread(hp)) { 2056 10843 Dave 2057 10843 Dave /* Create a stream header for each new input map */ 2058 10843 Dave if (sh.stream_pagenum != hp->cpin->pagenum) { 
2059 10843 Dave sh.stream_pagenum = hp->cpin->pagenum; 2060 10843 Dave sh.stream_npages = btop(hp->cpin->used); 2061 10843 Dave dumpsys_lzjbrun(hp, 0, &sh, sizeof (sh)); 2062 10843 Dave } 2063 10843 Dave 2064 10843 Dave /* Compress one page */ 2065 10843 Dave HRSTART(hp->perpage, compress); 2066 10843 Dave csize = compress(hp->page, hp->lzbuf, PAGESIZE); 2067 10843 Dave HRSTOP(hp->perpage, compress); 2068 10843 Dave 2069 10843 Dave /* Add csize+data to output block */ 2070 10843 Dave ASSERT(csize > 0 && csize <= PAGESIZE); 2071 10843 Dave dumpsys_lzjbrun(hp, csize, hp->lzbuf, csize); 2072 10843 Dave } 2073 10843 Dave 2074 10843 Dave /* Done with input, flush any partial buffer */ 2075 10843 Dave if (sh.stream_pagenum != (pfn_t)-1) { 2076 10843 Dave dumpsys_lzjbrun(hp, 0, NULL, 0); 2077 10843 Dave dumpsys_errmsg(hp, NULL); 2078 10843 Dave } 2079 10843 Dave 2080 10843 Dave ASSERT(hp->cpin == NULL && hp->cpout == NULL && hp->cperr == NULL); 2081 10843 Dave 2082 10843 Dave /* Decrement main queue count, we are done */ 2083 10843 Dave CQ_CLOSE(mainq); 2084 10843 Dave } 2085 10843 Dave 2086 10843 Dave /* 2087 10843 Dave * Dump helper called from panic_idle() to compress pages. CPUs in 2088 10843 Dave * this path must not call most kernel services. 2089 10843 Dave * 2090 10843 Dave * During panic, all but one of the CPUs is idle. These CPUs are used 2091 10843 Dave * as helpers working in parallel to copy and compress memory 2092 10843 Dave * pages. During a panic, however, these processors cannot call any 2093 10843 Dave * kernel services. This is because mutexes become no-ops during 2094 10843 Dave * panic, and, cross-call interrupts are inhibited. Therefore, during 2095 10843 Dave * panic dump the helper CPUs communicate with the panic CPU using 2096 10843 Dave * memory variables. All memory mapping and I/O is performed by the 2097 10843 Dave * panic CPU. 
2098 10843 Dave */ 2099 10843 Dave void 2100 10843 Dave dumpsys_helper() 2101 10843 Dave { 2102 10843 Dave dumpsys_spinlock(&dumpcfg.helper_lock); 2103 10843 Dave if (dumpcfg.helpers_wanted) { 2104 10843 Dave helper_t *hp, *hpend = &dumpcfg.helper[dumpcfg.nhelper]; 2105 10843 Dave 2106 10843 Dave for (hp = dumpcfg.helper; hp != hpend; hp++) { 2107 10843 Dave if (hp->helper == FREEHELPER) { 2108 10843 Dave hp->helper = CPU->cpu_id; 2109 10843 Dave BT_SET(dumpcfg.helpermap, CPU->cpu_seqid); 2110 10843 Dave 2111 10843 Dave dumpsys_spinunlock(&dumpcfg.helper_lock); 2112 10843 Dave 2113 10843 Dave if (dumpcfg.clevel < DUMP_CLEVEL_BZIP2) 2114 10843 Dave dumpsys_lzjbcompress(hp); 2115 10843 Dave else 2116 10843 Dave dumpsys_bz2compress(hp); 2117 10843 Dave 2118 10843 Dave hp->helper = DONEHELPER; 2119 10843 Dave return; 2120 10843 Dave } 2121 10843 Dave } 2122 10843 Dave } 2123 10843 Dave dumpsys_spinunlock(&dumpcfg.helper_lock); 2124 10843 Dave } 2125 10843 Dave 2126 10843 Dave /* 2127 10843 Dave * Dump helper for live dumps. 2128 10843 Dave * These run as a system task. 
2129 10843 Dave */ 2130 10843 Dave static void 2131 10843 Dave dumpsys_live_helper(void *arg) 2132 10843 Dave { 2133 10843 Dave helper_t *hp = arg; 2134 10843 Dave 2135 10843 Dave BT_ATOMIC_SET(dumpcfg.helpermap, CPU->cpu_seqid); 2136 10843 Dave if (dumpcfg.clevel < DUMP_CLEVEL_BZIP2) 2137 10843 Dave dumpsys_lzjbcompress(hp); 2138 10843 Dave else 2139 10843 Dave dumpsys_bz2compress(hp); 2140 10843 Dave } 2141 10843 Dave 2142 10843 Dave /* 2143 10843 Dave * Compress one page with lzjb (single threaded case) 2144 10843 Dave */ 2145 10843 Dave static void 2146 10843 Dave dumpsys_lzjb_page(helper_t *hp, cbuf_t *cp) 2147 10843 Dave { 2148 10843 Dave dumpsync_t *ds = hp->ds; 2149 10843 Dave uint32_t csize; 2150 10843 Dave 2151 10843 Dave hp->helper = MAINHELPER; 2152 10843 Dave hp->in = 0; 2153 10843 Dave hp->used = 0; 2154 10843 Dave hp->cpin = cp; 2155 10843 Dave while (hp->used < cp->used) { 2156 10843 Dave HRSTART(hp->perpage, copy); 2157 10843 Dave hp->in = dumpsys_copy_page(hp, hp->in); 2158 10843 Dave hp->used += PAGESIZE; 2159 10843 Dave HRSTOP(hp->perpage, copy); 2160 10843 Dave 2161 10843 Dave HRSTART(hp->perpage, compress); 2162 10843 Dave csize = compress(hp->page, hp->lzbuf, PAGESIZE); 2163 10843 Dave HRSTOP(hp->perpage, compress); 2164 10843 Dave 2165 10843 Dave HRSTART(hp->perpage, write); 2166 10843 Dave dumpvp_write(&csize, sizeof (csize)); 2167 10843 Dave dumpvp_write(hp->lzbuf, csize); 2168 10843 Dave HRSTOP(hp->perpage, write); 2169 10843 Dave } 2170 10843 Dave CQ_PUT(mainq, hp->cpin, CBUF_USEDMAP); 2171 10843 Dave hp->cpin = NULL; 2172 10843 Dave } 2173 10843 Dave 2174 10843 Dave /* 2175 10843 Dave * Main task to dump pages. This is called on the dump CPU. 
2176 10843 Dave */ 2177 10843 Dave static void 2178 10843 Dave dumpsys_main_task(void *arg) 2179 10843 Dave { 2180 10843 Dave dumpsync_t *ds = arg; 2181 10843 Dave pgcnt_t pagenum = 0, bitnum = 0, hibitnum; 2182 10843 Dave dumpmlw_t mlw; 2183 10843 Dave cbuf_t *cp; 2184 10843 Dave pgcnt_t baseoff, pfnoff; 2185 10843 Dave pfn_t base, pfn; 2186 10843 Dave int sec; 2187 10843 Dave 2188 10843 Dave dump_init_memlist_walker(&mlw); 2189 10843 Dave 2190 10843 Dave /* CONSTCOND */ 2191 10843 Dave while (1) { 2192 10843 Dave 2193 10843 Dave if (ds->percent > ds->percent_done) { 2194 10843 Dave ds->percent_done = ds->percent; 2195 10843 Dave sec = (gethrtime() - ds->start) / 1000 / 1000 / 1000; 2196 10843 Dave uprintf("^\r%2d:%02d %3d%% done", 2197 10843 Dave sec / 60, sec % 60, ds->percent); 2198 10843 Dave ds->neednl = 1; 2199 10843 Dave } 2200 10843 Dave 2201 10843 Dave while (CQ_IS_EMPTY(mainq) && !CQ_IS_EMPTY(writerq)) { 2202 10843 Dave 2203 10843 Dave /* the writerq never blocks */ 2204 10843 Dave cp = CQ_GET(writerq); 2205 10843 Dave if (cp == NULL) 2206 10843 Dave break; 2207 10843 Dave 2208 10843 Dave dump_timeleft = dump_timeout; 2209 10843 Dave 2210 10843 Dave HRSTART(ds->perpage, write); 2211 10843 Dave dumpvp_write(cp->buf, cp->used); 2212 10843 Dave HRSTOP(ds->perpage, write); 2213 10843 Dave 2214 10843 Dave CQ_PUT(freebufq, cp, CBUF_FREEBUF); 2215 10843 Dave } 2216 10843 Dave 2217 10843 Dave /* 2218 10843 Dave * Wait here for some buffers to process. Returns NULL 2219 10843 Dave * when all helpers have terminated and all buffers 2220 10843 Dave * have been processed. 2221 10843 Dave */ 2222 10843 Dave cp = CQ_GET(mainq); 2223 10843 Dave 2224 10843 Dave if (cp == NULL) { 2225 10843 Dave 2226 10843 Dave /* Drain the write queue. */ 2227 10843 Dave if (!CQ_IS_EMPTY(writerq)) 2228 10843 Dave continue; 2229 10843 Dave 2230 10843 Dave /* Main task exits here. 
*/ 2231 10843 Dave break; 2232 10843 Dave } 2233 10843 Dave 2234 10843 Dave dump_timeleft = dump_timeout; 2235 10843 Dave 2236 10843 Dave switch (cp->state) { 2237 10843 Dave 2238 10843 Dave case CBUF_FREEMAP: 2239 10843 Dave 2240 10843 Dave /* 2241 10843 Dave * Note that we drop CBUF_FREEMAP buffers on 2242 10843 Dave * the floor (they will not be on any cqueue) 2243 10843 Dave * when we no longer need them. 2244 10843 Dave */ 2245 10843 Dave if (bitnum >= dumpcfg.bitmapsize) 2246 10843 Dave break; 2247 10843 Dave 2248 10843 Dave if (dump_ioerr) { 2249 10843 Dave bitnum = dumpcfg.bitmapsize; 2250 10843 Dave CQ_CLOSE(helperq); 2251 10843 Dave break; 2252 10843 Dave } 2253 10843 Dave 2254 10843 Dave HRSTART(ds->perpage, bitmap); 2255 10843 Dave for (; bitnum < dumpcfg.bitmapsize; bitnum++) 2256 10843 Dave if (BT_TEST(dumpcfg.bitmap, bitnum)) 2257 10843 Dave break; 2258 10843 Dave HRSTOP(ds->perpage, bitmap); 2259 10843 Dave dump_timeleft = dump_timeout; 2260 10843 Dave 2261 10843 Dave if (bitnum >= dumpcfg.bitmapsize) { 2262 10843 Dave CQ_CLOSE(helperq); 2263 10843 Dave break; 2264 10843 Dave } 2265 10843 Dave 2266 10843 Dave /* 2267 10843 Dave * Try to map CBUF_MAPSIZE ranges. Can't 2268 10843 Dave * assume that memory segment size is a 2269 10843 Dave * multiple of CBUF_MAPSIZE. Can't assume that 2270 10843 Dave * the segment starts on a CBUF_MAPSIZE 2271 10843 Dave * boundary. 
2272 10843 Dave */ 2273 10843 Dave pfn = dump_bitnum_to_pfn(bitnum, &mlw); 2274 10843 Dave ASSERT(pfn != PFN_INVALID); 2275 10843 Dave ASSERT(bitnum + mlw.mpleft <= dumpcfg.bitmapsize); 2276 10843 Dave 2277 10843 Dave base = P2ALIGN(pfn, CBUF_MAPNP); 2278 10843 Dave if (base < mlw.mpaddr) { 2279 10843 Dave base = mlw.mpaddr; 2280 10843 Dave baseoff = P2PHASE(base, CBUF_MAPNP); 2281 10843 Dave } else { 2282 10843 Dave baseoff = 0; 2283 10843 Dave } 2284 10843 Dave 2285 10843 Dave pfnoff = pfn - base; 2286 10843 Dave if (pfnoff + mlw.mpleft < CBUF_MAPNP) { 2287 10843 Dave hibitnum = bitnum + mlw.mpleft; 2288 10843 Dave cp->size = ptob(pfnoff + mlw.mpleft); 2289 10843 Dave } else { 2290 10843 Dave hibitnum = bitnum - pfnoff + CBUF_MAPNP - 2291 10843 Dave baseoff; 2292 10843 Dave cp->size = CBUF_MAPSIZE - ptob(baseoff); 2293 10843 Dave } 2294 10843 Dave 2295 10843 Dave cp->pfn = pfn; 2296 10843 Dave cp->bitnum = bitnum++; 2297 10843 Dave cp->pagenum = pagenum++; 2298 10843 Dave cp->off = ptob(pfnoff); 2299 10843 Dave 2300 10843 Dave for (; bitnum < hibitnum; bitnum++) 2301 10843 Dave if (BT_TEST(dumpcfg.bitmap, bitnum)) 2302 10843 Dave pagenum++; 2303 10843 Dave 2304 10843 Dave dump_timeleft = dump_timeout; 2305 10843 Dave cp->used = ptob(pagenum - cp->pagenum); 2306 10843 Dave 2307 10843 Dave HRSTART(ds->perpage, map); 2308 10843 Dave hat_devload(kas.a_hat, cp->buf, cp->size, base, 2309 10843 Dave PROT_READ, HAT_LOAD_NOCONSIST); 2310 10843 Dave HRSTOP(ds->perpage, map); 2311 10843 Dave 2312 10843 Dave ds->pages_mapped += btop(cp->size); 2313 10843 Dave ds->pages_used += pagenum - cp->pagenum; 2314 10843 Dave 2315 10843 Dave CQ_OPEN(mainq); 2316 10843 Dave 2317 10843 Dave /* 2318 10843 Dave * If there are no helpers the main task does 2319 10843 Dave * non-streams lzjb compress. 
2320 10843 Dave */ 2321 10843 Dave if (dumpcfg.clevel == 0) { 2322 10843 Dave dumpsys_lzjb_page(dumpcfg.helper, cp); 2323 10843 Dave break; 2324 10843 Dave } 2325 10843 Dave 2326 10843 Dave /* pass mapped pages to a helper */ 2327 10843 Dave CQ_PUT(helperq, cp, CBUF_INREADY); 2328 10843 Dave 2329 10843 Dave /* the last page was done */ 2330 10843 Dave if (bitnum >= dumpcfg.bitmapsize) 2331 10843 Dave CQ_CLOSE(helperq); 2332 10843 Dave 2333 10843 Dave break; 2334 10843 Dave 2335 10843 Dave case CBUF_USEDMAP: 2336 10843 Dave 2337 10843 Dave ds->npages += btop(cp->used); 2338 10843 Dave 2339 10843 Dave HRSTART(ds->perpage, unmap); 2340 10843 Dave hat_unload(kas.a_hat, cp->buf, cp->size, HAT_UNLOAD); 2341 10843 Dave HRSTOP(ds->perpage, unmap); 2342 10843 Dave 2343 10843 Dave if (bitnum < dumpcfg.bitmapsize) 2344 10843 Dave CQ_PUT(mainq, cp, CBUF_FREEMAP); 2345 10843 Dave CQ_CLOSE(mainq); 2346 10843 Dave 2347 10843 Dave ASSERT(ds->npages <= dumphdr->dump_npages); 2348 10843 Dave ds->percent = ds->npages * 100LL / dumphdr->dump_npages; 2349 10843 Dave break; 2350 10843 Dave 2351 10843 Dave case CBUF_WRITE: 2352 10843 Dave 2353 10843 Dave CQ_PUT(writerq, cp, CBUF_WRITE); 2354 10843 Dave break; 2355 10843 Dave 2356 10843 Dave case CBUF_ERRMSG: 2357 10843 Dave 2358 10843 Dave if (cp->used > 0) { 2359 10843 Dave cp->buf[cp->size - 2] = '\n'; 2360 10843 Dave cp->buf[cp->size - 1] = '\0'; 2361 10843 Dave if (ds->neednl) { 2362 10843 Dave uprintf("\n%s", cp->buf); 2363 10843 Dave ds->neednl = 0; 2364 10843 Dave } else { 2365 10843 Dave uprintf("%s", cp->buf); 2366 10843 Dave } 2367 11178 Dave /* wait for console output */ 2368 11178 Dave drv_usecwait(200000); 2369 11178 Dave dump_timeleft = dump_timeout; 2370 10843 Dave } 2371 10843 Dave CQ_PUT(freebufq, cp, CBUF_FREEBUF); 2372 10843 Dave break; 2373 10843 Dave 2374 10843 Dave default: 2375 10843 Dave uprintf("dump: unexpected buffer state %d, " 2376 10843 Dave "buffer will be lost\n", cp->state); 2377 10843 Dave break; 2378 
10843 Dave 2379 10843 Dave } /* end switch */ 2380 10843 Dave 2381 10843 Dave } /* end while(1) */ 2382 10843 Dave } 2383 10843 Dave 2384 10843 Dave #ifdef COLLECT_METRICS 2385 10843 Dave size_t 2386 10843 Dave dumpsys_metrics(dumpsync_t *ds, char *buf, size_t size) 2387 10843 Dave { 2388 10843 Dave dumpcfg_t *cfg = &dumpcfg; 2389 10843 Dave int myid = CPU->cpu_seqid; 2390 10843 Dave int i, compress_ratio; 2391 10843 Dave int sec, iorate; 2392 10843 Dave helper_t *hp, *hpend = &cfg->helper[cfg->nhelper]; 2393 10843 Dave char *e = buf + size; 2394 10843 Dave char *p = buf; 2395 10843 Dave 2396 10843 Dave sec = ds->elapsed / (1000 * 1000 * 1000ULL); 2397 10843 Dave if (sec < 1) 2398 10843 Dave sec = 1; 2399 10843 Dave 2400 10843 Dave if (ds->iotime < 1) 2401 10843 Dave ds->iotime = 1; 2402 10843 Dave iorate = (ds->nwrite * 100000ULL) / ds->iotime; 2403 10843 Dave 2404 10843 Dave compress_ratio = 100LL * ds->npages / btopr(ds->nwrite + 1); 2405 10843 Dave 2406 10843 Dave #define P(...) (p += p < e ? 
snprintf(p, e - p, __VA_ARGS__) : 0) 2407 10843 Dave 2408 10843 Dave P("Master cpu_seqid,%d\n", CPU->cpu_seqid); 2409 10843 Dave P("Master cpu_id,%d\n", CPU->cpu_id); 2410 10843 Dave P("dump_flags,0x%x\n", dumphdr->dump_flags); 2411 10843 Dave P("dump_ioerr,%d\n", dump_ioerr); 2412 10843 Dave 2413 10843 Dave P("Helpers:\n"); 2414 10843 Dave for (i = 0; i < ncpus; i++) { 2415 10843 Dave if ((i & 15) == 0) 2416 10843 Dave P(",,%03d,", i); 2417 10843 Dave if (i == myid) 2418 10843 Dave P(" M"); 2419 10843 Dave else if (BT_TEST(cfg->helpermap, i)) 2420 10843 Dave P("%4d", cpu_seq[i]->cpu_id); 2421 10843 Dave else 2422 10843 Dave P(" *"); 2423 10843 Dave if ((i & 15) == 15) 2424 10843 Dave P("\n"); 2425 10843 Dave } 2426 10843 Dave 2427 10843 Dave P("ncbuf_used,%d\n", cfg->ncbuf_used); 2428 10843 Dave P("ncmap,%d\n", cfg->ncmap); 2429 10843 Dave 2430 10843 Dave P("Found %ldM ranges,%ld\n", (CBUF_MAPSIZE / DUMP_1MB), cfg->found4m); 2431 10843 Dave P("Found small pages,%ld\n", cfg->foundsm); 2432 10843 Dave 2433 10843 Dave P("Compression level,%d\n", cfg->clevel); 2434 10843 Dave P("Compression type,%s %s\n", cfg->clevel == 0 ? "serial" : "parallel", 2435 10843 Dave cfg->clevel >= DUMP_CLEVEL_BZIP2 ? 
"bzip2" : "lzjb"); 2436 10843 Dave P("Compression ratio,%d.%02d\n", compress_ratio / 100, compress_ratio % 2437 10843 Dave 100); 2438 10843 Dave P("nhelper_used,%d\n", cfg->nhelper_used); 2439 10843 Dave 2440 10843 Dave P("Dump I/O rate MBS,%d.%02d\n", iorate / 100, iorate % 100); 2441 10843 Dave P("..total bytes,%lld\n", (u_longlong_t)ds->nwrite); 2442 10843 Dave P("..total nsec,%lld\n", (u_longlong_t)ds->iotime); 2443 10843 Dave P("dumpbuf.iosize,%ld\n", dumpbuf.iosize); 2444 10843 Dave P("dumpbuf.size,%ld\n", dumpbuf.size); 2445 10843 Dave 2446 10843 Dave P("Dump pages/sec,%llu\n", (u_longlong_t)ds->npages / sec); 2447 10843 Dave P("Dump pages,%llu\n", (u_longlong_t)ds->npages); 2448 10843 Dave P("Dump time,%d\n", sec); 2449 10843 Dave 2450 10843 Dave if (ds->pages_mapped > 0) 2451 10843 Dave P("per-cent map utilization,%d\n", (int)((100 * ds->pages_used) 2452 10843 Dave / ds->pages_mapped)); 2453 10843 Dave 2454 10843 Dave P("\nPer-page metrics:\n"); 2455 10843 Dave if (ds->npages > 0) { 2456 10843 Dave for (hp = cfg->helper; hp != hpend; hp++) { 2457 10843 Dave #define PERPAGE(x) ds->perpage.x += hp->perpage.x; 2458 10843 Dave PERPAGES; 2459 10843 Dave #undef PERPAGE 2460 10843 Dave } 2461 10843 Dave #define PERPAGE(x) \ 2462 10843 Dave P("%s nsec/page,%d\n", #x, (int)(ds->perpage.x / ds->npages)); 2463 10843 Dave PERPAGES; 2464 10843 Dave #undef PERPAGE 2465 10843 Dave P("freebufq.empty,%d\n", (int)(ds->freebufq.empty / 2466 10843 Dave ds->npages)); 2467 10843 Dave P("helperq.empty,%d\n", (int)(ds->helperq.empty / 2468 10843 Dave ds->npages)); 2469 10843 Dave P("writerq.empty,%d\n", (int)(ds->writerq.empty / 2470 10843 Dave ds->npages)); 2471 10843 Dave P("mainq.empty,%d\n", (int)(ds->mainq.empty / ds->npages)); 2472 10843 Dave 2473 10843 Dave P("I/O wait nsec/page,%llu\n", (u_longlong_t)(ds->iowait / 2474 10843 Dave ds->npages)); 2475 10843 Dave } 2476 10843 Dave #undef P 2477 10843 Dave if (p < e) 2478 10843 Dave bzero(p, e - p); 2479 10843 Dave return (p - 
buf);
}
#endif /* COLLECT_METRICS */

/*
 * Dump the system.
 *
 * Writes a dump of the system to the configured dump device (dumpvp).
 * When panicstr is set this is a panic dump; otherwise it is a live dump
 * (DF_LIVE stays set in the header).  In order, this routine:
 *
 *   - returns early if no dump device or dump header is configured;
 *   - initialises the dump header fields and clears the page bitmap;
 *   - writes the symbol table, the translation map and the pfn table;
 *   - prepares the helpers and buffer queues, starts the helpers and
 *     runs dumpsys_main_task() to write the page data;
 *   - writes the end-of-stream tag and fills in the data header,
 *     including any kmem/metrics text;
 *   - writes the dump header at dump_start and again, followed by the
 *     data header and metrics, in the last DUMP_OFFSET bytes of the device;
 *   - on panic, writes out ereports and undelivered messages last.
 *
 * The order of the dumpvp_flush() calls matters: each return value is
 * recorded in the header as the start offset of the section that follows.
 */
void
dumpsys(void)
{
	dumpsync_t *ds = &dumpsync;
	taskq_t *livetaskq = NULL;
	pfn_t pfn;
	pgcnt_t bitnum;
	proc_t *p;
	helper_t *hp, *hpend = &dumpcfg.helper[dumpcfg.nhelper];
	cbuf_t *cp;
	pid_t npids, pidx;
	char *content;
	char *buf;
	size_t size;
	int save_dump_clevel;
	dumpmlw_t mlw;
	dumpcsize_t datatag;
	dumpdatahdr_t datahdr;

	if (dumpvp == NULL || dumphdr == NULL) {
		uprintf("skipping system dump - no dump device configured\n");
		/*
		 * On panic, clear helpers_wanted and release helper_lock
		 * before bailing out.  NOTE(review): presumably this keeps
		 * CPUs that are waiting to help from waiting forever --
		 * confirm against the helper code.
		 */
		if (panicstr) {
			dumpcfg.helpers_wanted = 0;
			dumpsys_spinunlock(&dumpcfg.helper_lock);
		}
		return;
	}
	dumpbuf.cur = dumpbuf.start;

	/* clear the sync variables */
	ASSERT(dumpcfg.nhelper > 0);
	bzero(ds, sizeof (*ds));
	ds->dumpcpu = CPU->cpu_id;

	/*
	 * Calculate the starting block for dump.  If we're dumping on a
	 * swap device, start 1/5 of the way in; otherwise, start at the
	 * beginning.  And never use the first page -- it may be a disk label.
	 */
	if (dumpvp->v_flag & VISSWAP)
		dumphdr->dump_start = P2ROUNDUP(dumpvp_size / 5, DUMP_OFFSET);
	else
		dumphdr->dump_start = DUMP_OFFSET;

	/*
	 * Start out assuming a valid, complete, live, compressed dump.
	 * DF_LIVE is cleared just below for a panic dump and DF_COMPLETE
	 * is cleared later if errors are noted.
	 */
	dumphdr->dump_flags = DF_VALID | DF_COMPLETE | DF_LIVE | DF_COMPRESSED;
	dumphdr->dump_crashtime = gethrestime_sec();
	dumphdr->dump_npages = 0;
	dumphdr->dump_nvtop = 0;
	bzero(dumpcfg.bitmap, BT_SIZEOFMAP(dumpcfg.bitmapsize));
	dump_timeleft = dump_timeout;

	/*
	 * Panic dump: not live.  Free and re-allocate via VOP_DUMPCTL
	 * (results ignored) and record the formatted panic message in
	 * the header.
	 */
	if (panicstr) {
		dumphdr->dump_flags &= ~DF_LIVE;
		(void) VOP_DUMPCTL(dumpvp, DUMP_FREE, NULL, NULL);
		(void) VOP_DUMPCTL(dumpvp, DUMP_ALLOC, NULL, NULL);
		(void) vsnprintf(dumphdr->dump_panicstring, DUMP_PANICSIZE,
		    panicstr, panicargs);

	}

	/* Describe the configured dump content for the console message. */
	if (dump_conflags & DUMP_ALL)
		content = "all";
	else if (dump_conflags & DUMP_CURPROC)
		content = "kernel + curproc";
	else
		content = "kernel";
	uprintf("dumping to %s, offset %lld, content: %s\n", dumppath,
	    dumphdr->dump_start, content);

	/* Make sure nodename is current */
	bcopy(utsname.nodename, dumphdr->dump_utsname.nodename, SYS_NMLN);

	/*
	 * If this is a live dump, try to open a VCHR vnode for better
	 * performance. We must take care to flush the buffer cache
	 * first.
	 *
	 * On success the opened vnode is remembered in dumpbuf.cdev_vp and
	 * is closed and released at the end of this function.
	 */
	if (!panicstr) {
		vnode_t *cdev_vp, *cmn_cdev_vp;

		ASSERT(dumpbuf.cdev_vp == NULL);
		cdev_vp = makespecvp(VTOS(dumpvp)->s_dev, VCHR);
		if (cdev_vp != NULL) {
			cmn_cdev_vp = common_specvp(cdev_vp);
			if (VOP_OPEN(&cmn_cdev_vp, FREAD | FWRITE, kcred, NULL)
			    == 0) {
				if (vn_has_cached_data(dumpvp))
					(void) pvn_vplist_dirty(dumpvp, 0, NULL,
					    B_INVAL | B_TRUNC, kcred);
				dumpbuf.cdev_vp = cmn_cdev_vp;
			} else {
				VN_RELE(cdev_vp);
			}
		}
	}

	/*
	 * Store a hires timestamp so we can look it up during debugging.
	 */
	lbolt_debug_entry();

	/*
	 * Leave room for the message and ereport save areas and terminal dump
	 * header.
	 */
	dumpbuf.vp_limit = dumpvp_size - DUMP_LOGSIZE - DUMP_OFFSET -
	    DUMP_ERPTSIZE;

	/*
	 * Write out the symbol table.  It's no longer compressed,
	 * so its 'size' and 'csize' are equal.
	 *
	 * The symbol table starts one page past dump_start; the dump header
	 * itself is written at dump_start near the end of this function.
	 */
	dumpbuf.vp_off = dumphdr->dump_ksyms = dumphdr->dump_start + PAGESIZE;
	dumphdr->dump_ksyms_size = dumphdr->dump_ksyms_csize =
	    ksyms_snapshot(dumpvp_ksyms_write, NULL, LONG_MAX);

	/*
	 * Write out the translation map.
	 */
	dumphdr->dump_map = dumpvp_flush();
	dump_as(&kas);
	dumphdr->dump_nvtop += dump_plat_addr();

	/*
	 * call into hat, which may have unmapped pages that also need to
	 * be in the dump
	 */
	hat_dump();

	if (dump_conflags & DUMP_ALL) {
		/*
		 * Snapshot every active pid while holding pidlock, then
		 * dump each process only after the lock has been dropped.
		 */
		mutex_enter(&pidlock);

		for (npids = 0, p = practive; p != NULL; p = p->p_next)
			dumpcfg.pids[npids++] = p->p_pid;

		mutex_exit(&pidlock);

		for (pidx = 0; pidx < npids; pidx++)
			(void) dump_process(dumpcfg.pids[pidx]);

		/* Select every page: set every bit in the bitmap. */
		for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum++) {
			dump_timeleft = dump_timeout;
			BT_SET(dumpcfg.bitmap, bitnum);
		}
		dumphdr->dump_npages = dumpcfg.bitmapsize;
		dumphdr->dump_flags |= DF_ALL;

	} else if (dump_conflags & DUMP_CURPROC) {
		/*
		 * Determine which pid is to be dumped.  If we're panicking, we
		 * dump the process associated with panic_thread (if any).  If
		 * this is a live dump, we dump the process associated with
		 * curthread.
		 */
		npids = 0;
		if (panicstr) {
			if (panic_thread != NULL &&
			    panic_thread->t_procp != NULL &&
			    panic_thread->t_procp != &p0) {
				dumpcfg.pids[npids++] =
				    panic_thread->t_procp->p_pid;
			}
		} else {
			dumpcfg.pids[npids++] = curthread->t_procp->p_pid;
		}

		/*
		 * Fall back to a kernel-only dump if there is no process to
		 * dump or dump_process() reports failure.
		 */
		if (npids && dump_process(dumpcfg.pids[0]) == 0)
			dumphdr->dump_flags |= DF_CURPROC;
		else
			dumphdr->dump_flags |= DF_KERNEL;

	} else {
		dumphdr->dump_flags |= DF_KERNEL;
	}

	/*
	 * Mask of the form 2^k - 1 derived from the translation count.
	 * NOTE(review): assumes highbit() returns the 1-based index of the
	 * highest set bit -- confirm.
	 */
	dumphdr->dump_hashmask = (1 << highbit(dumphdr->dump_nvtop - 1)) - 1;

	/*
	 * Write out the pfn table.
	 *
	 * One pfn_t is written for every bit set in the bitmap, in bitmap
	 * order, followed by whatever dump_plat_pfn() adds.
	 */
	dumphdr->dump_pfn = dumpvp_flush();
	dump_init_memlist_walker(&mlw);
	for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum++) {
		dump_timeleft = dump_timeout;
		if (!BT_TEST(dumpcfg.bitmap, bitnum))
			continue;
		pfn = dump_bitnum_to_pfn(bitnum, &mlw);
		ASSERT(pfn != PFN_INVALID);
		dumpvp_write(&pfn, sizeof (pfn_t));
	}
	dump_plat_pfn();

	/*
	 * Write out all the pages.
	 * Map pages, copy them handling UEs, compress, and write them out.
	 * Cooperate with any helpers running on CPUs in panic_idle().
	 */
	dumphdr->dump_data = dumpvp_flush();

	bzero(dumpcfg.helpermap, BT_SIZEOFMAP(NCPU));
	/* Taskq helpers are used only for a live dump with compression on. */
	ds->live = dumpcfg.clevel > 0 &&
	    (dumphdr->dump_flags & DF_LIVE) != 0;

	/*
	 * A live dump never runs at the bzip2 level: it is lowered to lzjb
	 * here and the saved level is restored at the end of this function.
	 */
	save_dump_clevel = dumpcfg.clevel;
	if (panicstr)
		dumpsys_get_maxmem();
	else if (dumpcfg.clevel >= DUMP_CLEVEL_BZIP2)
		dumpcfg.clevel = DUMP_CLEVEL_LZJB;

	/*
	 * Reset per-helper state.  Helpers without a page buffer are marked
	 * done and are not counted in nhelper_used.
	 */
	dumpcfg.nhelper_used = 0;
	for (hp = dumpcfg.helper; hp != hpend; hp++) {
		if (hp->page == NULL) {
			hp->helper = DONEHELPER;
			continue;
		}
		++dumpcfg.nhelper_used;
		hp->helper = FREEHELPER;
		hp->taskqid = NULL;
		hp->ds = ds;
		bzero(&hp->perpage, sizeof (hp->perpage));
		if (dumpcfg.clevel >= DUMP_CLEVEL_BZIP2)
			(void) BZ2_bzCompressReset(&hp->bzstream);
	}

	CQ_OPEN(freebufq);
	CQ_OPEN(helperq);

	/* Queue every allocated compression buffer as free. */
	dumpcfg.ncbuf_used = 0;
	for (cp = dumpcfg.cbuf; cp != &dumpcfg.cbuf[dumpcfg.ncbuf]; cp++) {
		if (cp->buf != NULL) {
			CQ_PUT(freebufq, cp, CBUF_FREEBUF);
			++dumpcfg.ncbuf_used;
		}
	}

	/* Hand all map buffers to the main task's queue. */
	for (cp = dumpcfg.cmap; cp != &dumpcfg.cmap[dumpcfg.ncmap]; cp++)
		CQ_PUT(mainq, cp, CBUF_FREEMAP);

	ds->start = gethrtime();
	ds->iowaitts = ds->start;

	/* start helpers */
	if (ds->live) {
		/*
		 * Live dump: create a taskq sized to the number of usable
		 * helpers and dispatch one dumpsys_live_helper per helper.
		 */
		int n = dumpcfg.nhelper_used;
		int pri = MINCLSYSPRI - 25;

		livetaskq = taskq_create("LiveDump", n, pri, n, n,
		    TASKQ_PREPOPULATE);
		for (hp = dumpcfg.helper; hp != hpend; hp++) {
			if (hp->page == NULL)
				continue;
			hp->helper = hp - dumpcfg.helper;
			hp->taskqid = taskq_dispatch(livetaskq,
			    dumpsys_live_helper, (void *)hp, TQ_NOSLEEP);
		}

	} else {
		/*
		 * No taskq helpers: on panic begin the kmem dump intercepts,
		 * then publish whether helpers are wanted (only when
		 * compressing) and release helper_lock.
		 */
		if (panicstr)
			kmem_dump_begin();
		dumpcfg.helpers_wanted = dumpcfg.clevel > 0;
		dumpsys_spinunlock(&dumpcfg.helper_lock);
	}

	/* run main task */
	dumpsys_main_task(ds);

	/* Never record an elapsed time below 1. */
	ds->elapsed = gethrtime() - ds->start;
	if (ds->elapsed < 1)
		ds->elapsed = 1;

	if (livetaskq != NULL)
		taskq_destroy(livetaskq);

	if (ds->neednl) {
		uprintf("\n");
		ds->neednl = 0;
	}

	/* record actual pages dumped */
	dumphdr->dump_npages = ds->npages;

	/* platform-specific data */
	dumphdr->dump_npages += dump_plat_data(dumpcfg.cbuf[0].buf);

	/* note any errors by clearing DF_COMPLETE */
	if (dump_ioerr || ds->npages < dumphdr->dump_npages)
		dumphdr->dump_flags &= ~DF_COMPLETE;

	/* end of stream blocks */
	datatag = 0;
	dumpvp_write(&datatag, sizeof (datatag));

	bzero(&datahdr, sizeof (datahdr));

	/*
	 * buffer for metrics
	 *
	 * cbuf[0] is reused as scratch space.  Its usable size is capped so
	 * that the dump header, the data header and the metrics text all fit
	 * within DUMP_OFFSET bytes, which is where they are written below.
	 */
	buf = dumpcfg.cbuf[0].buf;
	size = MIN(dumpcfg.cbuf[0].size, DUMP_OFFSET - sizeof (dumphdr_t) -
	    sizeof (dumpdatahdr_t));

	/* finish the kmem intercepts, collect kmem verbose info */
	if (panicstr) {
		datahdr.dump_metrics = kmem_dump_finish(buf, size);
		buf += datahdr.dump_metrics;
		size -= datahdr.dump_metrics;
	}

	/* compression info in data header */
	datahdr.dump_datahdr_magic = DUMP_DATAHDR_MAGIC;
	datahdr.dump_datahdr_version = DUMP_DATAHDR_VERSION;
	datahdr.dump_maxcsize = CBUF_SIZE;
	datahdr.dump_maxrange = CBUF_MAPSIZE / PAGESIZE;
	datahdr.dump_nstreams = dumpcfg.nhelper_used;
	datahdr.dump_clevel = dumpcfg.clevel;
#ifdef COLLECT_METRICS
	/* Append the dump metrics text after any kmem text. */
	if (dump_metrics_on)
		datahdr.dump_metrics += dumpsys_metrics(ds, buf, size);
#endif
	datahdr.dump_data_csize = dumpvp_flush() - dumphdr->dump_data;

	/*
	 * Write out the initial and terminal dump headers.
	 *
	 * The initial header goes at dump_start.  The terminal copy goes in
	 * the last DUMP_OFFSET bytes of the device and is followed by the
	 * data header and the metrics text.
	 */
	dumpbuf.vp_off = dumphdr->dump_start;
	dumpvp_write(dumphdr, sizeof (dumphdr_t));
	(void) dumpvp_flush();

	dumpbuf.vp_limit = dumpvp_size;
	dumpbuf.vp_off = dumpbuf.vp_limit - DUMP_OFFSET;
	dumpvp_write(dumphdr, sizeof (dumphdr_t));
	dumpvp_write(&datahdr, sizeof (dumpdatahdr_t));
	dumpvp_write(dumpcfg.cbuf[0].buf, datahdr.dump_metrics);

	(void) dumpvp_flush();

	uprintf("\r%3d%% done: %llu pages dumped, ",
	    ds->percent_done, (u_longlong_t)ds->npages);

	if (dump_ioerr == 0) {
		uprintf("dump succeeded\n");
	} else {
		uprintf("dump failed: error %d\n", dump_ioerr);
#ifdef DEBUG
		if (panicstr)
			debug_enter("dump failed");
#endif
	}

	/*
	 * Write out all undelivered messages.  This has to be the *last*
	 * thing we do because the dump process itself emits messages.
	 */
	if (panicstr) {
		dump_ereports();
		dump_messages();
	}

	delay(2 * hz);	/* let people see the 'done' message */
	dump_timeleft = 0;
	dump_ioerr = 0;

	/* restore settings after live dump completes */
	if (!panicstr) {
		dumpcfg.clevel = save_dump_clevel;

		/* release any VCHR open of the dump device */
		if (dumpbuf.cdev_vp != NULL) {
			(void) VOP_CLOSE(dumpbuf.cdev_vp, FREAD | FWRITE, 1, 0,
			    kcred, NULL);
			VN_RELE(dumpbuf.cdev_vp);
			dumpbuf.cdev_vp = NULL;
		}
	}
}

/*
 * This function is called whenever the memory size, as represented
 * by the phys_install list, changes.
 *
 * While holding dump_lock it calls dumphdr_init(), dumpbuf_resize() and
 * dump_update_clevel(), in that order.
 */
void
dump_resize()
{
	mutex_enter(&dump_lock);
	dumphdr_init();
	dumpbuf_resize();
	dump_update_clevel();
	mutex_exit(&dump_lock);
}

/*
 * This function allows for dynamic resizing of a dump area. It assumes that
 * the underlying device has updated its appropriate size(9P).
2877 6423 gw25295 */ 2878 6423 gw25295 int 2879 6423 gw25295 dumpvp_resize() 2880 6423 gw25295 { 2881 6423 gw25295 int error; 2882 6423 gw25295 vattr_t vattr; 2883 6423 gw25295 2884 6423 gw25295 mutex_enter(&dump_lock); 2885 6423 gw25295 vattr.va_mask = AT_SIZE; 2886 6423 gw25295 if ((error = VOP_GETATTR(dumpvp, &vattr, 0, kcred, NULL)) != 0) { 2887 6423 gw25295 mutex_exit(&dump_lock); 2888 6423 gw25295 return (error); 2889 6423 gw25295 } 2890 6423 gw25295 2891 6423 gw25295 if (error == 0 && vattr.va_size < 2 * DUMP_LOGSIZE + DUMP_ERPTSIZE) { 2892 6423 gw25295 mutex_exit(&dump_lock); 2893 6423 gw25295 return (ENOSPC); 2894 6423 gw25295 } 2895 6423 gw25295 2896 6423 gw25295 dumpvp_size = vattr.va_size & -DUMP_OFFSET; 2897 6423 gw25295 mutex_exit(&dump_lock); 2898 6423 gw25295 return (0); 2899 6423 gw25295 } 2900