1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "@(#)diskomizer64mpism.h 1.44 09/05/26 SMI" 28 29 /* 30 * diskomizer64mpism 31 * 32 * Write to and then read from disk partitions and or files. 33 * 34 * Chris.Gerhard (at) uk.sun.com - SMCC CTE 35 */ 36 37 #ifndef _DISKOMIZER64MPISM_H 38 #define _DISKOMIZER64MPISM_H 39 40 #ifdef __cplusplus 41 extern "C" { 42 #endif 43 44 45 #ifndef _REENTRANT 46 #define _REENTRANT 47 #endif 48 #include <note.h> 49 #include <sys/types.h> 50 #include <unistd.h> 51 #include <sys/stat.h> 52 #include <fcntl.h> 53 #include <stdio.h> 54 #include <stdlib.h> 55 #include <stdarg.h> 56 #include <string.h> 57 #include <sys/time.h> 58 #include <sys/errno.h> 59 #include <pthread.h> 60 #include <sys/shm.h> 61 #include <limits.h> 62 #include <signal.h> 63 #include <sys/wait.h> 64 #include <alloca.h> 65 #include <sys/mman.h> 66 #include <errno.h> 67 #include <strings.h> 68 #include <diskomizer/assert.h> 69 #include <sys/termios.h> 70 #include <diskomizer/daio.h> 71 #include <diskomizer/tnf.h> 72 #include <diskomizer/recent.h> 73 74 #define MAX(A, B) (((A) > (B)) ? (A) : (B)) 75 #define MIN(A, B) (((A) < (B)) ? (A) : (B)) 76 #define MILLION 1000000 77 #define THOUSAND 1000 78 #define BILLION (MILLION * THOUSAND) 79 #define ACCURACY BILLION 80 #ifdef __sparc 81 #define SPARC 82 #endif 83 #define TIME_FORMAT "%T %d/%b/%Y" 84 85 #define MAP_BITS WORD_BIT 86 typedef uint32_t bitmap_t; 87 /* typedef unsigned long long ullong_t; Now in <diskomizer/recent.h> */ 88 typedef uint16_t ushort16_t; 89 typedef uchar_t iolen_index_t; 90 #define MAX_IO_LENS UCHAR_MAX 91 #define INDEX_TO_DIOLEN(A) (opts.disk_io_sizes.vals[A]) 92 #define ARRAY_LEN(A) (sizeof (A)/sizeof ((A)[0])) 93 94 /* These are currently per process */ 95 struct times { 96 long long best; 97 long long worst; 98 long long ave; 99 long long count; 100 long long last_few[100]; 101 char *str; 102 }; 103 struct aioqtop { 104 struct aio_str *head; 105 struct aio_str *tail; 106 }; 107 /* 108 * Device control is shared between processes. So it has it's own lock to 109 * protect it's contents. 110 */ 111 typedef enum { 112 DEV_NOT_READY, /* when device is not yet ready to start */ 113 DEV_RUNNING, /* when the processes_stopped count is 0 */ 114 DEV_STOPPING, /* This is a request to tell all the procs to stop */ 115 DEV_STOPPED, /* when the processes_stopped count is nprocs */ 116 DEV_STARTING /* this one tells the procs to start again */ 117 } dev_state; 118 struct device_control { 119 pthread_mutex_t lock; /* Must be initialized as a inter process lock */ 120 struct timeval state_ttl; 121 dev_state state; 122 long processes_stopped; /* The number of processes which have stopped */ 123 }; 124 125 struct device_id { 126 ino_t ino; /* Inode number */ 127 /* 128 * The device id if a char special or block special, padded to 129 * 64 bits to prevent the 32bit build being padded in an 130 * unctorolled way 131 */ 132 uint64_t dev; 133 }; 134 /* 135 * the fds structure, used for doing alternate paths. This forms 136 * a ring attached to the device structure 137 * 138 * The timing stats are also collected per fd (path). 139 */ 140 struct fds { 141 int fd; 142 char *name; 143 char *longname; 144 char *shortname; 145 struct device_id devid; 146 struct aioqtop all_aios; /* sorted list of all aios outstanding */ 147 struct aio_str *oldest_io; 148 long number_of_hung_read; 149 long total_read; 150 long number_of_hung_write; 151 long total_write; 152 time_t last_report; /* time when the last hang was reported */ 153 hrtime_t last_write_time; 154 hrtime_t last_read_time; 155 struct times read_times; 156 struct times write_times; 157 /* 158 * Need_to_stop: Set non zero when the path has failed but the fact 159 * has not yet been comunicated to the other processes. 160 */ 161 unsigned need_to_stop : 1; 162 /* 163 * Stop flag: set non zero when the path has failed and the fact has 164 * been comunicated, or we got the failure message from another 165 * process. 166 */ 167 unsigned stop_flag : 1; /* set to 1 when the path is "failed" */ 168 unsigned error_path : 1; /* used as a fail over path */ 169 unsigned created : 1; 170 unsigned path_id : 8; 171 void *shared_data_handle; 172 struct fds *next; 173 }; 174 /* #include "frags.h" */ 175 /* 176 * there is one of these per diskomizer block that is in use on each device 177 * bing accessed. So it is well worth the effort to make these as small as 178 * possible. 179 */ 180 struct blks { 181 time_t last_requested; 182 union { 183 time_t prev_requested; 184 /* 185 * This is only set if the block could not be written during 186 * the sequential run, it is checked during an assert. 187 * Since once the block is written this is no longer checked 188 * it can share space with prev_requested. 189 */ 190 int was_unwritten; 191 } u; 192 #define SET_LAST_RETURN(A, B) A = (B); 193 #define GET_LAST_RETURN(A) A 194 #if LONG_BIT == 32 195 #define BIT2CHARSTAR(A) ((uchar_t *)((A) == 0 ? 0L : 1L)) 196 /* 197 * 24 bits give a maximum value for the delta of 194.18 days. 198 * If the io is taking that long then the device is not working 199 * so I'm happy to only to use 24 bits. This saves 8 bits of 200 * space per diskomiser block on each device. In a 64 bit world 201 * this is a waste of time due to padding, so just use ints, and 202 * if there is 64bit address space we won't be short of space! 203 */ 204 unsigned last_returned_delta : 24; 205 unsigned read_count : 8; 206 #else 207 #define BIT2CHARSTAR(A) ((uchar_t *)((A) == 0 ? 0LL : 1LL)) 208 uint_t last_returned_delta; 209 uint_t read_count; 210 #endif 211 unsigned hdrchksum : 16; /* the checksum from the header */ 212 unsigned path_id : 8; /* hold the id of the fd down which this went */ 213 unsigned bad_hdr : 1; /* 1 if the header written was bad */ 214 unsigned ab : 1; /* 1 for type A, 0 for type B */ 215 unsigned bad_chksum : 1; /* 0 if the checksum written was bad */ 216 #define SEQUENCE_BITS 5 217 unsigned sequence : SEQUENCE_BITS; /* Sequence number */ 218 /* 219 * Union holding either the read only or the read write information. 220 */ 221 union { 222 /* 223 * Length's offsets of the last and previous ios. 224 * we could do a nice structure that contains all 225 * the previous ios but it would end up being padded 226 * and I'm short of space. 227 */ 228 struct { 229 iolen_index_t last_iolen; 230 iolen_index_t last_off; 231 iolen_index_t prev_iolen; 232 iolen_index_t prev_off; 233 /* 234 * Put a pointer as the last element to force the 235 * compiler to generate the correct allignment. 236 * If the stucture is not the "correct" 237 * size then the compiler will pad it. 238 * 239 */ 240 uchar_t *last_io; 241 uchar_t *prev_io; 242 } w; 243 /* 244 * In the read-only world, only fixed size io is supported. 245 * If this structure grows to be bigger than the one above, 246 * then it needs to handle alignment issues. 247 */ 248 struct { 249 check_t last_chksum; 250 check_t prev_chksum; 251 uint_t last_io:1; 252 uint_t prev_io:1; 253 } o; 254 } r; 255 }; 256 257 #define DEV_BLOCK_HANDLE(A, B) ((A)->blocks->handles[(B) / (A)->blocks->len]) 258 #define DEV_BLOCK_INDEX(A, B) ((B) % (A)->blocks->len) 259 260 #define AIO_BLOCK_HANDLE(A) DEV_BLOCK_HANDLE((A)->dev, (A)->off) 261 #define AIO_BLOCK_INDEX(A) DEV_BLOCK_INDEX((A)->dev, (A)->off) 262 263 /* 264 * diff return is returned by the diff printing routines giving a count 265 * of the number of bits changed and also which bits were seen in error. 266 */ 267 struct diff_return { 268 int64_t count; 269 off64_t bits; 270 }; 271 272 273 struct shm_handle { 274 long len; 275 int count; 276 void *handles[1]; 277 }; 278 279 struct offset_list { 280 ullong_t offset; 281 struct offset_list *next; 282 }; 283 284 typedef void (*choose_block_t)(bitmap_t *map, 285 struct aio_str *aiop, ullong_t start, ullong_t len, int maplen); 286 /* 287 * The device structure. There is one of these per device (file) that is 288 * being diskomized. They are linked together in a single list with 289 * head being the global "devices". 290 * 291 */ 292 struct device { 293 struct fds *fdhead; /* list of device paths. They must all be to */ 294 /* the same physical device */ 295 char *logicalname; 296 void *writemap_handle; /* bit map protecting the blocks on disk */ 297 void *shared_data_handle; /* any data shared between all processes */ 298 struct disko_partition *v_part; /* the vtoc partition info */ 299 ulong_t writemap_size; /* size of the write map */ 300 ulong_t running_rthreads; /* the number of read threads running */ 301 ullong_t block; /* the last offset that has been written to */ 302 ullong_t read_start_block; /* the block on which to start reads */ 303 ullong_t length; /* the length in bytes of the area we are writing to */ 304 ullong_t next_write_blk; /* The next block to do a write I/O to */ 305 ullong_t next_read_blk; /* The next block to do a read I/O to */ 306 /* 307 * count of errors that have not yet been added to the shared 308 * errors 309 */ 310 int errors:29; 311 int stop_flag:1; 312 int need_to_stop:1; 313 int failed_to_push_unwritten:1; 314 uint_t device_block_size; /* the block size of the underlying device */ 315 long seq_passes; /* the number of sequential passes to do */ 316 /* 317 * A list of blocks that have not yet been written as they were locked 318 */ 319 struct offset_list *unwritten; 320 long long countdown; /* number of reads to complete before we stop */ 321 struct shm_handle *blocks; /* information about blocks */ 322 choose_block_t choose_block; 323 /* 324 * The time to live of this state. 325 * 326 * If the device is stopped then this time indicates when the 327 * device should be restarted. The first process to get run after 328 * this time will change the state from DEV_STOPPED to DEV_STARTING 329 * and decrement the stopped process count. All the other processes 330 * just decrement the stopped process count until it is zero, then 331 * the state is set to DEV_RUNNING. 332 * 333 * If the device is running then the first process to reach this 334 * time sets ti to DEV_STOPPING and increments the stopped process 335 * count. When the stopped process count is equal to nprocs the 336 * device state is set to stopped and no io will be taking place. 337 */ 338 struct timeval state_ttl; 339 dev_state state; 340 struct aioqtop stopped_ios; 341 /* 342 * If a path fails outstanding ios are cancelled and submitted 343 * down an alternative path, if available. 344 */ 345 struct aioqtop cancelled; 346 /* 347 * deferred ios are ios that could not get queued for 348 * some reason. 349 */ 350 struct aioqtop deferred_ios; 351 struct device_control *control; /* The shared control info */ 352 struct recent_blocks *recent; 353 struct device *next; /* the next device */ 354 }; 355 356 /* 357 * err_type and err_info are used by report_error. All errors should be 358 * reported via report_error, that way you always get all the info. 359 */ 360 typedef enum { 361 ERR_SYS, 362 ERR_CORRUPT, 363 ERR_HUNG, 364 ERR_DEFERRED 365 } err_type; 366 367 struct error_desc { 368 uint_t HEADER_CHECKSUM_ERR:1; 369 /* The lenght in the header does not match the length we expect */ 370 uint_t LENGTH_MISMATCH:1; 371 uint_t BODY_CHECKSUM_ERR:1; 372 /* Matches the device given in error.dev */ 373 uint_t MATCHING_DEVICE:1; 374 /* Matches the last write to the offset given in error.off */ 375 uint_t MATCHING_LAST:1; 376 /* Matches the previous write to the offset given in error.off */ 377 uint_t MATCHING_PREV:1; 378 uint_t UNABLE_TO_LOCK:1; 379 }; 380 381 struct error { 382 struct error_desc desc; 383 /* this errors instance number */ 384 int instance; 385 /* If this is a follow on error the previous instance number */ 386 int previous; 387 /* if this is a follow on error the parent's instance number */ 388 int parent; 389 check_t bad_checksum; 390 struct fds *fd; 391 struct device *dev; 392 char *diff_file; 393 /* diskomizer offset */ 394 off64_t doff; 395 time_t last_requested; 396 struct diff_return dr; 397 long delta; 398 uint32_t len; 399 unsigned path_id : 8; 400 }; 401 402 #include "bufs.h" 403 /* 404 * NB. The aio_result structure MUST be the first element in this structure 405 */ 406 typedef struct aio_str { 407 daio_result_t aio_res; /* aio_result passed to aioread/write */ 408 struct timeval tv; /* time when the aio request was submitted */ 409 struct device *dev; /* The device the aio is to/from */ 410 struct fds *fd; /* the file descriptor for this i/o */ 411 uchar_t *buf; /* The buffer being used for this I/O */ 412 ullong_t off; /* The offset being used for this I/O */ 413 uint16_t retrycnt; /* number of times we have retried this I/O */ 414 iolen_index_t iolen; /* The index into the iolen array */ 415 int count; /* The number times we have done I/O */ 416 time_t (*handler)(struct aio_str *aio_resp, ullong_t start); 417 struct bufhdr hdr; 418 struct daio_id daio_id; /* the id of the buffer */ 419 struct error error; /* error reporting info */ 420 struct aio_str *next; /* linked list for cancellation and stats */ 421 struct aio_str *prev; /* linked list for removing entries */ 422 } aio_str_t; 423 TNF_DECLARE_RECORD(aio_str_t, aio_tnf_str); 424 typedef enum { 425 CONTINUE, /* continue with error processing. */ 426 RETRY, /* Do the aioread again */ 427 BREAK /* All is well, break from error processing */ 428 } loop_type; 429 430 union err_info { 431 time_t time; 432 char *str; 433 }; 434 435 typedef loop_type 436 (*on_error_t)(ullong_t start, struct aio_str *aio_resp); 437 438 extern void nop(void); 439 extern iolen_index_t max_disk_io_len; 440 extern struct device *devices; /* all the devices there are */ 441 extern pid_t pgrp; 442 extern int nfunc_bufs; 443 extern uchar_t (*init_uchar_func)(int bufno, int i); 444 extern void (*shm_chmod)(void *addr, ulong_t len, int mode); 445 /* 446 * All the external functions in diskomizer64mpism.c 447 */ 448 #ifdef __sparc 449 extern void flush(int32_t *x); 450 extern void flush_windows(void); 451 #endif 452 extern time_t handle_write(struct aio_str *aio_resp, ullong_t start); 453 extern time_t handle_write_then_read(struct aio_str *aio_resp, ullong_t start); 454 extern time_t handle_read_then_write(struct aio_str *aio_resp, ullong_t start); 455 extern time_t handle_readonly_seq(struct aio_str *aio_resp, ullong_t start); 456 extern int how_many_devices(struct device *devices); 457 extern void print_number(unsigned long long i, char *singular, 458 char *plural); 459 extern void * alloc_mem(long a, long b); 460 extern ullong_t find_next_free(bitmap_t map[], ullong_t offset, 461 int len, int maplen); 462 extern void clear_write(bitmap_t map[], ullong_t off, ulong_t maplen); 463 extern ulong_t my_lrand(void); 464 extern int longest_logical_name(void); 465 extern int longest_device_name(void); 466 extern void *my_calloc(long a, long b); 467 extern char *alloc_time_now_fmt(char *fmt); 468 extern int this_proc(void); 469 extern int is_readonly(void); 470 extern int would_stop_before(time_t secs); 471 extern pid_t master_pid(void); 472 extern void check_exit_flag(void); 473 extern struct fds *find_path(struct fds *fdhead, char path_id); 474 extern ullong_t diskomizer_off2byteoff(ullong_t off); 475 extern char *diff_file(void); 476 /* 477 * macros. 478 */ 479 #define ZERO_OBJ(X) (void) memset(&X, NULL, sizeof (X)) 480 #define GET_OFF(X) (X / MAP_BITS) 481 #define GET_BIT(X) (1 << (X % MAP_BITS)) 482 483 #ifdef __cplusplus 484 } 485 #endif 486 487 #endif /* _DISKOMIZER64MPISM_H */ 488