Home | History | Annotate | Download | only in diskomizer
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #pragma ident	"@(#)diskomizer64mpism.h	1.44	09/05/26 SMI"
     28 
     29 /*
     30  *	diskomizer64mpism
     31  *
     32  *	Write to and then read from disk partitions and or files.
     33  *
     34  *	Chris.Gerhard (at) uk.sun.com - SMCC CTE
     35  */
     36 
     37 #ifndef _DISKOMIZER64MPISM_H
     38 #define	_DISKOMIZER64MPISM_H
     39 
     40 #ifdef	__cplusplus
     41 extern "C" {
     42 #endif
     43 
     44 
     45 #ifndef	_REENTRANT
     46 #define	_REENTRANT
     47 #endif
     48 #include <note.h>
     49 #include <sys/types.h>
     50 #include <unistd.h>
     51 #include <sys/stat.h>
     52 #include <fcntl.h>
     53 #include <stdio.h>
     54 #include <stdlib.h>
     55 #include <stdarg.h>
     56 #include <string.h>
     57 #include <sys/time.h>
     58 #include <sys/errno.h>
     59 #include <pthread.h>
     60 #include <sys/shm.h>
     61 #include <limits.h>
     62 #include <signal.h>
     63 #include <sys/wait.h>
     64 #include <alloca.h>
     65 #include <sys/mman.h>
     66 #include <errno.h>
     67 #include <strings.h>
     68 #include <diskomizer/assert.h>
     69 #include <sys/termios.h>
     70 #include <diskomizer/daio.h>
     71 #include <diskomizer/tnf.h>
     72 #include <diskomizer/recent.h>
     73 
     74 #define	MAX(A, B) (((A) > (B)) ? (A) : (B))	/* NB: one argument is evaluated twice */
     75 #define	MIN(A, B) (((A) < (B)) ? (A) : (B))	/* NB: one argument is evaluated twice */
     76 #define	MILLION 1000000
     77 #define	THOUSAND 1000
     78 #define	BILLION (MILLION * THOUSAND)	/* 10^9, computed in int arithmetic */
     79 #define	ACCURACY BILLION
     80 #ifdef __sparc
     81 #define	SPARC
     82 #endif
     83 #define	TIME_FORMAT "%T %d/%b/%Y"	/* NOTE(review): looks like a strftime() format - confirm at the call site */
     84 
     85 #define	MAP_BITS WORD_BIT	/* NOTE(review): bitmap_t is fixed at 32 bits, so this assumes WORD_BIT == 32 - confirm */
     86 typedef uint32_t bitmap_t;	/* one word of a bit map; see GET_OFF()/GET_BIT() */
     87 /* typedef unsigned long long  ullong_t; Now in <diskomizer/recent.h> */
     88 typedef uint16_t ushort16_t;
     89 typedef uchar_t iolen_index_t;	/* presumably the index fed to INDEX_TO_DIOLEN() - confirm */
     90 #define	MAX_IO_LENS UCHAR_MAX
     91 #define	INDEX_TO_DIOLEN(A) (opts.disk_io_sizes.vals[A])	/* needs a visible object named opts */
     92 #define	ARRAY_LEN(A) (sizeof (A)/sizeof ((A)[0]))	/* element count; A must be an array, not a pointer */
     93 
     94 /* Timing statistics (struct fds keeps one for reads, one for writes). These are currently per process */
     95 struct times {
     96 	long long best;
     97 	long long worst;
     98 	long long ave;
     99 	long long count;
    100 	long long last_few[100];	/* NOTE(review): units and fill order are not visible in this header */
    101 	char *str;
    102 };
    103 struct aioqtop {	/* head and tail of a list of struct aio_str (linked via next/prev) */
    104 	struct aio_str  *head;
    105 	struct aio_str  *tail;
    106 };
    107 /*
    108  * Device control is shared between processes. So it has its own lock to
    109  * protect its contents.
    110  */
    111 typedef enum {	/* device state; held in device_control.state and device.state */
    112 	DEV_NOT_READY, /* when device is not yet ready to start */
    113 	DEV_RUNNING, /* when the processes_stopped count is 0 */
    114 	DEV_STOPPING, /* This is a request to tell all the procs to stop */
    115 	DEV_STOPPED, /* when the processes_stopped count is nprocs */
    116 	DEV_STARTING /* this one tells the procs to start again */
    117 } dev_state;
    118 struct device_control {
    119 	pthread_mutex_t lock; /* Must be initialized as an inter-process lock */
    120 	struct	timeval state_ttl; /* NOTE(review): presumably as device.state_ttl - confirm */
    121 	dev_state state;
    122 	long processes_stopped; /* The number of processes which have stopped */
    123 };
    124 
    125 struct device_id {	/* identifies the object behind a path; embedded in struct fds */
    126 	ino_t    ino;	/* Inode number */
    127 	/*
    128 	 * The device id if a char special or block special, padded to
    129 	 * 64 bits to prevent the 32bit build being padded in an
    130 	 * uncontrolled way
    131 	 */
    132 	uint64_t    dev;
    133 };
    134 /*
    135  * the fds structure, used for doing alternate paths. This forms
    136  * a ring attached to the device structure
    137  *
    138  * The timing stats are also collected per fd (path).
    139  */
    140 struct fds {
    141 	int fd;
    142 	char *name;
    143 	char *longname;
    144 	char *shortname;
    145 	struct device_id devid;
    146 	struct aioqtop all_aios; /* sorted list of all aios outstanding */
    147 	struct aio_str  *oldest_io;
    148 	long   number_of_hung_read;
    149 	long   total_read;
    150 	long   number_of_hung_write;
    151 	long   total_write;
    152 	time_t last_report; /* time when the last hang was reported */
    153 	hrtime_t last_write_time;
    154 	hrtime_t last_read_time;
    155 	struct times read_times;	/* per-path timing stats for reads */
    156 	struct times write_times;	/* per-path timing stats for writes */
    157 	/*
    158 	 * Need_to_stop: Set non zero when the path has failed but the fact
    159 	 * has not yet been communicated to the other processes.
    160 	 */
    161 	unsigned need_to_stop : 1;
    162 	/*
    163 	 * Stop flag: set non zero when the path has failed and the fact has
    164 	 * been communicated, or we got the failure message from another
    165 	 * process.
    166 	 */
    167 	unsigned stop_flag : 1; /* set to 1 when the path is "failed" */
    168 	unsigned error_path : 1; /* used as a fail over path */
    169 	unsigned created : 1;
    170 	unsigned path_id : 8;	/* 8-bit id of this path; blks.path_id and error.path_id are the same width */
    171 	void *shared_data_handle;
    172 	struct fds *next;	/* next path in the ring attached to the device */
    173 };
    174 /* #include "frags.h" */
    175 /*
    176  * there is one of these per diskomizer block that is in use on each device
    177  * being accessed. So it is well worth the effort to make these as small as
    178  * possible.
    179  */
    180 struct blks {
    181 	time_t last_requested;
    182 	union {
    183 		time_t prev_requested;
    184 		/*
    185 		 * This is only set if the block could not be written during
    186 		 * the sequential run, it is checked during an assert.
    187 		 * Since once the block is written this is no longer checked
    188 		 * it can share space with prev_requested.
    189 		 */
    190 		int  was_unwritten;
    191 	} u;
    192 #define	SET_LAST_RETURN(A, B) A = (B);	/* NB: the expansion already ends in ';' and A is unparenthesized */
    193 #define	GET_LAST_RETURN(A) A
    194 #if LONG_BIT == 32
    195 #define	BIT2CHARSTAR(A) ((uchar_t *)((A) == 0 ? 0L : 1L))	/* zero -> null pointer, non-zero -> (uchar_t *)1 */
    196 	/*
    197 	 * 24 bits give a maximum value for the delta of 194.18 days.
    198 	 * If the io is taking that long then the device is not working
    199 	 * so I'm happy to use only 24 bits.  This saves 8 bits of
    200 	 * space per diskomizer block on each device. In a 64 bit world
    201 	 * this is a waste of time due to padding, so just use ints, and
    202 	 * if there is 64bit address space we won't be short of space!
    203 	 */
    204 	unsigned last_returned_delta : 24;
    205 	unsigned read_count : 8;
    206 #else
    207 #define	BIT2CHARSTAR(A) ((uchar_t *)((A) == 0 ? 0LL : 1LL))	/* zero -> null pointer, non-zero -> (uchar_t *)1 */
    208 	uint_t last_returned_delta;
    209 	uint_t read_count;
    210 #endif
    211 	unsigned hdrchksum : 16; /* the checksum from the header */
    212 	unsigned path_id : 8; /* hold the id of the fd down which this went */
    213 	unsigned bad_hdr : 1; /* 1 if the header written was bad */
    214 	unsigned ab : 1; /* 1 for type A, 0 for type B */
    215 	unsigned bad_chksum : 1; /* 0 if the checksum written was bad; NOTE(review): opposite polarity to bad_hdr - confirm */
    216 #define	SEQUENCE_BITS 5
    217 	unsigned sequence : SEQUENCE_BITS; /* Sequence number */
    218 	/*
    219 	 * Union holding either the read only or the read write information.
    220 	 */
    221 	union {
    222 		/*
    223 		 *	Lengths and offsets of the last and previous ios.
    224 		 *	We could do a nice structure that contains all
    225 		 *	the previous ios but it would end up being padded
    226 		 *	and I'm short of space.
    227 		 */
    228 		struct {
    229 			iolen_index_t last_iolen;
    230 			iolen_index_t last_off;
    231 			iolen_index_t prev_iolen;
    232 			iolen_index_t prev_off;
    233 			/*
    234 			 * Put a pointer as the last element to force the
    235 			 * compiler to generate the correct alignment.
    236 			 * If the structure is not the "correct"
    237 			 * size then the compiler will pad it.
    238 			 *
    239 			 */
    240 			uchar_t *last_io;
    241 			uchar_t *prev_io;
    242 		} w;
    243 		/*
    244 		 * In the read-only world, only fixed size io is supported.
    245 		 * If this structure grows to be bigger than the one above,
    246 		 * then it needs to handle alignment issues.
    247 		 */
    248 		struct {
    249 			check_t last_chksum;
    250 			check_t prev_chksum;
    251 			uint_t last_io:1;
    252 			uint_t prev_io:1;
    253 		} o;
    254 	} r;
    255 };
    256 
    257 #define	DEV_BLOCK_HANDLE(A, B) ((A)->blocks->handles[(B) / (A)->blocks->len])	/* handle covering block B of device A */
    258 #define	DEV_BLOCK_INDEX(A, B) ((B) % (A)->blocks->len)	/* position of block B within that handle */
    259 
    260 #define	AIO_BLOCK_HANDLE(A) DEV_BLOCK_HANDLE((A)->dev, (A)->off)	/* same, for the dev/off of aio A */
    261 #define	AIO_BLOCK_INDEX(A) DEV_BLOCK_INDEX((A)->dev, (A)->off)	/* same, for the dev/off of aio A */
    262 
    263 /*
    264  * A struct diff_return is returned by the diff printing routines, giving a
    265  * count of the number of bits changed and also which bits were seen in error.
    266  */
    267 struct diff_return {
    268 	int64_t count;	/* number of bits changed */
    269 	off64_t bits;	/* which bits were seen in error */
    270 };
    271 
    272 
    273 struct shm_handle {
    274 	long len;	/* divisor/modulus used by DEV_BLOCK_HANDLE()/DEV_BLOCK_INDEX() */
    275 	int count;	/* NOTE(review): presumably the number of entries in handles[] - confirm */
    276 	void *handles[1];	/* indexed beyond [0] by DEV_BLOCK_HANDLE(); NOTE(review): allocation size is set elsewhere */
    277 };
    278 
    279 struct offset_list {	/* singly linked list of offsets; used for device.unwritten */
    280 	ullong_t  offset;
    281 	struct offset_list *next;
    282 };
    283 
    284 typedef void (*choose_block_t)(bitmap_t *map,	/* type of device.choose_block */
    285 		struct aio_str *aiop, ullong_t start, ullong_t len, int maplen);
    286 /*
    287  * The device structure.  There is one of these per device (file) that is
    288  * being diskomized.	They are linked together in a single list with
    289  * head being the global "devices".
    290  *
    291  */
    292 struct device {
    293 	struct fds *fdhead; /* list of device paths. They must all be to */
    294 			    /* the same physical device */
    295 	char *logicalname;
    296 	void *writemap_handle; /* bit map protecting the blocks on disk */
    297 	void *shared_data_handle; /* any data shared between all processes */
    298 	struct disko_partition *v_part; /* the vtoc partition info */
    299 	ulong_t writemap_size; /* size of the write map */
    300 	ulong_t running_rthreads; /* the number of read threads running */
    301 	ullong_t block; /* the last offset that has been written to */
    302 	ullong_t read_start_block; /* the block on which to start reads */
    303 	ullong_t length; /* the length in bytes of the area we are writing to */
    304 	ullong_t next_write_blk; /* The next block to do a write I/O to */
    305 	ullong_t next_read_blk; /* The next block to do a read I/O to */
    306 	/*
    307 	 * count of errors that have not yet been added to the shared
    308 	 * errors
    309 	 */
    310 	int errors:29;
    311 	int stop_flag:1; /* NOTE(review): a 1-bit plain-int field may be signed (0/-1); test against 0, not 1 */
    312 	int need_to_stop:1;
    313 	int failed_to_push_unwritten:1;
    314 	uint_t device_block_size; /* the block size of the underlying device */
    315 	long seq_passes; /* the number of sequential passes to do */
    316 	/*
    317 	 * A list of blocks that have not yet been written as they were locked
    318 	 */
    319 	struct offset_list *unwritten;
    320 	long long countdown; /* number of reads to complete before we stop */
    321 	struct shm_handle *blocks; /* information about blocks */
    322 	choose_block_t choose_block;
    323 	/*
    324 	 * The time to live of this state.
    325 	 *
    326 	 * If the device is stopped then this time indicates when the
    327 	 * device should be restarted.  The first process to get run after
    328 	 * this time will change the state from DEV_STOPPED to DEV_STARTING
    329 	 * and decrement the stopped process count. All the other processes
    330 	 * just decrement the stopped process count until it is zero, then
    331 	 * the state is set to DEV_RUNNING.
    332 	 *
    333 	 * If the device is running then the first process to reach this
    334 	 * time sets it to DEV_STOPPING and increments the stopped process
    335 	 * count. When the stopped process count is equal to nprocs the
    336 	 * device state is set to stopped and no io will be taking place.
    337 	 */
    338 	struct	timeval state_ttl;
    339 	dev_state state;
    340 	struct aioqtop stopped_ios;
    341 	/*
    342 	 * If a path fails outstanding ios are cancelled and submitted
    343 	 * down an alternative path, if available.
    344 	 */
    345 	struct aioqtop cancelled;
    346 	/*
    347 	 * deferred ios are ios that could not get queued for
    348 	 * some reason.
    349 	 */
    350 	struct aioqtop deferred_ios;
    351 	struct device_control *control; /* The shared control info */
    352 	struct recent_blocks *recent;
    353 	struct device *next; /* the next device */
    354 };
    355 
    356 /*
    357  * err_type and err_info are used by report_error.  All errors should be
    358  * reported via report_error, that way you always get all the info.
    359  */
    360 typedef enum {	/* NOTE(review): meanings below are read from the names only - confirm in report_error */
    361 	ERR_SYS,
    362 	ERR_CORRUPT,
    363 	ERR_HUNG,
    364 	ERR_DEFERRED
    365 } err_type;
    366 
    367 struct error_desc {	/* one-bit flags describing an error; embedded in struct error as desc */
    368 	uint_t HEADER_CHECKSUM_ERR:1;
    369 	/* The length in the header does not match the length we expect */
    370 	uint_t LENGTH_MISMATCH:1;
    371 	uint_t BODY_CHECKSUM_ERR:1;
    372 	/* Matches the device given in error.dev */
    373 	uint_t MATCHING_DEVICE:1;
    374 	/* Matches the last write to the offset given in error.off */
    375 	uint_t MATCHING_LAST:1;
    376 	/* Matches the previous write to the offset given in error.off */
    377 	uint_t MATCHING_PREV:1;
    378 	uint_t UNABLE_TO_LOCK:1;
    379 };
    380 
    381 struct error {	/* error reporting info; each struct aio_str embeds one */
    382 	struct error_desc desc;
    383 	/* this error's instance number */
    384 	int instance;
    385 	/* If this is a follow on error the previous instance number */
    386 	int previous;
    387 	/* if this is a follow on error the parent's instance number */
    388 	int parent;
    389 	check_t bad_checksum;
    390 	struct fds *fd;
    391 	struct device *dev;
    392 	char *diff_file;
    393 	/* diskomizer offset */
    394 	off64_t doff;
    395 	time_t last_requested;
    396 	struct diff_return dr;
    397 	long delta;
    398 	uint32_t len;
    399 	unsigned path_id : 8;
    400 };
    401 
    402 #include "bufs.h"
    403 /*
    404  * One outstanding asynchronous I/O request.
    405  * NB. The aio_result structure MUST be the first element in this structure
    406  */
    407 typedef struct aio_str {
    408 	daio_result_t	aio_res; /* aio_result passed to aioread/write */
    409 	struct	timeval tv;	/* time when the aio request was submitted */
    410 	struct	device *dev;	/* The device the aio is to/from */
    411 	struct  fds	*fd;	/* the file descriptor for this i/o */
    412 	uchar_t		*buf;	/* The buffer being used for this I/O */
    413 	ullong_t	off;		/* The offset being used for this I/O */
    414 	uint16_t	retrycnt; /* number of times we have retried this I/O */
    415 	iolen_index_t	iolen;	/* The index into the iolen array */
    416 	int	count;		/* The number of times we have done I/O */
    417 	time_t	(*handler)(struct aio_str *aio_resp, ullong_t start); /* same signature as the handle_*() externs */
    418 	struct bufhdr hdr;
    419 	struct daio_id daio_id;	/* the id of the buffer */
    420 	struct error error; /* error reporting info */
    421 	struct aio_str	*next;	/* linked list for cancellation and stats */
    422 	struct aio_str	*prev;	/* linked list for removing entries */
    423 } aio_str_t;
    424 TNF_DECLARE_RECORD(aio_str_t, aio_tnf_str);
    424 typedef enum {	/* return type of an on_error_t callback */
    425 	CONTINUE, /* continue with error processing. */
    426 	RETRY,	  /* Do the aioread again */
    427 	BREAK	  /* All is well, break from error processing */
    428 } loop_type;
    429 
    430 union err_info {	/* used by report_error; NOTE(review): which member applies to which err_type is not visible here */
    431 	time_t time;
    432 	char *str;
    433 };
    434 
    435 typedef loop_type	/* error callback: returns CONTINUE, RETRY or BREAK */
    436 	(*on_error_t)(ullong_t start, struct aio_str *aio_resp);
    437 
    438 extern void nop(void);
    439 extern iolen_index_t max_disk_io_len;
    440 extern struct device *devices; /* all the devices there are */
    441 extern pid_t pgrp;
    442 extern int  nfunc_bufs;
    443 extern uchar_t (*init_uchar_func)(int bufno, int i);	/* function pointer, set elsewhere */
    444 extern void (*shm_chmod)(void *addr, ulong_t len, int mode);	/* function pointer, set elsewhere */
    445 /*
    446  * All the external functions in diskomizer64mpism.c
    447  */
    448 #ifdef __sparc
    449 extern void flush(int32_t *x);
    450 extern void flush_windows(void);
    451 #endif
    452 extern time_t handle_write(struct aio_str *aio_resp, ullong_t start);	/* handle_*: same signature as aio_str.handler */
    453 extern time_t handle_write_then_read(struct aio_str *aio_resp, ullong_t start);
    454 extern time_t handle_read_then_write(struct aio_str *aio_resp, ullong_t start);
    455 extern time_t handle_readonly_seq(struct aio_str *aio_resp, ullong_t start);
    456 extern int how_many_devices(struct device *devices);	/* NB: parameter shadows the global "devices" */
    457 extern void print_number(unsigned long long  i, char *singular,
    458 	char *plural);
    459 extern void * alloc_mem(long a, long b);
    460 extern ullong_t find_next_free(bitmap_t map[], ullong_t offset,
    461 	int len, int maplen);
    462 extern void clear_write(bitmap_t map[], ullong_t off, ulong_t maplen);
    463 extern ulong_t my_lrand(void);
    464 extern int longest_logical_name(void);
    465 extern int longest_device_name(void);
    466 extern void *my_calloc(long a, long b);
    467 extern char *alloc_time_now_fmt(char *fmt);
    468 extern int this_proc(void);
    469 extern int is_readonly(void);
    470 extern int would_stop_before(time_t secs);
    471 extern pid_t master_pid(void);
    472 extern void check_exit_flag(void);
    473 extern struct fds *find_path(struct fds *fdhead, char path_id);	/* NOTE(review): plain char here, but fds.path_id is an unsigned 8-bit field - confirm ids above CHAR_MAX */
    474 extern ullong_t diskomizer_off2byteoff(ullong_t off);
    475 extern char *diff_file(void);
    476 /*
    477  * macros.
    478  */
    479 #define	ZERO_OBJ(X) (void) memset(&X, NULL, sizeof (X))
    480 #define	GET_OFF(X) (X / MAP_BITS)
    481 #define	GET_BIT(X) (1 << (X % MAP_BITS))
    482 
    483 #ifdef	__cplusplus
    484 }
    485 #endif
    486 
    487 #endif /* _DISKOMIZER64MPISM_H */
    488