Home | History | Annotate | Download | only in lvm
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #ifndef _SYS_MD_MDDB_H
     27 #define	_SYS_MD_MDDB_H
     28 
     29 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     30 
     31 #include <sys/types.h>
     32 #include <sys/buf.h>
     33 
     34 #ifdef	__cplusplus
     35 extern "C" {
     36 #endif
     37 
     38 #if 0 /* DRP FOR DEBUGGING */
     39 #define	MDDB_FAKE
     40 #endif
     41 
     42 /* Private flags */
     43 #define	MD_PRV_GOTIT		0x0001	/* Been snarfed */
     44 #define	MD_PRV_DELETE		0x0002	/* Record pending to be deleted */
     45 #define	MD_PRV_COMMIT		0x0004	/* Record pending to be committed */
     46 #define	MD_PRV_CLEANUP		0x0008	/* Record pending to be cleaned up */
     47 #define	MD_PRV_CONVD		0x0010  /* Record has been converted (32->64) */
        /* Combined masks: the record has been snarfed AND has an action pending */
     48 #define	MD_PRV_PENDDEL		(MD_PRV_GOTIT | MD_PRV_DELETE)
     49 #define	MD_PRV_PENDCOM		(MD_PRV_GOTIT | MD_PRV_COMMIT)
     50 #define	MD_PRV_PENDCLEAN	(MD_PRV_GOTIT | MD_PRV_CLEANUP)
     51 
     52 
     53 #define	MDDB_E_INVALID	(-1)	/* an invalid argument was passed */
     54 #define	MDDB_E_EXISTS	(-2)	/* doing an operation a 2nd time which can */
     55 				/*	only be done once */
     56 #define	MDDB_E_MASTER	(-3)	/* problem occurred accessing master block */
     57 				/*	returned from NEW_DEV	*/
     58 #define	MDDB_E_TOOSMALL	(-4)	/* device is not large enough */
     59 #define	MDDB_E_NORECORD	(-5)	/* record does not exist */
     60 				/*
     61 				 *	returned from:	mddb_getnextrec
     62 				 *			mddb_getrecsize
     63 				 *			mddb_commitrec
     64 				 *			mddb_commitrecs
     65 				 *			mddb_deleterec
     66 				 */
     67 #define	MDDB_E_NOSPACE	(-6)	/* no space to create record */
     68 #define	MDDB_E_NOTNOW	(-7)	/* do not presently have enough resources */
     69 				/*	to perform requested operation */
     70 #define	MDDB_E_NODB	(-8)	/* no database exists */
     71 #define	MDDB_E_NOTOWNER (-9)	/* have not been told to grab this set */
     72 #define	MDDB_E_STALE	(-10)	/* database is stale */
     73 #define	MDDB_E_TOOFEW	(-11)	/* not enough replicas available */
     74 #define	MDDB_E_TAGDATA	(-12)	/* tagged data detected */
     75 #define	MDDB_E_ACCOK	(-13)	/* 50/50 mode */
     76 #define	MDDB_E_NTAGDATA	(-14)	/* tagop try, no tag data */
     77 #define	MDDB_E_ACCNOTOK	(-15)	/* accop try, no accept possible */
     78 #define	MDDB_E_NOLOCBLK	(-16)	/* No valid locators found */
     79 #define	MDDB_E_NOLOCNMS	(-17)	/* No valid locator name information */
     80 #define	MDDB_E_NODIRBLK	(-18)	/* No directory blocks found */
     81 #define	MDDB_E_NOTAGREC	(-19)	/* No tag record blocks found */
     82 #define	MDDB_E_NOTAG	(-20)	/* No matching tag record found */
     83 #define	MDDB_E_NODEVID	(-21)	/* No device id found */
     84 
     85 #define	MDDB_MINBLKS		16	/* enough for a few metadevices */
     86 #define	MDDB_MAXBLKS		8192	/* size of free bit map, in blocks */
        					/* (must be divisible by 8) */
     87 #define	MDDB_MN_MINBLKS		32768	/* Multinode metadb minimum size */
     88 					/* 16MB */
     89 #define	MDDB_MN_MAXBLKS		524288	/* size of free bit map, in blocks */
        					/* (must be divisible by 8) */
     90 					/* 256MB */
     91 
     92 #define	MDDB_C_STALE		0x0001	/* cf. MDDB_E_STALE */
     93 #define	MDDB_C_TOOFEW		0x0002	/* cf. MDDB_E_TOOFEW */
     94 #define	MDDB_C_NOTOWNER		0x0004	/* cf. MDDB_E_NOTOWNER */
     95 #define	MDDB_C_SET_MN_STALE	0x0008	/* Set MN set to stale */
     96 #define	MDDB_C_IMPORT		0x0010	/* NOTE(review): presumably set */
        					/* import in progress - confirm */
     97 
     98 /*
     99  * Defines used to set/reset the new master flag in the set structure.
    100  * Used during the reconfig cycle to quickly determine if there is a
    101  * new master for the set.
    102  */
    103 #define	MDDB_NM_SET		0x0001
    104 #define	MDDB_NM_RESET		0x0002
    105 #define	MDDB_NM_GET		0x0004
    106 
    107 /* Flag bits for info_flags in the Locator Block Device ID area - mddb_did_info */
    108 #define	MDDB_DID_EXISTS		0x0001	/* Device ID exists */
    109 #define	MDDB_DID_VALID		0x0002	/* Device ID valid on current system */
    110 #define	MDDB_DID_UPDATED	0x0004  /* locator/sidelocator info updated */
    111 
    112 /* Flag bits for lb_flags in the Locator Block - mddb_lb */
    113 #define	MDDB_DEVID_STYLE	0x0001	/* Locator Block in Device ID format */
    114 #define	MDDB_MNSET		0x0002  /* MDDB is for a multi-node set */
    115 
    116 
    117 #define	MDDB_MAX_PATCH	25		/* number of locations that */
    118 					/*	can be patched in etc/system */
    119 
    120 /*
    121  * Set struct used by all parts of the driver, to store anchor pointers.
    122  *
    123  * Locks associated with the fields in this structure:
    124  *
    125  * Some of the fields are accessible by both the single threaded ioctl thread
    126  * and internal threads such as resync, hotsparing...etc.  In this case
    127  * additional protection is needed.  For example, s_db is additionally
    128  * protected by s_dbmx, and s_un, s_ui are protected by md_unit_array_rw.lock.
    129  * s_nm, s_nmid, s_did_nm, s_did_nmid and s_dtp are protected by nm_lock.
    130  * The rest of the fields are protected by md_mx.  Two fields, s_un_next and
    131  * s_un_avail, were introduced by the friendly name project and are ONLY
    132  * accessible via a single threaded ioctl thread which already is protected
    133  * by the ioctl lock, so there is no need to add extra protection to them.
    134  * However, if in the future they become accessible by other internal threads
    135  * then additional protection such as the md_mx lock is highly recommended.
    136  *
    137  */
    138 typedef struct md_set {
    139 	uint_t		s_status;	/* set status */
    140 	void		**s_ui;		/* set unit incore anchor */
    141 	void		**s_un;		/* set unit anchor */
    142 	void		*s_hsp;		/* set Hot Spare Pool anchor */
    143 	void		*s_hs;		/* set Hot Spare anchor */
    144 	void		*s_db;		/* set MDDB anchor */
    145 	kmutex_t	s_dbmx;		/* set MDDB mutex */
    146 	void		*s_nm;		/* set namespace anchor */
    147 	mddb_recid_t	s_nmid;		/* set namespace anchor record */
    148 	void		*s_did_nm;	/* set device id namespace anchor */
    149 	mddb_recid_t	s_did_nmid;	/* set device id namespace anchor rec */
    150 	void		*s_dtp;		/* set data tag rec */
    151 	int		s_am_i_master;	/* incore master flag for this node */
    152 	md_mn_nodeid_t	s_nodeid;	/* nodeid of this node - for MN sets */
    153 	uint_t		s_rcnt;		/* incore resync count for set */
    154 	unit_t		s_un_next;	/* s_un scan starts here */
    155 	unit_t		s_un_avail;	/* number of avail slots */
    156 } md_set_t;
    157 
    158 
        /* Each magic number is four ASCII characters, shown in quotes below. */
    159 #define	MDDB_MAGIC_MB	0x6d646d62	/* "mdmb": master blocks */
    160 #define	MDDB_MAGIC_DB	0x6d646462	/* "mddb": directory blocks */
    161 #define	MDDB_MAGIC_RB	0x6d647262	/* "mdrb": record blocks */
    162 #define	MDDB_MAGIC_LB	0x6d646c62	/* "mdlb": locator blocks */
    163 #define	MDDB_MAGIC_LN	0x6d646c6e	/* "mdln": locator names */
    164 #define	MDDB_MAGIC_DT	0x6d646474	/* "mddt": data tag */
    165 #define	MDDB_MAGIC_DI	0x6d646469	/* "mddi": device ID block */
    166 #define	MDDB_MAGIC_DU	0x6d646475	/* "mddu": dummy mb */
    167 #define	MDDB_MAGIC_DE	0x6d646465	/* "mdde": mb devid */
    168 
    169 #define	MDDB_GLOBAL_XOR 1234567890
    170 
    171 #define	MDDB_REV_MAJOR  (uint_t)0xff00	/* mask: major portion of a revision */
    172 #define	MDDB_REV_MINOR  (uint_t)0x00ff	/* mask: minor portion of a revision */
    173 
    174 /*
    175  * MDDB_REV_MNMB:
    176  * If a MN diskset, master block revision is set to MDDB_REV_MNMB.
    177  * Even though the master block structure is no different
    178  * for a MN set, setting the revision field to a different
    179  * number keeps any pre-MN_diskset code from accessing
    180  * this diskset.  It also allows for an early determination
    181  * of a MN diskset when reading in from disk so that the
    182  * proper size locator block and locator names structure
    183  * can be read in thus saving time on diskset startup.
    184  * Since no change in master block structure, the MDDB_REV_MINOR
    185  * portion of the revision was incremented.
    186  *
    187  * MDDB_REV_MNLB:
    188  * If a MN diskset, the locator block structure is a different size in
    189  * order to accommodate up to MD_MNMAXSIDES nodes in a diskset
    190  * with any nodeid (sideno) allowed.
    191  * The revision is set to MDDB_REV_MNLB which is a change of the
    192  * MDDB_REV_MAJOR portion of the revision.
    193  *
    194  * MDDB_REV_MNLN:
    195  * If a MN diskset, the locator names is a different size in
    196  * order to accommodate up to MD_MNMAXSIDES nodes in a diskset
    197  * with any nodeid (sideno) allowed.
    198  * The revision is set to MDDB_REV_MNLN which is a change of the
    199  * MDDB_REV_MAJOR portion of the revision.
    200  *
    201  * The record blocks have two binary properties.  A record block can
    202  * represent either a 32 or 64 bit unit.  A record block can also represent
    203  * a traditionally named unit or a friendly named unit.  Thus, there are
    204  * four minor revisions of record block.
    205  *
    206  *		Traditional		Friendly
    207  *		Name			Name
    208  *		-----------		--------
    209  * 32 bit	MDDB_REV_RB		MDDB_REV_RBFN
    210  * 64 bit	MDDB_REV_RB64		MDDB_REV_RB64FN
    211  */
    212 
    213 #define	MDDB_REV_MB	(uint_t)0x0201
    214 #define	MDDB_REV_MNMB	(uint_t)0x0202
    215 #define	MDDB_REV_DB	(uint_t)0x0201
    216 #define	MDDB_REV_LB	(uint_t)0x0500
    217 #define	MDDB_REV_MNLB	(uint_t)0x0600
    218 #define	MDDB_REV_LN	(uint_t)0x0100
    219 #define	MDDB_REV_MNLN	(uint_t)0x0300
    220 #define	MDDB_REV_RB	(uint_t)0x0200
    221 #define	MDDB_REV_RB64	(uint_t)0x0201
    222 #define	MDDB_REV_RBFN	(uint_t)0x0202
    223 #define	MDDB_REV_RB64FN	(uint_t)0x0203
    224 #define	MDDB_REV_DT	(uint_t)0x0100
    225 #define	MDDB_REV_DI	(uint_t)0x0100
    226 
    /*
     * Transfer record block friendly name status to unit/hs structure.
     *
     * rbv is the record block revision and unv is a modifiable lvalue:
     *   MDDB_REV_RB,   MDDB_REV_RB64	-> MD_FN_META_DEV is cleared in unv
     *   MDDB_REV_RBFN, MDDB_REV_RB64FN	-> MD_FN_META_DEV is set in unv
     * Any other revision leaves unv untouched.
     *
     * The expansion is a bare switch statement (kept that way so existing
     * call sites are unaffected); do not use it as the unbraced body of an
     * if that has an else.
     */
    #define	MDDB_NOTE_FN(rbv, unv)	switch (rbv) { \
    				case MDDB_REV_RB: \
    				case MDDB_REV_RB64: \
    					(unv) &= ~MD_FN_META_DEV; \
    					break; \
    				case MDDB_REV_RBFN: \
    				case MDDB_REV_RB64FN: \
    					(unv) |= MD_FN_META_DEV; \
    					break; \
    				default: \
    					break; \
    				}
    240 
    241 #define	MDDB_BSIZE	(uint_t)DEV_BSIZE	/* size in bytes of one mddb block */
    242 #define	MDDB_PREFIXCNT	10	/* entries in ln_prefixes[] */
    243 #define	MDDB_DRVNMCNT   10	/* entries in lb_drvnm[] */
    244 
    245 typedef int	mddb_block_t;	/* block number or block count within the mddb */
    246 
    247 #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
    248 #pragma pack(4)
    249 #endif
    250 typedef struct md_mnname_suffix {	/* name suffix entry for a MN diskset */
    251 	md_name_suffix	mn_ln_suffix;
    252 	uint_t		mn_ln_sideno;	/* side number this suffix belongs to */
    253 } md_mnname_suffix_t;
    254 
    255 typedef	struct mddb_ln {	/* locator names (traditional and local sets) */
    256 	int			ln_magic;	/* presumably MDDB_MAGIC_LN */
    257 	uint_t			ln_revision;	/* presumably MDDB_REV_LN */
    258 	uint_t			ln_checksum;
    259 	struct timeval32	ln_timestamp;
    260 	md_name_prefix		ln_prefixes[MDDB_PREFIXCNT];
    261 	/* Don't change array sizes without changing RNDUP_BLKCNT */
        	/* The first index of ln_suffixes is the side number. */
    262 	md_name_suffix		ln_suffixes[MD_MAXSIDES][MDDB_NLB];
    263 } mddb_ln_t;
    264 
    265 /*
    266  * Locator name structure for MN diskset.  Same as for traditional
    267  * and local diskset except that more sides are supported and the
    268  * side number can be any number since the side number is stored
    269  * in the ln_mnsuffixes structure instead of being used as an index
    270  * into that array.  This means that the whole array may need to be
    271  * searched in order to find the correct information given a side number.
    272  */
    273 typedef	struct mddb_mnln {
    274 	int			ln_magic;	/* presumably MDDB_MAGIC_LN */
    275 	uint_t			ln_revision;	/* MDDB_REV_MNLN for a MN diskset */
    276 	uint_t			ln_checksum;
    277 	struct timeval32	ln_timestamp;
    278 	md_name_prefix		ln_prefixes[MDDB_PREFIXCNT];
    279 	/* Don't change array sizes without changing MDDB_MNLNCNT */
        	/* Each entry carries its own side number (mn_ln_sideno). */
    280 	md_mnname_suffix_t	ln_mnsuffixes[MD_MNMAXSIDES][MDDB_NLB];
    281 } mddb_mnln_t;
    282 
    283 #define	RNDUP_BLKCNT(sz, delta)	(((sz) - \
    284 				    ((delta) * \
    285 				    ((MD_MAXSIDES  - 1) * MDDB_NLB)) + \
    286 				    MDDB_BSIZE - 1) / MDDB_BSIZE)
        /*
         * RNDUP_BLKCNT(sz, delta) yields the number of MDDB_BSIZE blocks needed
         * to hold sz bytes after removing (MD_MAXSIDES - 1) * MDDB_NLB elements
         * of delta bytes each, rounded up to a whole block.  With delta == 0 the
         * full structure is counted; with delta == the size of one per-side
         * element only a single side's worth of elements is counted.
         */
    287 #define	MDDB_LNCNT		RNDUP_BLKCNT(sizeof (mddb_ln_t), 0)
    288 #define	MDDB_LOCAL_LNCNT	RNDUP_BLKCNT(sizeof (mddb_ln_t), \
    289 				    sizeof (md_name_suffix))
    290 
        /* Blocks needed for a whole mddb_mnln_t, rounded up. */
    291 #define	MDDB_MNLNCNT		((sizeof (mddb_mnln_t) + (MDDB_BSIZE - 1)) \
    292 				    / MDDB_BSIZE)
    293 
    294 typedef struct mddb_dt {	/* data tag block */
    295 	uint_t		dt_mag;		/* presumably magic (MDDB_MAGIC_DT) */
    296 	uint_t		dt_rev;		/* presumably revision (MDDB_REV_DT) */
    297 	uint_t		dt_cks;		/* presumably checksum - confirm */
    298 	mddb_dtag_t	dt_dtag;	/* the data tag itself */
    299 } mddb_dt_t;
    300 
    301 #define	MDDB_DT_BYTES	(roundup(sizeof (mddb_dt_t), MDDB_BSIZE))	/* whole blocks, in bytes */
    302 #define	MDDB_DT_BLOCKS	(btodb(MDDB_DT_BYTES))	/* same size converted by btodb() */
    303 
    304 typedef union identifier {	/* identifier kept in lb_ident and s_ident */
    305 	char			serial[MDDB_SN_LEN];
    306 	struct timeval32	createtime;
    307 } identifier_t;
    308 
    309 typedef struct mddb_locator {	/* element of lb_locators[] */
    310 	dev32_t		l_dev;		/* 32-bit device number */
    311 	daddr32_t	l_blkno;	/* 32-bit block address */
    312 	int		l_flags;
    313 } mddb_locator_t;
    314 
    315 typedef struct mddb_sidelocator {	/* element of lb_sidelocators[][] */
    316 	uchar_t		l_drvnm_index;	/* presumably index into lb_drvnm[] */
    317 	minor_t		l_mnum;		/* minor number */
    318 } mddb_sidelocator_t;
    319 
    320 typedef struct mddb_mnsidelocator {	/* element of lb_mnsidelocators[][] */
    321 	uchar_t		mnl_drvnm_index; /* presumably index into lb_drvnm[] */
    322 	minor_t		mnl_mnum;	/* minor number */
    323 	uint_t		mnl_sideno;	/* side number stored with the entry */
    324 } mddb_mnsidelocator_t;
    325 
    326 typedef struct mddb_drvnm {	/* element of lb_drvnm[] */
    327 	uchar_t		dn_len;		/* presumably length of dn_data */
    328 	char		dn_data[MD_MAXDRVNM];
    329 } mddb_drvnm_t;
    330 
    331 /*
    332  * Locator Block Device ID Information
    333  * Several device ids may share one disk block in an effort to
    334  * conserve used replica space.
    335  */
    336 typedef struct mddb_did_info {
    337 	uint_t		info_flags;	/* MDDB Device ID flags (MDDB_DID_*) */
    338 	uint_t		info_firstblk;	/* Device ID Start Block */
    339 	uint_t		info_blkcnt;	/* Device ID Block Count */
    340 	uint_t		info_offset;	/* Device ID offset w/i Block */
    341 	uint_t		info_length;	/* Device ID Length */
    342 	uint_t		info_checksum;	/* Device ID Checksum */
    343 	char		info_minor_name[32]; /* Minor name of lb dev */
    344 } mddb_did_info_t;
    345 
    346 typedef struct mddb_did_blk {	/* device ID block: array of mddb_did_info_t */
    347 	int		blk_magic;	/* used for verification */
    348 	uint_t		blk_revision;	/* used for verification */
    349 	int		blk_checksum;	/* used for verification */
    350 	uint_t		blk_commitcnt;	/* matches LB's commitcnt */
    351 	mddb_did_info_t	blk_info[MDDB_NLB]; /* same count as lb_locators[] */
    352 } mddb_did_blk_t;
    353 #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
    354 #pragma pack()
    355 #endif
    356 
    357 #define	MDDB_DID_BYTES	(roundup(sizeof (mddb_did_blk_t), MDDB_BSIZE))	/* whole blocks, in bytes */
    358 #define	MDDB_DID_BLOCKS	(btodb(MDDB_DID_BYTES))	/* same size converted by btodb() */
    359 
    360 /*
    361  * Device ID Disk Blocks.
    362  * Incore linked list of disk blocks containing device IDs.
    363  * The list is built when reading in the mddb_did_blk structure and
    364  * when reading in the actual disk blocks containing device ids.
    365  * This list is used to easily write out all disk blocks containing
    366  * device ids.
    367  */
    368 typedef struct mddb_did_db {
    369 	uint_t		db_firstblk;	/* Disk Block's logical addr */
    370 	uint_t		db_blkcnt;	/* Contig Disk Block Count */
    371 	caddr_t		db_ptr;		/* Ptr to incore Block(s) */
    372 	struct mddb_did_db	*db_next;	/* Ptr to next in list */
    373 } mddb_did_db_t;
    374 
    375 /*
    376  * Device ID Free List.
    377  * Incore linked list of free space in disk blocks containing device IDs.
    378  * Used to manage placement of device IDs in disk blocks.
    379  * All disk blocks on the free list are also in the linked list of disk
    380  * blocks containing device IDs (mddb_did_db_t).
    381  */
    382 typedef struct mddb_did_free {
    383 	uint_t			free_blk;	/* Disk Block's logical addr */
    384 	uint_t			free_offset;	/* offset of free space */
    385 	uint_t			free_length;	/* length of free space */
    386 	struct mddb_did_free	*free_next;	/* Ptr to next in list */
    387 } mddb_did_free_t;
    388 
    389 /*
    390  * Device ID Incore Area
    391  *    Contains pointer to Device ID Disk Block list and
    392  *         Device ID Free List.
    393  *    Also contains incore array of pointers to device IDs.  Pointers
    394  *    point into the device ID Disk Block list and are used as a
    395  *    shortcut to find incore device IDs.
    396  */
    397 typedef struct mddb_did_ic {
    398 	mddb_did_blk_t	*did_ic_blkp;	/* incore mddb_did_blk */
    399 	mddb_did_db_t	*did_ic_dbp;	/* Device ID Disk Block list */
    400 	mddb_did_free_t	*did_ic_freep;	/* Device ID Free List */
    401 	ddi_devid_t	did_ic_devid[MDDB_NLB]; /* Ptr to device IDs */
    402 } mddb_did_ic_t;
    403 
    404 /*
    405  * Locator Block (LB):
    406  *	- Are fixed size, but the size is different
    407  *		for local/shared set db replicas.
    408  *	- All LB's start at logical block 0.
    409  * 	- After a replica quorum is found, there is
    410  *	  only one incore copy of the LB.
    411  *	- LB's are only written when replicas are added, deleted, or errored.
    412  *	- LB's provide information about other replicas and their state.
    413  */
    414 #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
    415 #pragma pack(4)
    416 #endif
    417 typedef struct mddb_lb {
    418 	int			lb_magic;	/* used for verification */
    419 	uint_t			lb_revision;	/* used for verification */
    420 	int			lb_checksum;	/* used for verification */
    421 	uint_t			lb_commitcnt;	/* IMPORTANT */
    422 	struct timeval32	lb_timestamp;	/* informative only */
    423 	int			lb_loccnt;	/* used for verification */
    424 	identifier_t		lb_ident;	/* used for verification */
    425 	uint_t			lb_flags;	/* flags describing LB */
        						/* (MDDB_DEVID_STYLE, MDDB_MNSET) */
    426 	uint_t			lb_spare[8];	/* Spare/Pad */
    427 	mddb_block_t		lb_didfirstblk;	/* Devid Array Start Block */
    428 	mddb_block_t		lb_didblkcnt;	/* Devid Array Number Blocks */
    429 	mddb_block_t		lb_dtfirstblk;	/* Data Tag Start Block */
    430 	mddb_block_t		lb_dtblkcnt;	/* Data Tag Number Block(s) */
    431 	struct timeval32	lb_inittime;	/* creation of database */
    432 	set_t			lb_setno;	/* used for verification */
    433 	mddb_block_t		lb_blkcnt;	/* used for verification */
    434 	mddb_block_t		lb_lnfirstblk;	/* presumably Locator Names Start Block */
    435 	mddb_block_t		lb_lnblkcnt;	/* presumably Locator Names Number Blocks */
    436 	mddb_block_t		lb_dbfirstblk;	/* presumably first Directory Block */
    437 	mddb_drvnm_t		lb_drvnm[MDDB_DRVNMCNT];
    438 	mddb_locator_t		lb_locators[MDDB_NLB];
    439 	/* Don't change array sizes without changing RNDUP_BLKCNT */
    440 	mddb_sidelocator_t	lb_sidelocators[MD_MAXSIDES][MDDB_NLB];
    441 } mddb_lb_t;
    442 #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
    443 #pragma pack()
    444 #endif
    445 
    446 /*
    447  * Locator block structure for MN diskset.  Same as for traditional
    448  * and local diskset except that more sides are supported and the
    449  * side number can be any number since the side number is stored
    450  * in the lb_mnsidelocators structure instead of being used as an index
    451  * into that array.  This means that the whole array may need to be
    452  * searched in order to find the correct information given a side number.
    453  */
    454 typedef struct mddb_mnlb {
    455 	int			lb_magic;	/* used for verification */
    456 	uint_t			lb_revision;	/* used for verification */
        						/* (MDDB_REV_MNLB for a MN diskset) */
    457 	int			lb_checksum;	/* used for verification */
    458 	uint_t			lb_commitcnt;	/* IMPORTANT */
    459 	struct timeval32	lb_timestamp;	/* informative only */
    460 	int			lb_loccnt;	/* used for verification */
    461 	identifier_t		lb_ident;	/* used for verification */
    462 	uint_t			lb_flags;	/* flags describing LB */
    463 	uint_t			lb_spare[8];	/* Spare/Pad */
    464 	mddb_block_t		lb_didfirstblk;	/* Devid Array Start Block */
    465 	mddb_block_t		lb_didblkcnt;	/* Devid Array Number Blocks */
    466 	mddb_block_t		lb_dtfirstblk;	/* Data Tag Start Block */
    467 	mddb_block_t		lb_dtblkcnt;	/* Data Tag Number Block(s) */
    468 	struct timeval32	lb_inittime;	/* creation of database */
    469 	set_t			lb_setno;	/* used for verification */
    470 	mddb_block_t		lb_blkcnt;	/* used for verification */
    471 	mddb_block_t		lb_lnfirstblk;	/* presumably Locator Names Start Block */
    472 	mddb_block_t		lb_lnblkcnt;	/* presumably Locator Names Number Blocks */
    473 	mddb_block_t		lb_dbfirstblk;	/* presumably first Directory Block */
    474 	mddb_drvnm_t		lb_drvnm[MDDB_DRVNMCNT];
    475 	mddb_locator_t		lb_locators[MDDB_NLB];
    476 	/* Don't change array sizes without changing MDDB_MNLBCNT */
        	/* Each entry carries its own side number (mnl_sideno). */
    477 	mddb_mnsidelocator_t	lb_mnsidelocators[MD_MNMAXSIDES][MDDB_NLB];
    478 } mddb_mnlb_t;
    479 
    480 
    481 #define	MDDB_LBCNT		RNDUP_BLKCNT(sizeof (mddb_lb_t), 0)	/* blocks for a full mddb_lb_t */
        /* Blocks for an mddb_lb_t holding a single side's sidelocators. */
    482 #define	MDDB_LOCAL_LBCNT	RNDUP_BLKCNT(sizeof (mddb_lb_t), \
    483 				    sizeof (mddb_sidelocator_t))
    484 
        /* Blocks needed for a whole mddb_mnlb_t, rounded up. */
    485 #define	MDDB_MNLBCNT		((sizeof (mddb_mnlb_t) + (MDDB_BSIZE - 1)) \
    486 				    / MDDB_BSIZE)
    487 
    488 typedef struct mddb_map {	/* type of mb_blkmap (logical->physical blk map) */
    489 	daddr32_t		m_consecutive;	/* presumably count of consecutive blocks */
    490 	daddr32_t		m_firstblk;	/* presumably first block of the run */
    491 } mddb_map_t;
    492 
    493 /*
    494  * Master block(s) (MB)
    495  * 	- Are written by userland; Never by the driver!
    496  *	- Each replica has its own master blocks,
    497  *		the master block(s) are not shared.
    498  *	- MB's are not in the logical block address space of the database.
    499  *	- MB's are a fixed size record (MDDB_BSIZE)
    500  *	- MB's provide the logical to physical block translation,
    501  *		for their replica.
    502  */
    503 typedef	struct mddb_mb {
    504 	int			mb_magic;	/* used for verification */
    505 	uint_t			mb_revision;	/* used for verification */
    506 	uint_t			mb_checksum;	/* used for verification */
        	/* mb_next is declared as a 32-bit integer under _LP64, a pointer otherwise */
    507 #ifdef _LP64
    508 	uint32_t		mb_next;	/* incore to next mb */
    509 #else
    510 	struct mddb_mb		*mb_next;	/* incore to next mb */
    511 #endif	/* _LP64 */
    512 	daddr32_t		mb_nextblk;	/* block # for next mb */
    513 	md_timeval32_t		mb_timestamp;	/* timestamp */
    514 	daddr32_t		mb_blkcnt;	/* size of blkmap */
    515 	daddr32_t		mb_blkno;	/* physical loc. for this MB */
    516 	set_t			mb_setno;	/* used for verification */
    517 	struct timeval32	mb_setcreatetime; /* set creation timestamp */
    518 	int			spares[7];
    519 	mddb_map_t		mb_blkmap;	/* logical->physical blk map */
    520 	int			mb_devid_magic;	/* verify devid in mb */
    521 	short			mb_devid_len;	/* len of following devid */
    522 	char			mb_devid[1];	/* devid byte array */
    523 } mddb_mb_t;
    524 
    525 /*
    526  * In-core version of mddb_mb. It is known that the mddb_mb is 512 bytes on
    527  * disk, really, and so this structure is 512 + sizeof(struct mddb_mb_ic *)
    528  */
    529 #define	MDDB_IC_BSIZE	(MDDB_BSIZE + sizeof (struct mddb_mb_ic *))
    530 typedef struct mddb_mb_ic {
    531 	struct mddb_mb_ic 	*mbi_next;	/* next in-core master block */
    532 	struct mddb_mb		mbi_mddb_mb;	/* the master block itself */
    533 } mddb_mb_ic_t;
    534 
    535 
    536 /*
    537  * There can be no address in a record block. The checksum must
    538  * stay the same wherever the record is in memory. Many
    539  * things depend on this. Also the timestamp is the time the
    540  * record was committed, not the time it was written to a particular
    541  * device.
    542  *
    543  * Old definition of mddb_rb, for 32-bit apps and libraries
    544  */
    545 typedef struct mddb_rb {
    546 	uint_t			rb_magic;
    547 	uint_t			rb_revision;
    548 	uint_t			rb_checksum;
    549 	uint_t			rb_checksum_fiddle;
    550 	uint_t			rb_private;
    551 	void			*rb_userdata;	/* pointer here; uint32_t in mddb_rb32 */
    552 	uint_t			rb_commitcnt;
    553 	uint_t			rb_spare[1];
    554 	struct timeval32	rb_timestamp;	/* time the record was committed */
    555 	int			rb_data[1];
    556 } mddb_rb_t;
    557 
    558 /* This is, and always will be, the on-disk version of mddb_rb */
    559 typedef struct mddb_rb32 {
    560 	uint_t			rb_magic;
    561 	uint_t			rb_revision;
    562 	uint_t			rb_checksum;
    563 	uint_t			rb_checksum_fiddle;
    564 	uint_t			rb_private;
    565 	uint32_t		rb_userdata;	/* fixed 32 bits (a pointer in mddb_rb) */
    566 	uint_t			rb_commitcnt;
    567 	uint_t			rb_spare[1];
    568 	struct timeval32	rb_timestamp;
    569 	int			rb_data[1];
    570 } mddb_rb32_t;
    571 
    572 /*
    573  * directory entries
    574  */
    575 typedef struct mddb_optinfo {	/* element type of de_optinfo[2] */
    576 	int		o_li;
    577 	int		o_flags;
    578 } mddb_optinfo_t;
    579 
    580 /* Old definition of mddb_de, for 32-bit apps and libraries */
    581 typedef struct mddb_de {
    582 	struct mddb_de	*de_next;	/* next directory entry */
    583 	mddb_rb_t	*de_rb;		/* record block of this entry */
    584 	mddb_recid_t	de_recid;
    585 	mddb_type_t	de_type1;
    586 	uint_t		de_type2;
    587 	uint_t		de_reqsize;
    588 	uint_t		de_recsize;
    589 	mddb_block_t	de_blkcount;	/* number of entries used in de_blks[] */
    590 	uint_t		de_flags;
    591 	mddb_optinfo_t	de_optinfo[2];
    592 	mddb_block_t	de_blks[1];
    593 } mddb_de_t;
    594 
    595 /*
    596  * In core version of mddb_de, includes pointer for mddb_rb32_t user data
    597  * mddb_rb32_t is used incore
    598  */
    599 typedef struct mddb_de_ic {
    600 	void			*de_rb_userdata;   /* de32tode() sets NULL */
    601 	void			*de_rb_userdata_ic;
    602 	uint_t			de_owner_nodeid;   /* de32tode() sets */
        						   /* MD_MN_INVALID_NID */
    603 	struct mddb_de_ic	*de_next;
    604 	mddb_rb32_t		*de_rb;
    605 	mddb_recid_t		de_recid;
    606 	mddb_type_t		de_type1;
    607 	uint_t			de_type2;
    608 	size_t			de_reqsize;
    609 	size_t			de_icreqsize;	   /* not set by de32tode() */
    610 	size_t			de_recsize;
    611 	uint_t			de_blkcount;	   /* entries used in de_blks[] */
    612 	uint_t			de_flags;
    613 	mddb_optinfo_t		de_optinfo[2];
    614 	mddb_block_t		de_blks[1];
    615 } mddb_de_ic_t;
    616 
    617 typedef struct mddb_db {	/* directory block (converted from mddb_db32) */
    618 	uint_t			db_magic;
    619 	uint_t			db_revision;
    620 	uint_t			db_checksum;
    621 	mddb_block_t		db_blknum;
    622 	struct mddb_db		*db_next;
    623 	mddb_block_t		db_nextblk;
    624 	struct timeval32	db_timestamp;
    625 	uint_t			db_recsum;
        	/* Kernel builds use the in-core entry type; others use mddb_de_t. */
    626 #ifdef _KERNEL
    627 	mddb_de_ic_t		*db_firstentry;
    628 #else
    629 	mddb_de_t		*db_firstentry;
    630 #endif
    631 } mddb_db_t;
    632 
    633 /*
    634  * This is, and always will be, the on-disk version of mddb_de.
    635  * When mddb_de32 is read in it is converted into mddb_de_ic.
    636  */
    637 typedef struct mddb_de32 {
    638 	uint32_t	de32_next;	/* 32-bit form of de_next */
    639 	uint32_t	de32_rb;	/* 32-bit form of de_rb */
    640 	mddb_recid_t	de32_recid;
    641 	mddb_type_t	de32_type1;
    642 	uint_t		de32_type2;
    643 	uint_t		de32_reqsize;
    644 	uint_t		de32_recsize;
    645 	mddb_block_t	de32_blkcount;	/* number of entries used in de32_blks[] */
    646 	uint_t		de32_flags;
    647 	mddb_optinfo_t	de32_optinfo[2];
    648 	mddb_block_t	de32_blks[1];
    649 } mddb_de32_t;
    650 
    651 /*
    652  * This is, and always will be, the on-disk version of mddb_db.
    653  * When mddb_db32 is read in it is converted into mddb_db.
    654  * To minimize impact on the mddb format, the mddb_db fields remain intact.
    655  */
    656 typedef struct mddb_db32 {
    657 	uint_t			db32_magic;
    658 	uint_t			db32_revision;
    659 	uint_t			db32_checksum;
    660 	mddb_block_t		db32_blknum;
    661 	uint32_t		db32_next;	/* 32-bit form of db_next */
    662 	mddb_block_t		db32_nextblk;
    663 	struct timeval32	db32_timestamp;
    664 	uint_t			db32_recsum;
    665 	uint32_t		db32_firstentry; /* 32-bit form of db_firstentry */
    666 } mddb_db32_t;
    667 
    /*
     * de32tode(from, to): fill the in-core directory entry *to from the
     * on-disk directory entry *from.
     *
     *  - to->de_rb_userdata is set to NULL and to->de_owner_nodeid to
     *    MD_MN_INVALID_NID.
     *  - The 32-bit de32_next / de32_rb values are widened into pointers.
     *  - from->de32_blkcount block numbers are copied into to->de_blks[];
     *    the caller must have sized *to for that many entries.
     *  - to->de_rb_userdata_ic and to->de_icreqsize are not touched.
     *
     * Both arguments are evaluated several times, so they must be free of
     * side effects.  They are parenthesized and the loop index has a private
     * name so that expressions such as `p + 1' or `&tab[i]' work as arguments.
     * The expansion is a brace-enclosed block, as before.
     */
    #define	de32tode(from, to) \
    	{ \
    	int de32tode_i_; \
    	(to)->de_rb_userdata = NULL; \
    	(to)->de_owner_nodeid = MD_MN_INVALID_NID; \
    	(to)->de_next = \
    	    (struct mddb_de_ic *)(uintptr_t)(from)->de32_next; \
    	(to)->de_rb = (mddb_rb32_t *)(uintptr_t)(from)->de32_rb; \
    	(to)->de_recid = (from)->de32_recid; \
    	(to)->de_type1 = (from)->de32_type1; \
    	(to)->de_type2 = (from)->de32_type2; \
    	(to)->de_reqsize = (from)->de32_reqsize; \
    	(to)->de_recsize = (from)->de32_recsize; \
    	(to)->de_blkcount = (from)->de32_blkcount; \
    	(to)->de_flags = (from)->de32_flags; \
    	(to)->de_optinfo[0] = (from)->de32_optinfo[0]; \
    	(to)->de_optinfo[1] = (from)->de32_optinfo[1]; \
    	for (de32tode_i_ = 0; \
    	    de32tode_i_ < (from)->de32_blkcount; de32tode_i_++) \
    		(to)->de_blks[de32tode_i_] = \
    		    (from)->de32_blks[de32tode_i_]; \
    	}
    687 
    /*
     * detode32(from, to): fill the on-disk directory entry *to from the
     * in-core directory entry *from.
     *
     *  - The de_next / de_rb pointers are narrowed to 32 bits via uintptr_t.
     *  - from->de_blkcount block numbers are copied into to->de32_blks[];
     *    the caller must have sized *to for that many entries.
     *  - The in-core-only members of *from have no on-disk counterpart and
     *    are not copied.
     *
     * Both arguments are evaluated several times, so they must be free of
     * side effects.  They are parenthesized and the loop index has a private
     * name so that expressions such as `p + 1' or `&tab[i]' work as arguments.
     * The index is unsigned to match the unsigned de_blkcount.
     * The expansion is a brace-enclosed block, as before.
     */
    #define	detode32(from, to) \
    	{ \
    	unsigned int detode32_i_; \
    	(to)->de32_next = (uint32_t)(uintptr_t)(from)->de_next; \
    	(to)->de32_rb = (uint32_t)(uintptr_t)(from)->de_rb; \
    	(to)->de32_recid = (from)->de_recid; \
    	(to)->de32_type1 = (from)->de_type1; \
    	(to)->de32_type2 = (from)->de_type2; \
    	(to)->de32_reqsize = (from)->de_reqsize; \
    	(to)->de32_recsize = (from)->de_recsize; \
    	(to)->de32_blkcount = (from)->de_blkcount; \
    	(to)->de32_flags = (from)->de_flags; \
    	(to)->de32_optinfo[0] = (from)->de_optinfo[0]; \
    	(to)->de32_optinfo[1] = (from)->de_optinfo[1]; \
    	for (detode32_i_ = 0; \
    	    detode32_i_ < (from)->de_blkcount; detode32_i_++) \
    		(to)->de32_blks[detode32_i_] = \
    		    (from)->de_blks[detode32_i_]; \
    	}
    705 
    /*
     * db32todb(from, to): fill the directory block *to from the on-disk
     * directory block *from.  The 32-bit db32_next / db32_firstentry values
     * are widened into pointers; every other member is copied as is.
     *
     * The assignments are enclosed in braces so that the macro behaves as one
     * statement (e.g. as the body of an if); call sites written either with
     * or without a trailing semicolon keep working.  Both arguments are
     * evaluated several times and must be free of side effects.
     *
     * NOTE(review): db_firstentry is cast to mddb_de_ic_t *, which matches
     * mddb_db_t only when _KERNEL is defined.
     */
    #define	db32todb(from, to) \
    	{ \
    	(to)->db_magic = (from)->db32_magic; \
    	(to)->db_revision = (from)->db32_revision; \
    	(to)->db_checksum = (from)->db32_checksum; \
    	(to)->db_blknum = (from)->db32_blknum; \
    	(to)->db_next = (struct mddb_db *)(uintptr_t)(from)->db32_next; \
    	(to)->db_nextblk = (from)->db32_nextblk; \
    	(to)->db_timestamp = (from)->db32_timestamp; \
    	(to)->db_recsum = (from)->db32_recsum; \
    	(to)->db_firstentry = \
    	    (mddb_de_ic_t *)(uintptr_t)(from)->db32_firstentry; \
    	}
    716 
    /*
     * dbtodb32(from, to): fill the on-disk directory block *to from the
     * directory block *from.  The db_next / db_firstentry pointers are
     * narrowed to 32 bits via uintptr_t; every other member is copied as is.
     *
     * The assignments are enclosed in braces so that the macro behaves as one
     * statement (e.g. as the body of an if); call sites written either with
     * or without a trailing semicolon keep working.  Both arguments are
     * evaluated several times and must be free of side effects.
     */
    #define	dbtodb32(from, to) \
    	{ \
    	(to)->db32_magic = (from)->db_magic; \
    	(to)->db32_revision = (from)->db_revision; \
    	(to)->db32_checksum = (from)->db_checksum; \
    	(to)->db32_blknum = (from)->db_blknum; \
    	(to)->db32_next = (uint32_t)(uintptr_t)(from)->db_next; \
    	(to)->db32_nextblk = (from)->db_nextblk; \
    	(to)->db32_timestamp = (from)->db_timestamp; \
    	(to)->db32_recsum = (from)->db_recsum; \
    	(to)->db32_firstentry = \
    	    (uint32_t)(uintptr_t)(from)->db_firstentry; \
    	}
    727 
    728 /*
    729  * information about a replica of the database
    730  */
    731 typedef struct mddb_ri {
    732 	struct mddb_ri		*ri_next;	/* next replica in list */
    733 	uint_t			ri_flags;
    734 	uint_t			ri_commitcnt;
    735 	int			ri_transplant;
    736 	md_dev64_t		ri_dev;
    737 	daddr32_t		ri_blkno;
    738 	char			ri_driver[16];
    739 	mddb_mb_ic_t		*ri_mbip;	/* in-core master block(s) */
    740 	mddb_lb_t		*ri_lbp;	/* locator block */
    741 	mddb_dt_t		*ri_dtp;	/* data tag block */
    742 	mddb_did_ic_t		*ri_did_icp;	/* Device ID incore area */
    743 	ddi_devid_t		ri_devid;
    744 	ddi_devid_t		ri_old_devid;
    745 	char			ri_minor_name[MDDB_MINOR_NAME_MAX];
    746 	char			ri_devname[MAXPATHLEN];
    747 } mddb_ri_t;
    748 
    749 typedef struct mddb_bf {	/* list node pairing a buf_t with a locator */
    750 	struct mddb_bf	*bf_next;	/* next in list */
    751 	mddb_locator_t	*bf_locator;
    752 	buf_t		bf_buf;
    753 } mddb_bf_t;
    754 
    755 /*
    756  * Information for sets of databases (which include replicas)
    757  */
        /*
         * A record id is MDDB_BITSRECID bits wide: the set number occupies the
         * bits selected by MDDB_SETMASK (starting at bit MDDB_SETSHIFT) and the
         * per-set id occupies the low MDDB_SETSHIFT bits (MDDB_RECIDMASK).
         */
    758 #define	MDDB_BITSRECID	31
    759 #define	MDDB_SETSHIFT	(MDDB_BITSRECID - MD_BITSSET)
    760 #define	MDDB_SETMASK	(MD_SETMASK << MDDB_SETSHIFT)
    761 #define	MDDB_RECIDMASK	((1 << MDDB_SETSHIFT) - 1)
    762 
        /* DBSET: set number of a record id; DBID: per-set id of a record id */
    763 #define	DBSET(id)	(((id) & MDDB_SETMASK) >> MDDB_SETSHIFT)
    764 #define	DBID(id)	((id) & MDDB_RECIDMASK)
        /* MAKERECID: build a record id from set number s and per-set id i */
    765 #define	MAKERECID(s, i)	((((s) << MDDB_SETSHIFT) & MDDB_SETMASK) | \
    766 			((i) & MDDB_RECIDMASK))
    767 
    768 #define	MDDB_PARSE_LOCBLK	0x00000001
    769 #define	MDDB_PARSE_LOCNM	0x00000002
    770 #define	MDDB_PARSE_OPTRECS	0x00000004
    771 #define	MDDB_PARSE_MASK		0x0000000F
    772 
    773 
    774 #define	MDDB_BLOCK_PARSE	0x00000001	/* Block sending parse msgs */
    775 #define	MDDB_UNBLOCK_PARSE	0x00000002	/* Unblock sending parse msgs */
    776 
    777 /*
    778  * We need to keep s_ident and s_inittime 32 bit.  They are used in mddb_lb
    779  */
    780 typedef struct mddb_set {
    781 	uint_t		s_setno;		/* set number */
    782 	uint_t		s_sideno;		/* side number */
    783 	identifier_t	s_ident;		/* set identifier */
    784 	char		*s_setname;		/* set name */
    785 	mddb_mb_ic_t	**s_mbiarray;		/* master blocks array */
    786 	mddb_db_t	*s_dbp;			/* directory block */
    787 	mddb_lb_t	*s_lbp;			/* locator block */
    788 						/* May be cast to mddb_mnlb_t */
    789 						/* if accessing sidenames in */
    790 						/* MN diskset */
    791 	mddb_ln_t	*s_lnp;			/* locator names block */
    792 						/* May be cast to mddb_mnln_t */
    793 						/* if accessing sidenames in */
    794 						/* MN diskset */
    795 	mddb_dtag_lst_t	*s_dtlp;		/* List of data tags found */
    796 	mddb_did_ic_t	*s_did_icp;		/* Device ID incore area */
    797 	mddb_ri_t	*s_rip;			/* replicas incore list */
    798 	int		s_freeblkcnt;		/* visable for test code */
    799 	int		s_totalblkcnt;		/* visable for test code */
    800 	int		s_mn_parseflags;	/* mddb parse flags for MNset */
    801 	int		s_mn_parseflags_sending; /* parse flgs sent to slaves */
    802 	uchar_t		*s_freebitmap;		/* free blocks bitmap */
    803 	uint_t		s_freebitmapsize;	/* size of bitmap */
    804 	struct timeval32	s_inittime;	/* timestamp set created */
    805 	mddb_recid_t	s_zombie;		/* zombie record - createrec */
    806 	int		s_staledeletes;		/* number of stale deleterec */
    807 	int		s_optcmtcnt;		/* Following are opt. record */
    808 	int		s_opthavelck;		/*   bookkeeping records ... */
    809 	int		s_optwantlck;
    810 	kcondvar_t	s_optwantlck_cv;
    811 	int		s_optwaiterr;
    812 	int		s_opthungerr;
    813 	kcondvar_t	s_opthungerr_cv;
    814 	int		s_opthavequeuinglck;
    815 	int		s_optwantqueuinglck;
    816 	kcondvar_t	s_optqueuing_cv;
    817 	ulong_t		s_bufmisses;
    818 	mddb_bf_t	*s_freebufhead;
    819 	int		s_bufwakeup;
    820 	kcondvar_t	s_buf_cv;
    821 	size_t		s_databuffer_size;
    822 	void		*s_databuffer;
    823 	int		s_singlelockgotten;
    824 	int		s_singlelockwanted;
    825 	kcondvar_t	s_single_thread_cv;
    826 	md_hi_arr_t	s_med;
    827 } mddb_set_t;
    828 
    829 #ifndef MDDB_FAKE
    830 #ifdef _KERNEL
    831 /* md_mddb.c */
    832 extern uint_t			mddb_lb_did_convert(mddb_set_t *,
    833 				    uint_t, uint_t *);
    834 extern void			mddb_locatorblock2splitname(mddb_ln_t *,
    835 				    int, side_t, md_splitname *);
    836 extern int			mddb_configure(mddb_cfgcmd_t,
    837 				    struct mddb_config *);
    838 extern mddb_recid_t		mddb_getnextrec(mddb_recid_t,
    839 				    mddb_type_t, uint_t);
    840 extern int			mddb_getoptloc(mddb_optloc_t *);
    841 extern void			*mddb_getrecaddr(mddb_recid_t);
    842 extern void			*mddb_getrecaddr_resize(mddb_recid_t, size_t,
    843 				    off_t);
    844 extern int			mddb_getrecprivate(mddb_recid_t);
    845 extern void			mddb_setrecprivate(mddb_recid_t, uint_t);
    846 extern mddb_de_ic_t		*mddb_getrecdep(mddb_recid_t);
    847 extern mddb_type_t		mddb_getrectype1(mddb_recid_t);
    848 extern int			mddb_getrectype2(mddb_recid_t);
    849 extern int			mddb_getrecsize(mddb_recid_t);
    850 extern int			mddb_commitrec(mddb_recid_t);
    851 extern int			mddb_commitrecs(mddb_recid_t *);
    852 extern int			mddb_deleterec(mddb_recid_t);
    853 extern mddb_recstatus_t		mddb_getrecstatus(mddb_recid_t);
    854 extern mddb_recid_t		mddb_createrec(size_t usersize,
    855 				    mddb_type_t type, uint_t type2,
    856 				    md_create_rec_option_t option, set_t setno);
    857 extern void			mddb_init(void);
    858 extern void			mddb_unload(void);
    859 extern void			mddb_unload_set(set_t setno);
    860 extern mddb_recid_t		mddb_makerecid(set_t setno, mddb_recid_t id);
    861 extern set_t			mddb_getsetnum(mddb_recid_t id);
    862 extern char			*mddb_getsetname(set_t setno);
    863 extern side_t			mddb_getsidenum(set_t setno);
    864 extern int			mddb_ownset(set_t setno);
    865 extern int			getmed_ioctl(mddb_med_parm_t *medpp, int mode);
    866 extern int			setmed_ioctl(mddb_med_parm_t *medpp, int mode);
    867 extern int			updmed_ioctl(mddb_med_upd_parm_t *medpp,
    868 				    int mode);
    869 extern int			take_set(mddb_config_t *cp, int mode);
    870 extern int			release_set(mddb_config_t *cp, int mode);
    871 extern int			gettag_ioctl(mddb_dtag_get_parm_t *dtgpp,
    872 				    int mode);
    873 extern int			usetag_ioctl(mddb_dtag_use_parm_t *dtupp,
    874 				    int mode);
    875 extern int			accept_ioctl(mddb_accept_parm_t *medpp,
    876 				    int mode);
    877 extern int			md_update_locator_namespace(set_t setno,
    878 				    side_t side, char *dname, char *pname,
    879 				    md_dev64_t devt);
    880 extern int			mddb_validate_lb(set_t setno, int *rmaxsz);
    881 extern int			mddb_getinvlb_devid(set_t setno, int count,
    882 				    int size, char **ctdptr);
    883 extern int			md_update_minor(set_t, side_t, mdkey_t);
    884 extern int			md_update_nm_rr_did_ioctl(mddb_config_t *cp);
    885 extern int			md_update_top_device_minor(set_t, side_t,
    886 				    md_dev64_t);
    887 #ifdef DEBUG
    888 extern void			mddb_check(void);
    889 #endif /* DEBUG */
    890 #endif /* _KERNEL */
    891 
    892 #else
    893 
    894 caddr_t mddb_fakeit;
    895 
    896 #define	md_lb_did_convert(a, b, c)	(0)
    897 #define	mddb_configure(a, b)	(0)
    898 #define	mddb_getnextrec(a, b, c)		((mddb_recid_t)0)
    899 #define	mddb_getrecaddr(a)	(mddb_fakeit)
    900 #define	mddb_getrecprivate(a)	(0)
    901 #define	mddb_setrecprivate(a, b) (0)
    902 #define	mddb_getrectype1(a)	(0)
    903 #define	mddb_getrectype2(a)	(0)
    904 #define	mddb_getrecsize(a)	(0)
    905 #define	mddb_commitrec(a)	(0)
    906 #define	mddb_commitrecs(a)	(0)
    907 #define	mddb_deleterec(a)	(0)
    908 #define	mddb_getrecstatus(a)	(MDDB_OK)
    909 #define	mddb_createrec(s, a, b)	(0xffff & (int)(mddb_fakeit = \
    910 					(caddr_t)kmem_zalloc(s, KM_SLEEP)))
    911 #define	mddb_unload()		(0)
    912 
    913 #endif
    914 
    915 #define	MDDB_NOSLEEP	1
    916 #define	MDDB_SLEEPOK	0
    917 
    918 #define	MDDB_NOOLDOK	0x1
    919 #define	MDDB_MUSTEXIST	0x2
    920 #define	MDDB_NOINIT	0x4
    921 #define	MDDB_MULTINODE	0x8
    922 #define	MDDB_MN_STALE	0x10	/* MN set is stale */
    923 
    924 /* Flags passed to selectreplicas - not a bit mask */
    925 #define	MDDB_SCANALL		1
    926 #define	MDDB_RETRYSCAN		0
    927 #define	MDDB_SCANALLSYNC	2	/* During reconfig, sync up incore */
    928 					/* and ondisk mddb by writing incore */
    929 					/* values to disk.  Don't write */
    930 					/* change log records. */
    931 
    932 /* Flags passed to writestart and writecopy */
    933 #define	MDDB_WRITECOPY_ALL	1	/* Write all incore mddb to disk */
    934 #define	MDDB_WRITECOPY_SYNC	2	/* Write incore mddb to disk except */
    935 					/* 	- change log records */
    936 					/*	- optimized resync records */
    937 
    938 
    939 #define	MDDB_PROBE	1
    940 #define	MDDB_NOPROBE	0
    941 
    942 
    943 /*
    944  * MN diskset definitions used to determine if a slave can write
    945  * directly to the mddb.  ONLY_MASTER only allows the master node
    946  * to write to the mddb.  ANY_NODE allows any node to write
    947  * to the mddb.
    948  */
    949 #define	MDDB_WR_ONLY_MASTER	0
    950 #define	MDDB_WR_ANY_NODE	1
    951 
    952 #define	MDDB_L_LOCKED	0x0001	/* this record is locked */
    953 #define	MDDB_L_WANTED	0x0002
    954 
    955 #ifdef	__cplusplus
    956 }
    957 #endif
    958 
    959 #endif	/* _SYS_MD_MDDB_H */
    960