Home | History | Annotate | Download | only in common
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 
     27 /*
     28  * AVL - generic AVL tree implementation for FileBench use.
     29  * -Adapted from the avl.c open source code used in the Solaris Kernel-
     30  *
     31  * A complete description of AVL trees can be found in many CS textbooks.
     32  *
     33  * Here is a very brief overview. An AVL tree is a binary search tree that is
     34  * almost perfectly balanced. By "almost" perfectly balanced, we mean that at
     35  * any given node, the left and right subtrees are allowed to differ in height
     36  * by at most 1 level.
     37  *
     38  * This relaxation from a perfectly balanced binary tree allows doing
     39  * insertion and deletion relatively efficiently. Searching the tree is
     40  * still a fast operation, roughly O(log(N)).
     41  *
 * The key to insertion and deletion is a set of tree manipulations called
     43  * rotations, which bring unbalanced subtrees back into the semi-balanced state.
     44  *
     45  * This implementation of AVL trees has the following peculiarities:
     46  *
     47  *	- The AVL specific data structures are physically embedded as fields
     48  *	  in the "using" data structures.  To maintain generality the code
     49  *	  must constantly translate between "avl_node_t *" and containing
 *	  data structure "void *"s by adding/subtracting the avl_offset.
     51  *
     52  *	- Since the AVL data is always embedded in other structures, there is
     53  *	  no locking or memory allocation in the AVL routines. This must be
     54  *	  provided for by the enclosing data structure's semantics. Typically,
     55  *	  avl_insert()/_add()/_remove()/avl_insert_here() require some kind of
     56  *	  exclusive write lock. Other operations require a read lock.
     57  *
     58  *      - The implementation uses iteration instead of explicit recursion,
     59  *	  since it is intended to run on limited size kernel stacks. Since
     60  *	  there is no recursion stack present to move "up" in the tree,
     61  *	  there is an explicit "parent" link in the avl_node_t.
     62  *
     63  *      - The left/right children pointers of a node are in an array.
     64  *	  In the code, variables (instead of constants) are used to represent
     65  *	  left and right indices.  The implementation is written as if it only
     66  *	  dealt with left handed manipulations.  By changing the value assigned
     67  *	  to "left", the code also works for right handed trees.  The
     68  *	  following variables/terms are frequently used:
     69  *
     70  *		int left;	// 0 when dealing with left children,
     71  *				// 1 for dealing with right children
     72  *
     73  *		int left_heavy;	// -1 when left subtree is taller at some node,
     74  *				// +1 when right subtree is taller
     75  *
     76  *		int right;	// will be the opposite of left (0 or 1)
     77  *		int right_heavy;// will be the opposite of left_heavy (-1 or 1)
     78  *
     79  *		int direction;  // 0 for "<" (ie. left child); 1 for ">" (right)
     80  *
     81  *	  Though it is a little more confusing to read the code, the approach
     82  *	  allows using half as much code (and hence cache footprint) for tree
     83  *	  manipulations and eliminates many conditional branches.
     84  *
     85  *	- The avl_index_t is an opaque "cookie" used to find nodes at or
     86  *	  adjacent to where a new value would be inserted in the tree. The value
 *	  is a modified "avl_node_t *".  The bottom bit (normally 0 for a
 *	  pointer) is set to indicate that the new node has a value greater
 *	  than the value of the indicated "avl_node_t *".
     90  */
     91 
     92 #include "filebench.h"
     93 #include "fb_avl.h"
     94 
/*
 * Small arrays to translate between balance (or diff) values and child indices.
 *
 * Code that deals with binary tree data structures will randomly use
 * left and right children when examining a tree.  C "if()" statements
 * which evaluate randomly suffer from very poor hardware branch prediction.
 * In this code we avoid some of the branch mispredictions by using the
 * following translation arrays. They replace random branches with an
 * additional memory reference. Since the translation arrays are both very
 * small the data should remain efficiently in cache.
 *
 * avl_child2balance[] is indexed by a child index (0 or 1) and yields the
 * balance adjustment for that side (-1 or +1).
 *
 * avl_balance2child[] is indexed by (1 + value), where value is a balance
 * or comparison result of -1, 0 or +1, and yields child index 0, 0 or 1.
 */
static const int  avl_child2balance[2]	= {-1, 1};
static const int  avl_balance2child[]	= {0, 0, 1};
    108 
    109 
    110 /*
    111  * Walk from one node to the previous valued node (ie. an infix walk
    112  * towards the left). At any given node we do one of 2 things:
    113  *
    114  * - If there is a left child, go to it, then to it's rightmost descendant.
    115  *
    116  * - otherwise we return thru parent nodes until we've come from a right child.
    117  *
    118  * Return Value:
    119  * NULL - if at the end of the nodes
    120  * otherwise next node
    121  */
    122 void *
    123 avl_walk(avl_tree_t *tree, void	*oldnode, int left)
    124 {
    125 	size_t off = tree->avl_offset;
    126 	avl_node_t *node = AVL_DATA2NODE(oldnode, off);
    127 	int right = 1 - left;
    128 	int was_child;
    129 
    130 
    131 	/*
    132 	 * nowhere to walk to if tree is empty
    133 	 */
    134 	if (node == NULL)
    135 		return (NULL);
    136 
    137 	/*
    138 	 * Visit the previous valued node. There are two possibilities:
    139 	 *
    140 	 * If this node has a left child, go down one left, then all
    141 	 * the way right.
    142 	 */
    143 	if (node->avl_child[left] != NULL) {
    144 		for (node = node->avl_child[left];
    145 		    node->avl_child[right] != NULL;
    146 		    node = node->avl_child[right])
    147 			;
    148 	/*
    149 	 * Otherwise, return thru left children as far as we can.
    150 	 */
    151 	} else {
    152 		for (;;) {
    153 			was_child = AVL_XCHILD(node);
    154 			node = AVL_XPARENT(node);
    155 			if (node == NULL)
    156 				return (NULL);
    157 			if (was_child == right)
    158 				break;
    159 		}
    160 	}
    161 
    162 	return (AVL_NODE2DATA(node, off));
    163 }
    164 
    165 /*
    166  * Return the lowest valued node in a tree or NULL.
    167  * (leftmost child from root of tree)
    168  */
    169 void *
    170 avl_first(avl_tree_t *tree)
    171 {
    172 	avl_node_t *node;
    173 	avl_node_t *prev = NULL;
    174 	size_t off = tree->avl_offset;
    175 
    176 	for (node = tree->avl_root; node != NULL; node = node->avl_child[0])
    177 		prev = node;
    178 
    179 	if (prev != NULL)
    180 		return (AVL_NODE2DATA(prev, off));
    181 	return (NULL);
    182 }
    183 
    184 /*
    185  * Return the highest valued node in a tree or NULL.
    186  * (rightmost child from root of tree)
    187  */
    188 void *
    189 avl_last(avl_tree_t *tree)
    190 {
    191 	avl_node_t *node;
    192 	avl_node_t *prev = NULL;
    193 	size_t off = tree->avl_offset;
    194 
    195 	for (node = tree->avl_root; node != NULL; node = node->avl_child[1])
    196 		prev = node;
    197 
    198 	if (prev != NULL)
    199 		return (AVL_NODE2DATA(prev, off));
    200 	return (NULL);
    201 }
    202 
    203 /*
    204  * Access the node immediately before or after an insertion point.
    205  *
    206  * "avl_index_t" is a (avl_node_t *) with the bottom bit indicating a child
    207  *
    208  * Return value:
    209  *	NULL: no node in the given direction
    210  *	"void *"  of the found tree node
    211  */
    212 void *
    213 avl_nearest(avl_tree_t *tree, avl_index_t where, int direction)
    214 {
    215 	int child = AVL_INDEX2CHILD(where);
    216 	avl_node_t *node = AVL_INDEX2NODE(where);
    217 	void *data;
    218 	size_t off = tree->avl_offset;
    219 
    220 	if (node == NULL) {
    221 		if (tree->avl_root != NULL)
    222 			filebench_log(LOG_ERROR,
    223 			    "Null Node Pointer Supplied");
    224 		return (NULL);
    225 	}
    226 	data = AVL_NODE2DATA(node, off);
    227 	if (child != direction)
    228 		return (data);
    229 
    230 	return (avl_walk(tree, data, direction));
    231 }
    232 
    233 
    234 /*
    235  * Search for the node which contains "value".  The algorithm is a
    236  * simple binary tree search.
    237  *
    238  * return value:
    239  *	NULL: the value is not in the AVL tree
    240  *		*where (if not NULL)  is set to indicate the insertion point
    241  *	"void *"  of the found tree node
    242  */
    243 void *
    244 avl_find(avl_tree_t *tree, void *value, avl_index_t *where)
    245 {
    246 	avl_node_t *node;
    247 	avl_node_t *prev = NULL;
    248 	int child = 0;
    249 	int diff;
    250 	size_t off = tree->avl_offset;
    251 
    252 	for (node = tree->avl_root; node != NULL;
    253 	    node = node->avl_child[child]) {
    254 
    255 		prev = node;
    256 
    257 		diff = tree->avl_compar(value, AVL_NODE2DATA(node, off));
    258 		if (!((-1 <= diff) && (diff <= 1))) {
    259 			filebench_log(LOG_ERROR, "avl compare error");
    260 			return (NULL);
    261 		}
    262 		if (diff == 0) {
    263 			if (where != NULL)
    264 				*where = 0;
    265 
    266 			return (AVL_NODE2DATA(node, off));
    267 		}
    268 		child = avl_balance2child[1 + diff];
    269 
    270 	}
    271 
    272 	if (where != NULL)
    273 		*where = AVL_MKINDEX(prev, child);
    274 
    275 	return (NULL);
    276 }
    277 
    278 
    279 /*
    280  * Perform a rotation to restore balance at the subtree given by depth.
    281  *
    282  * This routine is used by both insertion and deletion. The return value
    283  * indicates:
    284  *	 0 : subtree did not change height
    285  *	!0 : subtree was reduced in height
    286  *
    287  * The code is written as if handling left rotations, right rotations are
    288  * symmetric and handled by swapping values of variables right/left[_heavy]
    289  *
    290  * On input balance is the "new" balance at "node". This value is either
    291  * -2 or +2.
    292  */
    293 static int
    294 avl_rotation(avl_tree_t *tree, avl_node_t *node, int balance)
    295 {
    296 	int left = !(balance < 0);	/* when balance = -2, left will be 0 */
    297 	int right = 1 - left;
    298 	int left_heavy = balance >> 1;
    299 	int right_heavy = -left_heavy;
    300 	avl_node_t *parent = AVL_XPARENT(node);
    301 	avl_node_t *child = node->avl_child[left];
    302 	avl_node_t *cright;
    303 	avl_node_t *gchild;
    304 	avl_node_t *gright;
    305 	avl_node_t *gleft;
    306 	int which_child = AVL_XCHILD(node);
    307 	int child_bal = AVL_XBALANCE(child);
    308 
    309 	/* BEGIN CSTYLED */
    310 	/*
    311 	 * case 1 : node is overly left heavy, the left child is balanced or
    312 	 * also left heavy. This requires the following rotation.
    313 	 *
    314 	 *                   (node bal:-2)
    315 	 *                    /           \
    316 	 *                   /             \
    317 	 *              (child bal:0 or -1)
    318 	 *              /    \
    319 	 *             /      \
    320 	 *                     cright
    321 	 *
    322 	 * becomes:
    323 	 *
    324 	 *              (child bal:1 or 0)
    325 	 *              /        \
    326 	 *             /          \
    327 	 *                        (node bal:-1 or 0)
    328 	 *                         /     \
    329 	 *                        /       \
    330 	 *                     cright
    331 	 *
    332 	 * we detect this situation by noting that child's balance is not
    333 	 * right_heavy.
    334 	 */
    335 	/* END CSTYLED */
    336 	if (child_bal != right_heavy) {
    337 
    338 		/*
    339 		 * compute new balance of nodes
    340 		 *
    341 		 * If child used to be left heavy (now balanced) we reduced
    342 		 * the height of this sub-tree -- used in "return...;" below
    343 		 */
    344 		child_bal += right_heavy; /* adjust towards right */
    345 
    346 		/*
    347 		 * move "cright" to be node's left child
    348 		 */
    349 		cright = child->avl_child[right];
    350 		node->avl_child[left] = cright;
    351 		if (cright != NULL) {
    352 			AVL_SETPARENT(cright, node);
    353 			AVL_SETCHILD(cright, left);
    354 		}
    355 
    356 		/*
    357 		 * move node to be child's right child
    358 		 */
    359 		child->avl_child[right] = node;
    360 		AVL_SETBALANCE(node, -child_bal);
    361 		AVL_SETCHILD(node, right);
    362 		AVL_SETPARENT(node, child);
    363 
    364 		/*
    365 		 * update the pointer into this subtree
    366 		 */
    367 		AVL_SETBALANCE(child, child_bal);
    368 		AVL_SETCHILD(child, which_child);
    369 		AVL_SETPARENT(child, parent);
    370 		if (parent != NULL)
    371 			parent->avl_child[which_child] = child;
    372 		else
    373 			tree->avl_root = child;
    374 
    375 		return (child_bal == 0);
    376 	}
    377 
    378 	/* BEGIN CSTYLED */
    379 	/*
    380 	 * case 2 : When node is left heavy, but child is right heavy we use
    381 	 * a different rotation.
    382 	 *
    383 	 *                   (node b:-2)
    384 	 *                    /   \
    385 	 *                   /     \
    386 	 *                  /       \
    387 	 *             (child b:+1)
    388 	 *              /     \
    389 	 *             /       \
    390 	 *                   (gchild b: != 0)
    391 	 *                     /  \
    392 	 *                    /    \
    393 	 *                 gleft   gright
    394 	 *
    395 	 * becomes:
    396 	 *
    397 	 *              (gchild b:0)
    398 	 *              /       \
    399 	 *             /         \
    400 	 *            /           \
    401 	 *        (child b:?)   (node b:?)
    402 	 *         /  \          /   \
    403 	 *        /    \        /     \
    404 	 *            gleft   gright
    405 	 *
    406 	 * computing the new balances is more complicated. As an example:
    407 	 *	 if gchild was right_heavy, then child is now left heavy
    408 	 *		else it is balanced
    409 	 */
    410 	/* END CSTYLED */
    411 	gchild = child->avl_child[right];
    412 	gleft = gchild->avl_child[left];
    413 	gright = gchild->avl_child[right];
    414 
    415 	/*
    416 	 * move gright to left child of node and
    417 	 *
    418 	 * move gleft to right child of node
    419 	 */
    420 	node->avl_child[left] = gright;
    421 	if (gright != NULL) {
    422 		AVL_SETPARENT(gright, node);
    423 		AVL_SETCHILD(gright, left);
    424 	}
    425 
    426 	child->avl_child[right] = gleft;
    427 	if (gleft != NULL) {
    428 		AVL_SETPARENT(gleft, child);
    429 		AVL_SETCHILD(gleft, right);
    430 	}
    431 
    432 	/*
    433 	 * move child to left child of gchild and
    434 	 *
    435 	 * move node to right child of gchild and
    436 	 *
    437 	 * fixup parent of all this to point to gchild
    438 	 */
    439 	balance = AVL_XBALANCE(gchild);
    440 	gchild->avl_child[left] = child;
    441 	AVL_SETBALANCE(child, (balance == right_heavy ? left_heavy : 0));
    442 	AVL_SETPARENT(child, gchild);
    443 	AVL_SETCHILD(child, left);
    444 
    445 	gchild->avl_child[right] = node;
    446 	AVL_SETBALANCE(node, (balance == left_heavy ? right_heavy : 0));
    447 	AVL_SETPARENT(node, gchild);
    448 	AVL_SETCHILD(node, right);
    449 
    450 	AVL_SETBALANCE(gchild, 0);
    451 	AVL_SETPARENT(gchild, parent);
    452 	AVL_SETCHILD(gchild, which_child);
    453 	if (parent != NULL)
    454 		parent->avl_child[which_child] = gchild;
    455 	else
    456 		tree->avl_root = gchild;
    457 
    458 	return (1);	/* the new tree is always shorter */
    459 }
    460 
    461 
    462 /*
    463  * Insert a new node into an AVL tree at the specified (from avl_find()) place.
    464  *
    465  * Newly inserted nodes are always leaf nodes in the tree, since avl_find()
    466  * searches out to the leaf positions.  The avl_index_t indicates the node
    467  * which will be the parent of the new node.
    468  *
    469  * After the node is inserted, a single rotation further up the tree may
    470  * be necessary to maintain an acceptable AVL balance.
    471  */
    472 void
    473 avl_insert(avl_tree_t *tree, void *new_data, avl_index_t where)
    474 {
    475 	avl_node_t *node;
    476 	avl_node_t *parent = AVL_INDEX2NODE(where);
    477 	int old_balance;
    478 	int new_balance;
    479 	int which_child = AVL_INDEX2CHILD(where);
    480 	size_t off = tree->avl_offset;
    481 
    482 	if (tree == NULL) {
    483 		filebench_log(LOG_ERROR, "No Tree Supplied");
    484 		return;
    485 	}
    486 #ifdef _LP64
    487 	if (((uintptr_t)new_data & 0x7) != 0) {
    488 		filebench_log(LOG_ERROR, "Missaligned pointer to new data");
    489 		return;
    490 	}
    491 #endif
    492 
    493 	node = AVL_DATA2NODE(new_data, off);
    494 
    495 	/*
    496 	 * First, add the node to the tree at the indicated position.
    497 	 */
    498 	++tree->avl_numnodes;
    499 
    500 	node->avl_child[0] = NULL;
    501 	node->avl_child[1] = NULL;
    502 
    503 	AVL_SETCHILD(node, which_child);
    504 	AVL_SETBALANCE(node, 0);
    505 	AVL_SETPARENT(node, parent);
    506 	if (parent != NULL) {
    507 		if (parent->avl_child[which_child] != NULL)
    508 			filebench_log(LOG_DEBUG_IMPL,
    509 			    "Overwriting existing pointer");
    510 
    511 		parent->avl_child[which_child] = node;
    512 	} else {
    513 		if (tree->avl_root != NULL)
    514 			filebench_log(LOG_DEBUG_IMPL,
    515 			    "Overwriting existing pointer");
    516 
    517 		tree->avl_root = node;
    518 	}
    519 	/*
    520 	 * Now, back up the tree modifying the balance of all nodes above the
    521 	 * insertion point. If we get to a highly unbalanced ancestor, we
    522 	 * need to do a rotation.  If we back out of the tree we are done.
    523 	 * If we brought any subtree into perfect balance (0), we are also done.
    524 	 */
    525 	for (;;) {
    526 		node = parent;
    527 		if (node == NULL)
    528 			return;
    529 
    530 		/*
    531 		 * Compute the new balance
    532 		 */
    533 		old_balance = AVL_XBALANCE(node);
    534 		new_balance = old_balance + avl_child2balance[which_child];
    535 
    536 		/*
    537 		 * If we introduced equal balance, then we are done immediately
    538 		 */
    539 		if (new_balance == 0) {
    540 			AVL_SETBALANCE(node, 0);
    541 			return;
    542 		}
    543 
    544 		/*
    545 		 * If both old and new are not zero we went
    546 		 * from -1 to -2 balance, do a rotation.
    547 		 */
    548 		if (old_balance != 0)
    549 			break;
    550 
    551 		AVL_SETBALANCE(node, new_balance);
    552 		parent = AVL_XPARENT(node);
    553 		which_child = AVL_XCHILD(node);
    554 	}
    555 
    556 	/*
    557 	 * perform a rotation to fix the tree and return
    558 	 */
    559 	(void) avl_rotation(tree, node, new_balance);
    560 }
    561 
    562 /*
    563  * Insert "new_data" in "tree" in the given "direction" either after or
    564  * before (AVL_AFTER, AVL_BEFORE) the data "here".
    565  *
    566  * Insertions can only be done at empty leaf points in the tree, therefore
    567  * if the given child of the node is already present we move to either
    568  * the AVL_PREV or AVL_NEXT and reverse the insertion direction. Since
    569  * every other node in the tree is a leaf, this always works.
    570  *
    571  * To help developers using this interface, we assert that the new node
    572  * is correctly ordered at every step of the way in DEBUG kernels.
    573  */
    574 void
    575 avl_insert_here(
    576 	avl_tree_t *tree,
    577 	void *new_data,
    578 	void *here,
    579 	int direction)
    580 {
    581 	avl_node_t *node;
    582 	int child = direction;	/* rely on AVL_BEFORE == 0, AVL_AFTER == 1 */
    583 
    584 	if ((tree == NULL) || (new_data == NULL) || (here == NULL) ||
    585 	    !((direction == AVL_BEFORE) || (direction == AVL_AFTER))) {
    586 		filebench_log(LOG_ERROR,
    587 		    "avl_insert_here: Bad Parameters Passed");
    588 		return;
    589 	}
    590 
    591 	/*
    592 	 * If corresponding child of node is not NULL, go to the neighboring
    593 	 * node and reverse the insertion direction.
    594 	 */
    595 	node = AVL_DATA2NODE(here, tree->avl_offset);
    596 
    597 	if (node->avl_child[child] != NULL) {
    598 		node = node->avl_child[child];
    599 		child = 1 - child;
    600 		while (node->avl_child[child] != NULL)
    601 			node = node->avl_child[child];
    602 
    603 	}
    604 	if (node->avl_child[child] != NULL)
    605 		filebench_log(LOG_DEBUG_IMPL, "Overwriting existing pointer");
    606 
    607 	avl_insert(tree, new_data, AVL_MKINDEX(node, child));
    608 }
    609 
    610 /*
    611  * Add a new node to an AVL tree.
    612  */
    613 void
    614 avl_add(avl_tree_t *tree, void *new_node)
    615 {
    616 	avl_index_t where;
    617 
    618 	/*
    619 	 * This is unfortunate. Give up.
    620 	 */
    621 	if (avl_find(tree, new_node, &where) != NULL) {
    622 		filebench_log(LOG_ERROR,
    623 		    "Attempting to insert already inserted node");
    624 		return;
    625 	}
    626 	avl_insert(tree, new_node, where);
    627 }
    628 
    629 /*
    630  * Delete a node from the AVL tree.  Deletion is similar to insertion, but
    631  * with 2 complications.
    632  *
    633  * First, we may be deleting an interior node. Consider the following subtree:
    634  *
    635  *     d           c            c
    636  *    / \         / \          / \
    637  *   b   e       b   e        b   e
    638  *  / \	        / \          /
    639  * a   c       a            a
    640  *
    641  * When we are deleting node (d), we find and bring up an adjacent valued leaf
    642  * node, say (c), to take the interior node's place. In the code this is
    643  * handled by temporarily swapping (d) and (c) in the tree and then using
    644  * common code to delete (d) from the leaf position.
    645  *
    646  * Secondly, an interior deletion from a deep tree may require more than one
    647  * rotation to fix the balance. This is handled by moving up the tree through
    648  * parents and applying rotations as needed. The return value from
    649  * avl_rotation() is used to detect when a subtree did not change overall
    650  * height due to a rotation.
    651  */
    652 void
    653 avl_remove(avl_tree_t *tree, void *data)
    654 {
    655 	avl_node_t *delete;
    656 	avl_node_t *parent;
    657 	avl_node_t *node;
    658 	avl_node_t tmp;
    659 	int old_balance;
    660 	int new_balance;
    661 	int left;
    662 	int right;
    663 	int which_child;
    664 	size_t off = tree->avl_offset;
    665 
    666 	if (tree == NULL) {
    667 		filebench_log(LOG_ERROR, "No Tree Supplied");
    668 		return;
    669 	}
    670 
    671 	delete = AVL_DATA2NODE(data, off);
    672 
    673 	/*
    674 	 * Deletion is easiest with a node that has at most 1 child.
    675 	 * We swap a node with 2 children with a sequentially valued
    676 	 * neighbor node. That node will have at most 1 child. Note this
    677 	 * has no effect on the ordering of the remaining nodes.
    678 	 *
    679 	 * As an optimization, we choose the greater neighbor if the tree
    680 	 * is right heavy, otherwise the left neighbor. This reduces the
    681 	 * number of rotations needed.
    682 	 */
    683 	if (delete->avl_child[0] != NULL && delete->avl_child[1] != NULL) {
    684 
    685 		/*
    686 		 * choose node to swap from whichever side is taller
    687 		 */
    688 		old_balance = AVL_XBALANCE(delete);
    689 		left = avl_balance2child[old_balance + 1];
    690 		right = 1 - left;
    691 
    692 		/*
    693 		 * get to the previous value'd node
    694 		 * (down 1 left, as far as possible right)
    695 		 */
    696 		for (node = delete->avl_child[left];
    697 		    node->avl_child[right] != NULL;
    698 		    node = node->avl_child[right])
    699 			;
    700 
    701 		/*
    702 		 * create a temp placeholder for 'node'
    703 		 * move 'node' to delete's spot in the tree
    704 		 */
    705 		tmp = *node;
    706 
    707 		*node = *delete;
    708 		if (node->avl_child[left] == node)
    709 			node->avl_child[left] = &tmp;
    710 
    711 		parent = AVL_XPARENT(node);
    712 		if (parent != NULL)
    713 			parent->avl_child[AVL_XCHILD(node)] = node;
    714 		else
    715 			tree->avl_root = node;
    716 		AVL_SETPARENT(node->avl_child[left], node);
    717 		AVL_SETPARENT(node->avl_child[right], node);
    718 
    719 		/*
    720 		 * Put tmp where node used to be (just temporary).
    721 		 * It always has a parent and at most 1 child.
    722 		 */
    723 		delete = &tmp;
    724 		parent = AVL_XPARENT(delete);
    725 		parent->avl_child[AVL_XCHILD(delete)] = delete;
    726 		which_child = (delete->avl_child[1] != 0);
    727 		if (delete->avl_child[which_child] != NULL)
    728 			AVL_SETPARENT(delete->avl_child[which_child], delete);
    729 	}
    730 
    731 
    732 	/*
    733 	 * Here we know "delete" is at least partially a leaf node. It can
    734 	 * be easily removed from the tree.
    735 	 */
    736 	if (tree->avl_numnodes == 0) {
    737 		filebench_log(LOG_ERROR,
    738 		    "Deleting Node from already empty tree");
    739 		return;
    740 	}
    741 
    742 	--tree->avl_numnodes;
    743 	parent = AVL_XPARENT(delete);
    744 	which_child = AVL_XCHILD(delete);
    745 	if (delete->avl_child[0] != NULL)
    746 		node = delete->avl_child[0];
    747 	else
    748 		node = delete->avl_child[1];
    749 
    750 	/*
    751 	 * Connect parent directly to node (leaving out delete).
    752 	 */
    753 	if (node != NULL) {
    754 		AVL_SETPARENT(node, parent);
    755 		AVL_SETCHILD(node, which_child);
    756 	}
    757 	if (parent == NULL) {
    758 		tree->avl_root = node;
    759 		return;
    760 	}
    761 	parent->avl_child[which_child] = node;
    762 
    763 
    764 	/*
    765 	 * Since the subtree is now shorter, begin adjusting parent balances
    766 	 * and performing any needed rotations.
    767 	 */
    768 	do {
    769 
    770 		/*
    771 		 * Move up the tree and adjust the balance
    772 		 *
    773 		 * Capture the parent and which_child values for the next
    774 		 * iteration before any rotations occur.
    775 		 */
    776 		node = parent;
    777 		old_balance = AVL_XBALANCE(node);
    778 		new_balance = old_balance - avl_child2balance[which_child];
    779 		parent = AVL_XPARENT(node);
    780 		which_child = AVL_XCHILD(node);
    781 
    782 		/*
    783 		 * If a node was in perfect balance but isn't anymore then
    784 		 * we can stop, since the height didn't change above this point
    785 		 * due to a deletion.
    786 		 */
    787 		if (old_balance == 0) {
    788 			AVL_SETBALANCE(node, new_balance);
    789 			break;
    790 		}
    791 
    792 		/*
    793 		 * If the new balance is zero, we don't need to rotate
    794 		 * else
    795 		 * need a rotation to fix the balance.
    796 		 * If the rotation doesn't change the height
    797 		 * of the sub-tree we have finished adjusting.
    798 		 */
    799 		if (new_balance == 0)
    800 			AVL_SETBALANCE(node, new_balance);
    801 		else if (!avl_rotation(tree, node, new_balance))
    802 			break;
    803 	} while (parent != NULL);
    804 }
    805 
    806 #define	AVL_REINSERT(tree, obj)		\
    807 	avl_remove((tree), (obj));	\
    808 	avl_add((tree), (obj))
    809 
    810 boolean_t
    811 avl_update_lt(avl_tree_t *t, void *obj)
    812 {
    813 	void *neighbor;
    814 
    815 	if (!(((neighbor = AVL_NEXT(t, obj)) == NULL) ||
    816 	    (t->avl_compar(obj, neighbor) <= 0))) {
    817 		filebench_log(LOG_ERROR,
    818 		    "avl_update_lt: Neighbor miss compare");
    819 		return (B_FALSE);
    820 	}
    821 
    822 	neighbor = AVL_PREV(t, obj);
    823 	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) {
    824 		AVL_REINSERT(t, obj);
    825 		return (B_TRUE);
    826 	}
    827 
    828 	return (B_FALSE);
    829 }
    830 
    831 boolean_t
    832 avl_update_gt(avl_tree_t *t, void *obj)
    833 {
    834 	void *neighbor;
    835 
    836 	if (!(((neighbor = AVL_PREV(t, obj)) == NULL) ||
    837 	    (t->avl_compar(obj, neighbor) >= 0))) {
    838 		filebench_log(LOG_ERROR,
    839 		    "avl_update_gt: Neighbor miss compare");
    840 		return (B_FALSE);
    841 	}
    842 
    843 	neighbor = AVL_NEXT(t, obj);
    844 	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) {
    845 		AVL_REINSERT(t, obj);
    846 		return (B_TRUE);
    847 	}
    848 
    849 	return (B_FALSE);
    850 }
    851 
    852 boolean_t
    853 avl_update(avl_tree_t *t, void *obj)
    854 {
    855 	void *neighbor;
    856 
    857 	neighbor = AVL_PREV(t, obj);
    858 	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) {
    859 		AVL_REINSERT(t, obj);
    860 		return (B_TRUE);
    861 	}
    862 
    863 	neighbor = AVL_NEXT(t, obj);
    864 	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) {
    865 		AVL_REINSERT(t, obj);
    866 		return (B_TRUE);
    867 	}
    868 
    869 	return (B_FALSE);
    870 }
    871 
    872 /*
    873  * initialize a new AVL tree
    874  */
    875 void
    876 avl_create(avl_tree_t *tree, int (*compar) (const void *, const void *),
    877     size_t size, size_t offset)
    878 {
    879 	if ((tree == NULL) || (compar == NULL) || (size == 0) ||
    880 	    (size < (offset + sizeof (avl_node_t)))) {
    881 		filebench_log(LOG_ERROR,
    882 		    "avl_create: Bad Parameters Passed");
    883 		return;
    884 	}
    885 ;
    886 #ifdef _LP64
    887 	if ((offset & 0x7) != 0) {
    888 		filebench_log(LOG_ERROR, "Missaligned pointer to new data");
    889 		return;
    890 	}
    891 #endif
    892 
    893 	tree->avl_compar = compar;
    894 	tree->avl_root = NULL;
    895 	tree->avl_numnodes = 0;
    896 	tree->avl_size = size;
    897 	tree->avl_offset = offset;
    898 }
    899 
    900 /*
    901  * Delete a tree.
    902  */
    903 /* ARGSUSED */
    904 void
    905 avl_destroy(avl_tree_t *tree)
    906 {
    907 	if ((tree == NULL) || (tree->avl_numnodes != 0) ||
    908 	    (tree->avl_root != NULL))
    909 		filebench_log(LOG_DEBUG_IMPL, "avl_tree: Tree not destroyed");
    910 }
    911 
    912 
    913 /*
    914  * Return the number of nodes in an AVL tree.
    915  */
    916 unsigned long
    917 avl_numnodes(avl_tree_t *tree)
    918 {
    919 	if (tree == NULL) {
    920 		filebench_log(LOG_ERROR, "avl_numnodes: Null tree pointer");
    921 		return (0);
    922 	}
    923 	return (tree->avl_numnodes);
    924 }
    925 
    926 boolean_t
    927 avl_is_empty(avl_tree_t *tree)
    928 {
    929 	if (tree == NULL) {
    930 		filebench_log(LOG_ERROR, "avl_is_empty: Null tree pointer");
    931 		return (0);
    932 	}
    933 	return (tree->avl_numnodes == 0);
    934 }
    935 
    936 #define	CHILDBIT	(1L)
    937 
    938 /*
    939  * Post-order tree walk used to visit all tree nodes and destroy the tree
    940  * in post order. This is used for destroying a tree w/o paying any cost
    941  * for rebalancing it.
    942  *
    943  * example:
    944  *
    945  *	void *cookie = NULL;
    946  *	my_data_t *node;
    947  *
    948  *	while ((node = avl_destroy_nodes(tree, &cookie)) != NULL)
    949  *		free(node);
    950  *	avl_destroy(tree);
    951  *
    952  * The cookie is really an avl_node_t to the current node's parent and
    953  * an indication of which child you looked at last.
    954  *
    955  * On input, a cookie value of CHILDBIT indicates the tree is done.
    956  */
    957 void *
    958 avl_destroy_nodes(avl_tree_t *tree, void **cookie)
    959 {
    960 	avl_node_t	*node;
    961 	avl_node_t	*parent;
    962 	int		child;
    963 	void		*first;
    964 	size_t		off = tree->avl_offset;
    965 
    966 	/*
    967 	 * Initial calls go to the first node or it's right descendant.
    968 	 */
    969 	if (*cookie == NULL) {
    970 		first = avl_first(tree);
    971 
    972 		/*
    973 		 * deal with an empty tree
    974 		 */
    975 		if (first == NULL) {
    976 			*cookie = (void *)CHILDBIT;
    977 			return (NULL);
    978 		}
    979 
    980 		node = AVL_DATA2NODE(first, off);
    981 		parent = AVL_XPARENT(node);
    982 		goto check_right_side;
    983 	}
    984 
    985 	/*
    986 	 * If there is no parent to return to we are done.
    987 	 */
    988 	parent = (avl_node_t *)((uintptr_t)(*cookie) & ~CHILDBIT);
    989 	if (parent == NULL) {
    990 		if (tree->avl_root != NULL) {
    991 			if (tree->avl_numnodes != 1) {
    992 				filebench_log(LOG_DEBUG_IMPL,
    993 				    "avl_destroy_nodes:"
    994 				    " number of nodes wrong");
    995 			}
    996 			tree->avl_root = NULL;
    997 			tree->avl_numnodes = 0;
    998 		}
    999 		return (NULL);
   1000 	}
   1001 
   1002 	/*
   1003 	 * Remove the child pointer we just visited from the parent and tree.
   1004 	 */
   1005 	child = (uintptr_t)(*cookie) & CHILDBIT;
   1006 	parent->avl_child[child] = NULL;
   1007 	if (tree->avl_numnodes <= 1)
   1008 		filebench_log(LOG_DEBUG_IMPL,
   1009 		    "avl_destroy_nodes: number of nodes wrong");
   1010 
   1011 	--tree->avl_numnodes;
   1012 
   1013 	/*
   1014 	 * If we just did a right child or there isn't one, go up to parent.
   1015 	 */
   1016 	if (child == 1 || parent->avl_child[1] == NULL) {
   1017 		node = parent;
   1018 		parent = AVL_XPARENT(parent);
   1019 		goto done;
   1020 	}
   1021 
   1022 	/*
   1023 	 * Do parent's right child, then leftmost descendent.
   1024 	 */
   1025 	node = parent->avl_child[1];
   1026 	while (node->avl_child[0] != NULL) {
   1027 		parent = node;
   1028 		node = node->avl_child[0];
   1029 	}
   1030 
   1031 	/*
   1032 	 * If here, we moved to a left child. It may have one
   1033 	 * child on the right (when balance == +1).
   1034 	 */
   1035 check_right_side:
   1036 	if (node->avl_child[1] != NULL) {
   1037 		if (AVL_XBALANCE(node) != 1)
   1038 			filebench_log(LOG_DEBUG_IMPL,
   1039 			    "avl_destroy_nodes: Tree inconsistency");
   1040 		parent = node;
   1041 		node = node->avl_child[1];
   1042 		if (node->avl_child[0] != NULL ||
   1043 		    node->avl_child[1] != NULL)
   1044 			filebench_log(LOG_DEBUG_IMPL,
   1045 			    "avl_destroy_nodes: Destroying non leaf node");
   1046 	} else {
   1047 
   1048 		if (AVL_XBALANCE(node) > 0)
   1049 			filebench_log(LOG_DEBUG_IMPL,
   1050 			    "avl_destroy_nodes: Tree inconsistency");
   1051 	}
   1052 
   1053 done:
   1054 	if (parent == NULL) {
   1055 		*cookie = (void *)CHILDBIT;
   1056 		if (node != tree->avl_root)
   1057 			filebench_log(LOG_DEBUG_IMPL,
   1058 			    "avl_destroy_nodes: Dangling last node");
   1059 	} else {
   1060 		*cookie = (void *)((uintptr_t)parent | AVL_XCHILD(node));
   1061 	}
   1062 
   1063 	return (AVL_NODE2DATA(node, off));
   1064 }
   1065