summaryrefslogtreecommitdiff
path: root/misc/tsearch.c
diff options
context:
space:
mode:
Diffstat (limited to 'misc/tsearch.c')
-rw-r--r--misc/tsearch.c612
1 files changed, 510 insertions, 102 deletions
diff --git a/misc/tsearch.c b/misc/tsearch.c
index 6af6536a72..466536bf34 100644
--- a/misc/tsearch.c
+++ b/misc/tsearch.c
@@ -1,5 +1,6 @@
-/* Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+/* Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
This file is part of the GNU C Library.
+ Contributed by Bernd Schmidt <crux@Pool.Informatik.RWTH-Aachen.DE>, 1997.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
@@ -16,175 +17,584 @@
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
-/* Tree search generalized from Knuth (6.2.2) Algorithm T just like
- the AT&T man page says.
-
- The node_t structure is for internal use only, lint doesn't grok it.
-
- Written by reading the System V Interface Definition, not the code.
+/* Tree search for red/black trees.
+ The algorithm for adding nodes is taken from one of the many "Algorithms"
+ books by Robert Sedgewick, although the implementation differs.
+ The algorithm for deleting nodes can probably be found in a book named
+ "Introduction to Algorithms" by Cormen/Leiserson/Rivest. At least that's
+ the book that my professor took most algorithms from during the "Data
+ Structures" course...
Totally public domain. */
-/*LINTLIBRARY*/
+
+/* Red/black trees are binary trees in which the edges are colored either red
+ or black. They have the following properties:
+ 1. The number of black edges on every path from the root to a leaf is
+ constant.
+ 2. No two red edges are adjacent.
+ Therefore there is an upper bound on the length of every path, it's
+ O(log n) where n is the number of nodes in the tree. No path can be longer
+ than 1+2*P where P is the length of the shortest path in the tree.
+ Useful for the implementation:
+ 3. If one of the children of a node is NULL, then the other one is red
+ (if it exists).
+
+ In the implementation, not the edges are colored, but the nodes. A node's
+ color is interpreted as the color of the edge leading to it. The color is
+ meaningless for the root node, but we color the root node black for
+ convenience. All added nodes are red initially.
+
+ Adding to a red/black tree is rather easy. The right place is searched
+ with a usual binary tree search. Additionally, whenever a node N is
+ reached that has two red successors, the successors are colored black and
+ the node itself colored red. This moves red edges up the tree where they
+ pose less of a problem once we get to really insert the new node. Changing
+ N's color to red may violate rule 2, however, so rotations may become
+ necessary to restore the invariants. Adding a new red leaf may violate
+ the same rule, so afterwards an additional check is run and the tree
+ possibly rotated.
+
+ Deleting is hairy. There are mainly two nodes involved: the node to be
+ deleted (n1), and another node that is to be unchained from the tree (n2).
+ If n1 has a successor (the node with a smallest key that is larger than
+ n1), then the successor becomes n2 and its contents are copied into n1,
+ otherwise n1 becomes n2.
+ Unchaining a node may violate rule 1: if n2 is black, one subtree is
+ missing one black edge afterwards. The algorithm must try to move this
+ error upwards towards the root, so that the subtree that does not have
+ enough black edges becomes the whole tree. Once that happens, the error
+ has disappeared. It may not be necessary to go all the way up, since it
+ is possible that rotations and recoloring can fix the error before that.
+
+ Although the deletion algorithm must walk upwards through the tree, we
+ do not store parent pointers in the nodes. Instead, delete allocates a
+ small array of parent pointers and fills it while descending the tree.
+ Since we know that the length of a path is O(log n), where n is the number
+ of nodes, this is likely to use less memory. */
+
+/* Tree rotations look like this:
+ A C
+ / \ / \
+ B C A G
+ / \ / \ --> / \
+ D E F G B F
+ / \
+ D E
+
+ In this case, A has been rotated left. This preserves the ordering of the
+ binary tree. */
#include <stdlib.h>
#include <search.h>
-/* This routine is not very bad. It makes many assumptions about
- the compiler. It assumes that the first field in the node must be
- the "key" field, which points to the datum. It is very tricky
- stuff. H.J. */
-
typedef struct node_t
{
+ /* Callers expect this to be the first element in the structure - do not
+ move! */
const void *key; /* Pointer to the caller's datum, handed to the comparison function. */
struct node_t *left; /* Left successor, or NULL. */
struct node_t *right; /* Right successor, or NULL. */
+ unsigned int red:1; /* Nonzero if the edge leading to this node is red. */
+} *node; /* Note: `node' names the pointer type struct node_t *. */
+
+#undef DEBUGGING
+
+#ifdef DEBUGGING
+
+/* Routines to check tree invariants. */
+
+#include <assert.h>
+
+#define CHECK_TREE(a) check_tree(a)
+
+static void
+check_tree_recurse (node p, int d_sofar, int d_total)
+{
+ node kids[2];
+ int i;
+
+ /* A NULL link ends a path: the black edges counted so far must match
+ the reference count. */
+ if (p == NULL)
+ {
+ assert (d_sofar == d_total);
+ return;
+ }
+
+ kids[0] = p->left;
+ kids[1] = p->right;
+
+ /* Descend left first, then right; an edge to a non-red successor adds
+ one to the running black count. */
+ for (i = 0; i < 2; ++i)
+ {
+ int black = kids[i] != NULL && !kids[i]->red;
+ check_tree_recurse (kids[i], d_sofar + black, d_total);
+ }
+
+ /* No red node may have a red successor. */
+ for (i = 0; i < 2; ++i)
+ if (kids[i] != NULL)
+ assert (!(kids[i]->red && p->red));
+}
+
+static void
+check_tree (node root)
+{
+ int black_edges;
+ node walk;
+
+ if (root != NULL)
+ {
+ /* The root is always treated as black. */
+ root->red = 0;
+
+ /* Count the black edges along the leftmost path; every other path
+ is checked against this count. */
+ black_edges = 0;
+ walk = root->left;
+ while (walk != NULL)
+ {
+ if (!walk->red)
+ ++black_edges;
+ walk = walk->left;
+ }
+
+ check_tree_recurse (root, 0, black_edges);
+ }
}
-node;
-/* Prototype fpr local function. */
-static void trecurse __P ((const void *vroot, __action_fn_t action, int level));
+#else
-/* find or insert datum into search tree.
-char *key; key to be located
-node **rootp; address of tree root
-int (*compar)(); ordering function
-*/
+#define CHECK_TREE(a)
+
+#endif
+
+/* Possibly "split" a node with two red successors, and/or fix up two red
+ edges in a row. ROOTP is a pointer to the lowest node we visited, PARENTP
+ and GPARENTP pointers to its parent/grandparent. P_R and GP_R contain the
+ comparison values that determined which way was taken in the tree to reach
+ ROOTP. MODE is 1 if we need not do the split, but must check for two red
+ edges between GPARENTP and ROOTP.
+
+ ROOTP, PARENTP and GPARENTP are the addresses of the links that hold the
+ respective nodes, so a rotation re-links the subtree by storing through
+ *GPARENTP. *ROOTP must not be NULL. PARENTP may be NULL when the node
+ has no parent; GPARENTP is only dereferenced when the parent is red.
+ P_R and GP_R are only compared against zero: a negative P_R means the
+ node is the left successor of its parent.
+
+ When MODE is 1 the recoloring below is still carried out: the node is
+ colored red and any successors it has are colored black. */
+static void
+maybe_split_for_insert (node *rootp, node *parentp, node *gparentp,
+ int p_r, int gp_r, int mode)
+{
+ node root = *rootp;
+ node *rp, *lp;
+ rp = &(*rootp)->right;
+ lp = &(*rootp)->left;
+
+ /* See if we have to split this node (both successors red). */
+ if (mode == 1
+ || ((*rp) != NULL && (*lp) != NULL && (*rp)->red && (*lp)->red))
+ {
+ /* This node becomes red, its successors black. */
+ root->red = 1;
+ if (*rp)
+ (*rp)->red = 0;
+ if (*lp)
+ (*lp)->red = 0;
+
+ /* If the parent of this node is also red, we have to do
+ rotations. */
+ if (parentp != NULL && (*parentp)->red)
+ {
+ node gp = *gparentp;
+ node p = *parentp;
+ /* There are two main cases:
+ 1. The edge types (left or right) of the two red edges differ.
+ 2. Both red edges are of the same type.
+ There exist two symmetries of each case, so there is a total of
+ 4 cases. */
+ if ((p_r > 0) != (gp_r > 0))
+ {
+ /* Put the child at the top of the tree, with its parent
+ and grandparent as successors. */
+ p->red = 1;
+ gp->red = 1;
+ root->red = 0;
+ if (p_r < 0)
+ {
+ /* Child is left of parent. */
+ p->left = *rp;
+ *rp = p;
+ gp->right = *lp;
+ *lp = gp;
+ }
+ else
+ {
+ /* Child is right of parent. */
+ p->right = *lp;
+ *lp = p;
+ gp->left = *rp;
+ *rp = gp;
+ }
+ *gparentp = root; /* The child takes over the link that held the grandparent. */
+ }
+ else
+ {
+ *gparentp = *parentp; /* The parent takes over the link that held the grandparent. */
+ /* Parent becomes the top of the tree, grandparent and
+ child are its successors. */
+ p->red = 0;
+ gp->red = 1;
+ if (p_r < 0)
+ {
+ /* Left edges. */
+ gp->left = p->right;
+ p->right = gp;
+ }
+ else
+ {
+ /* Right edges. */
+ gp->right = p->left;
+ p->left = gp;
+ }
+ }
+ }
+ }
+}
+
+/* Find or insert datum into search tree.
+ KEY is the key to be located, ROOTP is the address of tree root,
+ COMPAR the ordering function.
+
+ Returns the node whose key compares equal to KEY if there is one,
+ otherwise the newly inserted node. Returns NULL if VROOTP is NULL or if
+ no memory for a new node could be allocated; in the latter case no node
+ is added, although nodes may already have been recolored or rotated
+ during the descent. */
void *
-__tsearch (key, vrootp, compar)
- const void *key;
- void **vrootp;
- __compar_fn_t compar;
+__tsearch (const void *key, void **vrootp, __compar_fn_t compar)
{
- node *q;
- node **rootp = (node **) vrootp;
+ node q;
+ node *parentp = NULL, *gparentp = NULL;
+ node *rootp = (node *) vrootp;
+ node *nextp;
+ int r = 0, p_r = 0, gp_r = 0; /* No they might not, Mr Compiler. */
if (rootp == NULL)
return NULL;
- while (*rootp != NULL) /* Knuth's T1: */
- {
- int r;
+ /* This saves some additional tests below. */
+ if (*rootp != NULL)
+ (*rootp)->red = 0;
+
+ CHECK_TREE (*rootp);
- r = (*compar) (key, (*rootp)->key);
- if (r == 0) /* T2: */
- return *rootp; /* we found it! */
- rootp = (r < 0)
- ? &(*rootp)->left /* T3: follow left branch */
- : &(*rootp)->right; /* T4: follow right branch */
+ nextp = rootp;
+ while (*nextp != NULL)
+ {
+ node root = *rootp;
+ r = (*compar) (key, root->key);
+ if (r == 0)
+ return root;
+
+ maybe_split_for_insert (rootp, parentp, gparentp, p_r, gp_r, 0);
+ /* If that did any rotations, parentp and gparentp are now garbage.
+ That doesn't matter, because the values they contain are never
+ used again in that case. */
+
+ nextp = r < 0 ? &root->left : &root->right;
+ if (*nextp == NULL)
+ break;
+
+ gparentp = parentp;
+ parentp = rootp;
+ rootp = nextp;
+
+ gp_r = p_r;
+ p_r = r;
}
- q = (node *) malloc (sizeof (node)); /* T5: key not found */
- if (q != NULL) /* make new node */
+ q = (struct node_t *) malloc (sizeof (struct node_t));
+ if (q != NULL)
{
- *rootp = q; /* link new node to old */
+ *nextp = q; /* link new node to old */
q->key = key; /* initialize new node */
+ q->red = 1;
q->left = q->right = NULL;
+
+ /* Rebalance only when a node was really linked in. If malloc failed,
+ *NEXTP is still NULL and maybe_split_for_insert would dereference
+ it. */
+ if (nextp != rootp)
+ /* There may be two red edges in a row now, which we must avoid by
+ rotating the tree. */
+ maybe_split_for_insert (nextp, rootp, parentp, r, p_r, 1);
}
return q;
}
weak_alias (__tsearch, tsearch)
+/* Find datum in search tree.
+ KEY is the key to be located, ROOTP is the address of tree root,
+ COMPAR the ordering function.
+
+ Returns the node whose key compares equal to KEY, or NULL if VROOTP is
+ NULL or no such node exists. Nothing is inserted. */
void *
__tfind (key, vrootp, compar)
const void *key;
const void **vrootp;
__compar_fn_t compar;
{
- node **rootp = (node **) vrootp;
+ node *rootp = (node *) vrootp;
if (rootp == NULL)
return NULL;
- while (*rootp != NULL) /* Knuth's T1: */
+ CHECK_TREE (*rootp);
+
+ while (*rootp != NULL)
{
+ node root = *rootp;
int r;
- r = (*compar)(key, (*rootp)->key);
- if (r == 0) /* T2: */
- return *rootp; /* we found it! */
+ r = (*compar) (key, root->key);
+ if (r == 0)
+ return root;
- rootp = (r < 0)
- ? &(*rootp)->left /* T3: follow left branch */
- : &(*rootp)->right; /* T4: follow right branch */
+ rootp = r < 0 ? &root->left : &root->right; /* Negative result: continue in the left subtree. */
}
- return NULL;
+ return NULL;
}
weak_alias (__tfind, tfind)
-/* delete node with given key
-char *key; key to be deleted
-node **rootp; address of the root of tree
-int (*compar)(); comparison function
-*/
+/* Delete node with given key.
+ KEY is the key to be deleted, ROOTP is the address of the root of tree,
+ COMPAR the comparison function.
+
+ Returns NULL if VROOTP is NULL, the tree is empty, or no node compares
+ equal to KEY. Otherwise returns the node that was visited just before
+ the matching one, i.e. its parent; if the match is the root, the root
+ node itself is returned (see the comment on RETVAL below). The node
+ that is unchained from the tree is released with free; keys are not
+ freed. */
void *
-__tdelete (key, vrootp, compar)
- const void *key;
- void **vrootp;
- __compar_fn_t compar;
+__tdelete (const void *key, void **vrootp, __compar_fn_t compar)
{
- node *p;
- node *q;
- node *r;
+ node p, q, r, retval;
int cmp;
- node **rootp = (node **) vrootp;
+ node *rootp = (node *) vrootp;
+ node root, unchained;
+ /* Stack of nodes so we remember the parents without recursion. It's
+ _very_ unlikely that there are paths longer than 40 nodes. The tree
+ would need to have around 250.000 nodes.
+ Each entry is the address of the link that holds the node. Should the
+ stack fill up anyway, a larger one is obtained with alloca and the
+ old entries are copied over. */
+ int stacksize = 40;
+ int sp = 0;
+ node **nodestack = alloca (sizeof (node *) * stacksize);
- if (rootp == NULL || (p = *rootp) == NULL)
+ if (rootp == NULL)
return NULL;
+ p = *rootp;
+ if (p == NULL)
+ return NULL;
+
+ CHECK_TREE (p);
while ((cmp = (*compar) (key, (*rootp)->key)) != 0)
{
+ if (sp == stacksize)
+ {
+ node **newstack;
+ stacksize += 20;
+ newstack = alloca (sizeof (node *) * stacksize);
+ memcpy (newstack, nodestack, sp * sizeof (node *)); /* NOTE(review): no <string.h> include is visible in this chunk - confirm memcpy is declared. */
+ nodestack = newstack;
+ }
+
+ nodestack[sp++] = rootp;
p = *rootp;
- rootp = (cmp < 0)
- ? &(*rootp)->left /* follow left branch */
- : &(*rootp)->right; /* follow right branch */
+ rootp = ((cmp < 0)
+ ? &(*rootp)->left
+ : &(*rootp)->right);
if (*rootp == NULL)
- return NULL; /* key not found */
+ return NULL;
}
- r = (*rootp)->right; /* D1: */
- q = (*rootp)->left;
- if (q == NULL) /* Left NULL? */
- q = r;
- else if (r != NULL) /* Right link is NULL? */
+ /* This is bogus if the node to be deleted is the root... this routine
+ really should return an integer with 0 for success, -1 for failure
+ and errno = ESRCH or something.
+ NOTE(review): if the match is the root and it has fewer than two
+ successors, P is the very node that is passed to free below, so the
+ returned pointer is then only usable as a non-NULL success
+ indicator. */
+ retval = p;
+
+ /* We don't unchain the node we want to delete. Instead, we overwrite
+ it with its successor and unchain the successor. If there is no
+ successor, we really unchain the node to be deleted. */
+
+ root = *rootp;
+
+ r = root->right;
+ q = root->left;
+
+ if (q == NULL || r == NULL)
+ unchained = root;
+ else
{
- if (r->left == NULL) /* D2: Find successor */
+ node *parent = rootp, *up = &root->right;
+ for (;;)
{
- r->left = q;
- q = r;
+ if (sp == stacksize)
+ {
+ node **newstack;
+ stacksize += 20;
+ newstack = alloca (sizeof (node *) * stacksize);
+ memcpy (newstack, nodestack, sp * sizeof (node *));
+ nodestack = newstack;
+ }
+ nodestack[sp++] = parent;
+ parent = up;
+ if ((*up)->left == NULL)
+ break;
+ up = &(*up)->left;
}
+ unchained = *up;
+ }
+
+ /* We know that either the left or right successor of UNCHAINED is NULL.
+ R becomes the other one, it is chained into the parent of UNCHAINED. */
+ r = unchained->left;
+ if (r == NULL)
+ r = unchained->right;
+ if (sp == 0)
+ *rootp = r;
+ else
+ {
+ q = *nodestack[sp-1];
+ if (unchained == q->right)
+ q->right = r;
else
- { /* D3: Find (struct node_t *)0 link */
- for (q = r->left; q->left != NULL; q = r->left)
- r = q;
- r->left = q->right;
- q->left = (*rootp)->left;
- q->right = (*rootp)->right;
+ q->left = r;
+ }
+
+ if (unchained != root)
+ root->key = unchained->key;
+ if (!unchained->red)
+ {
+ /* Now we lost a black edge, which means that the number of black
+ edges on every path is no longer constant. We must balance the
+ tree. */
+ /* NODESTACK now contains all parents of R. R is likely to be NULL
+ in the first iteration. */
+ /* NULL nodes are considered black throughout - this is necessary for
+ correctness. */
+ while (sp > 0 && (r == NULL || !r->red))
+ {
+ node *pp = nodestack[sp - 1];
+ p = *pp;
+ /* Two symmetric cases. */
+ if (r == p->left)
+ {
+ /* Q is R's brother, P is R's parent. The subtree with root
+ R has one black edge less than the subtree with root Q. */
+ q = p->right;
+ if (q != NULL && q->red)
+ {
+ /* If Q is red, we know that P is black. We rotate P left
+ so that Q becomes the top node in the tree, with P below
+ it. P is colored red, Q is colored black.
+ This action does not change the black edge count for any
+ leaf in the tree, but we will be able to recognize one
+ of the following situations, which all require that Q
+ is black. */
+ q->red = 0;
+ p->red = 1;
+ /* Left rotate p. */
+ p->right = q->left;
+ q->left = p;
+ *pp = q;
+ /* Make sure pp is right if the case below tries to use
+ it. */
+ nodestack[sp++] = pp = &q->left;
+ q = p->right;
+ }
+ /* We know that Q can't be NULL here. We also know that Q is
+ black. */
+ if ((q->left == NULL || !q->left->red)
+ && (q->right == NULL || !q->right->red))
+ {
+ /* Q has two black successors. We can simply color Q red.
+ The whole subtree with root P is now missing one black
+ edge. Note that this action can temporarily make the
+ tree invalid (if P is red). But we will exit the loop
+ in that case and set P black, which both makes the tree
+ valid and also makes the black edge count come out
+ right. If P is black, we are at least one step closer
+ to the root and we'll try again the next iteration. */
+ q->red = 1;
+ r = p;
+ }
+ else
+ {
+ /* Q is black, one of Q's successors is red. We can
+ repair the tree with one operation and will exit the
+ loop afterwards. */
+ if (q->right == NULL || !q->right->red)
+ {
+ /* The left one is red. We perform the same action as
+ in maybe_split_for_insert where two red edges are
+ adjacent but point in different directions:
+ Q's left successor (let's call it Q2) becomes the
+ top of the subtree we are looking at, its parent (Q)
+ and grandparent (P) become its successors. The former
+ successors of Q2 are placed below P and Q.
+ P becomes black, and Q2 gets the color that P had.
+ This changes the black edge count only for node R and
+ its successors. */
+ node q2 = q->left;
+ q2->red = p->red;
+ p->right = q2->left;
+ q->left = q2->right;
+ q2->right = q;
+ q2->left = p;
+ *pp = q2;
+ p->red = 0;
+ }
+ else
+ {
+ /* It's the right one. Rotate P left. P becomes black,
+ and Q gets the color that P had. Q's right successor
+ also becomes black. This changes the black edge
+ count only for node R and its successors. */
+ q->red = p->red;
+ p->red = 0;
+
+ q->right->red = 0;
+
+ /* left rotate p */
+ p->right = q->left;
+ q->left = p;
+ *pp = q;
+ }
+
+ /* We're done. Setting SP to 1 makes the decrement at the
+ bottom of the loop bring it to 0, which ends the loop. */
+ sp = 1;
+ r = NULL;
+ }
+ }
+ else
+ {
+ /* Comments: see above. */
+ q = p->left;
+ if (q != NULL && q->red)
+ {
+ q->red = 0;
+ p->red = 1;
+ p->left = q->right;
+ q->right = p;
+ *pp = q;
+ nodestack[sp++] = pp = &q->right;
+ q = p->left;
+ }
+ if ((q->right == NULL || !q->right->red)
+ && (q->left == NULL || !q->left->red))
+ {
+ q->red = 1;
+ r = p;
+ }
+ else
+ {
+ if (q->left == NULL || !q->left->red)
+ {
+ node q2 = q->right;
+ q2->red = p->red;
+ p->left = q2->right;
+ q->right = q2->left;
+ q2->left = q;
+ q2->right = p;
+ *pp = q2;
+ p->red = 0;
+ }
+ else
+ {
+ q->red = p->red;
+ p->red = 0;
+ q->left->red = 0;
+ p->left = q->right;
+ q->right = p;
+ *pp = q;
+ }
+ sp = 1;
+ r = NULL;
+ }
+ }
+ --sp;
}
+ if (r != NULL)
+ r->red = 0;
}
- free ((struct node_t *) *rootp); /* D4: Free node */
- *rootp = q; /* link parent to new node */
- return p;
+
+ free (unchained);
+ return retval;
}
weak_alias (__tdelete, tdelete)
-/* Walk the nodes of a tree
-node *root; Root of the tree to be walked
-void (*action)(); Function to be called at each node
-int level;
-*/
+/* Walk the nodes of a tree.
+ ROOT is the root of the tree to be walked, ACTION the function to be
+ called at each node. LEVEL is the level of ROOT in the whole tree. */
static void
-trecurse (vroot, action, level)
- const void *vroot;
- __action_fn_t action;
- int level;
+trecurse (const void *vroot, __action_fn_t action, int level)
{
- node *root = (node *) vroot;
+ node root = (node ) vroot;
if (root->left == NULL && root->right == NULL)
(*action) (root, leaf, level);
@@ -201,17 +611,15 @@ trecurse (vroot, action, level)
}
-/* void twalk(root, action) Walk the nodes of a tree
-node *root; Root of the tree to be walked
-void (*action)(); Function to be called at each node
-PTR
-*/
+/* Walk the nodes of a tree.
+ ROOT is the root of the tree to be walked, ACTION the function to be
+ called at each node. */
void
-__twalk (vroot, action)
- const void *vroot;
- __action_fn_t action;
+__twalk (const void *vroot, __action_fn_t action)
{
- const node *root = (node *) vroot;
+ const node root = (node) vroot;
+
+ CHECK_TREE (root);
if (root != NULL && action != NULL)
trecurse (root, action, 0);