summaryrefslogtreecommitdiff
path: root/db2/hash
diff options
context:
space:
mode:
Diffstat (limited to 'db2/hash')
-rw-r--r--db2/hash/hash.c1440
-rw-r--r--db2/hash/hash.src211
-rw-r--r--db2/hash/hash_auto.c1343
-rw-r--r--db2/hash/hash_conv.c101
-rw-r--r--db2/hash/hash_debug.c96
-rw-r--r--db2/hash/hash_dup.c544
-rw-r--r--db2/hash/hash_func.c219
-rw-r--r--db2/hash/hash_page.c1775
-rw-r--r--db2/hash/hash_rec.c810
-rw-r--r--db2/hash/hash_stat.c58
10 files changed, 6597 insertions, 0 deletions
diff --git a/db2/hash/hash.c b/db2/hash/hash.c
new file mode 100644
index 0000000000..6d8c40057d
--- /dev/null
+++ b/db2/hash/hash.c
@@ -0,0 +1,1440 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ * Sleepycat Software. All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994
+ * Margo Seltzer. All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)hash.c 10.25 (Sleepycat) 8/24/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "shqueue.h"
+#include "db_int.h"
+#include "db_page.h"
+#include "db_am.h"
+#include "db_ext.h"
+#include "hash.h"
+#include "log.h"
+
+static int __ham_c_close __P((DBC *));
+static int __ham_c_del __P((DBC *, int));
+static int __ham_c_get __P((DBC *, DBT *, DBT *, int));
+static int __ham_c_put __P((DBC *, DBT *, DBT *, int));
+static int __ham_c_init __P((DB *, DB_TXN *, DBC **));
+static int __ham_cursor __P((DB *, DB_TXN *, DBC **));
+static int __ham_delete __P((DB *, DB_TXN *, DBT *, int));
+static int __ham_dup_return __P((HTAB *, HASH_CURSOR *, DBT *, int));
+static int __ham_get __P((DB *, DB_TXN *, DBT *, DBT *, int));
+static void __ham_init_htab __P((HTAB *));
+static int __ham_lookup __P((HTAB *,
+ HASH_CURSOR *, const DBT *, u_int32_t, db_lockmode_t));
+static int __ham_overwrite __P((HTAB *, HASH_CURSOR *, DBT *));
+static int __ham_put __P((DB *, DB_TXN *, DBT *, DBT *, int));
+static int __ham_sync __P((DB *, int));
+
+/************************** INTERFACE ROUTINES ***************************/
+/* OPEN/CLOSE */
+
+/*
+ * __ham_open --
+ *
+ * PUBLIC: int __ham_open __P((DB *, DB_INFO *));
+ */
+int
+__ham_open(dbp, dbinfo)
+ DB *dbp;
+ DB_INFO *dbinfo;
+{
+ DB_ENV *dbenv;
+ DBC *curs;
+ HTAB *hashp;
+ int file_existed, ret;
+
+ dbenv = dbp->dbenv;
+
+ if ((hashp = (HTAB *)calloc(1, sizeof(HTAB))) == NULL)
+ return (ENOMEM);
+ hashp->dbp = dbp;
+
+ /* Set the hash function if specified by the user. */
+ if (dbinfo != NULL && dbinfo->h_hash != NULL)
+ hashp->hash = dbinfo->h_hash;
+
+ /*
+ * Initialize the remaining fields of the dbp. The type, close and
+ * fd functions are all set in db_open.
+ */
+ dbp->internal = hashp;
+ dbp->cursor = __ham_cursor;
+ dbp->del = __ham_delete;
+ dbp->get = __ham_get;
+ dbp->put = __ham_put;
+ dbp->sync = __ham_sync;
+
+ /* If locking is turned on, lock the meta data page. */
+ if (F_ISSET(dbp, DB_AM_LOCKING)) {
+ dbp->lock.pgno = BUCKET_INVALID;
+ if ((ret = lock_get(dbenv->lk_info, dbp->locker,
+ 0, &dbp->lock_dbt, DB_LOCK_READ, &hashp->hlock)) != 0) {
+ if (ret < 0)
+ ret = EAGAIN;
+ goto out;
+ }
+ }
+
+ /*
+ * Now, we can try to read the meta-data page and figure out
+ * if we set up locking and get the meta-data page properly.
+ * If this is a new file, initialize it, and put it back dirty.
+ */
+ if ((ret = __ham_get_page(hashp->dbp, 0, (PAGE **)&hashp->hdr)) != 0)
+ goto out;
+
+ /* Initialize the hashp structure */
+ if (hashp->hdr->magic == DB_HASHMAGIC) {
+ file_existed = 1;
+ /* File exists, verify the data in the header. */
+ if (hashp->hash == NULL)
+ hashp->hash =
+ hashp->hdr->version < 5 ? __ham_func4 : __ham_func5;
+ if (hashp->hash(CHARKEY, sizeof(CHARKEY)) !=
+ hashp->hdr->h_charkey) {
+ __db_err(hashp->dbp->dbenv,
+ "hash: incompatible hash function");
+ ret = EINVAL;
+ goto out;
+ }
+ if (F_ISSET(hashp->hdr, DB_HASH_DUP))
+ F_SET(dbp, DB_AM_DUP);
+ } else {
+ /*
+ * File does not exist, we must initialize the header. If
+ * locking is enabled that means getting a write lock first.
+ */
+ file_existed = 0;
+ if (F_ISSET(dbp, DB_AM_LOCKING) &&
+ ((ret = lock_put(dbenv->lk_info, hashp->hlock)) != 0 ||
+ (ret = lock_get(dbenv->lk_info, dbp->locker, 0,
+ &dbp->lock_dbt, DB_LOCK_WRITE, &hashp->hlock)) != 0)) {
+ if (ret < 0)
+ ret = EAGAIN;
+ goto out;
+ }
+
+ hashp->hdr->nelem = dbinfo != NULL ? dbinfo->h_nelem : 0;
+ hashp->hdr->ffactor =
+ dbinfo != NULL && dbinfo->h_ffactor ? dbinfo->h_ffactor : 0;
+ __ham_init_htab(hashp);
+ if (F_ISSET(dbp, DB_AM_DUP))
+ F_SET(hashp->hdr, DB_HASH_DUP);
+ if ((ret = __ham_dirty_page(hashp, (PAGE *)hashp->hdr)) != 0)
+ goto out;
+ }
+
+ /* Initialize the default cursor. */
+ __ham_c_init(dbp, NULL, &curs);
+ TAILQ_INSERT_TAIL(&dbp->curs_queue, curs, links);
+
+ /* Allocate memory for our split buffer. */
+ if ((hashp->split_buf = (PAGE *)malloc(dbp->pgsize)) == NULL) {
+ ret = ENOMEM;
+ goto out;
+ }
+
+#ifdef NO_STATISTICS_FOR_DB_ERR
+ __db_err(dbp->dbenv,
+ "%s%lx\n%s%ld\n%s%ld\n%s%ld\n%s%ld\n%s0x%lx\n%s0x%lx\n%s%ld\n%s%ld\n%s0x%lx",
+ "TABLE POINTER ", (long)hashp,
+ "BUCKET SIZE ", (long)hashp->hdr->pagesize,
+ "FILL FACTOR ", (long)hashp->hdr->ffactor,
+ "MAX BUCKET ", (long)hashp->hdr->max_bucket,
+ "OVFL POINT ", (long)hashp->hdr->ovfl_point,
+ "LAST FREED ", (long)hashp->hdr->last_freed,
+ "HIGH MASK ", (long)hashp->hdr->high_mask,
+ "LOW MASK ", (long)hashp->hdr->low_mask,
+ "NELEM ", (long)hashp->hdr->nelem,
+ "FLAGS ", (long)hashp->hdr->flags);
+#endif
+
+ /* Release the meta data page */
+ (void)__ham_put_page(hashp->dbp, (PAGE *)hashp->hdr, 0);
+ if (F_ISSET(dbp, DB_AM_LOCKING) &&
+ (ret = lock_put(dbenv->lk_info, hashp->hlock)) != 0) {
+ if (ret < 0)
+ ret = EAGAIN;
+ goto out;
+ }
+
+ hashp->hlock = 0;
+ hashp->hdr = NULL;
+ /* Sync the file so that we know that the meta data goes to disk. */
+ if (!file_existed && (ret = dbp->sync(dbp, 0)) != 0)
+ goto out;
+ return (0);
+
+out: (void)__ham_close(dbp);
+ return (ret);
+}
+
+/*
+ * PUBLIC: int __ham_close __P((DB *));
+ */
+int
+__ham_close(dbp)
+ DB *dbp;
+{
+ HTAB *hashp;
+ int ret, t_ret;
+
+ DEBUG_LWRITE(dbp, NULL, "ham_close", NULL, NULL, 0);
+ hashp = (HTAB *)dbp->internal;
+ ret = 0;
+
+ /* Free the split page. */
+ if (hashp->split_buf)
+ FREE(hashp->split_buf, dbp->pgsize);
+
+ if (hashp->hdr && (t_ret = __ham_put_page(hashp->dbp,
+ (PAGE *)hashp->hdr, 0)) != 0 && ret == 0)
+ ret = t_ret;
+ if (hashp->hlock && (t_ret = lock_put(hashp->dbp->dbenv->lk_info,
+ hashp->hlock)) != 0 && ret == 0)
+ ret = t_ret;
+
+ FREE(hashp, sizeof(HTAB));
+ dbp->internal = NULL;
+ return (ret);
+}
+
+/************************** LOCAL CREATION ROUTINES **********************/
+/*
+ * Returns 0 on No Error
+ */
+static void
+__ham_init_htab(hashp)
+ HTAB *hashp;
+{
+ u_int32_t nelem;
+ int32_t l2, nbuckets;
+
+ nelem = hashp->hdr->nelem;
+ hashp->hdr->pagesize = hashp->dbp->pgsize;
+ ZERO_LSN(hashp->hdr->lsn);
+ hashp->hdr->magic = DB_HASHMAGIC;
+ hashp->hdr->version = DB_HASHVERSION;
+ if (hashp->hash == NULL)
+ hashp->hash =
+ hashp->hdr->version < 5 ? __ham_func4 : __ham_func5;
+ hashp->hdr->h_charkey = hashp->hash(CHARKEY, sizeof(CHARKEY));
+ if (nelem != 0 && hashp->hdr->ffactor != 0) {
+ nelem = (nelem - 1) / hashp->hdr->ffactor + 1;
+ l2 = __db_log2(nelem > 2 ? nelem : 2);
+ } else
+ l2 = 2;
+
+ nbuckets = 1 << l2;
+
+ hashp->hdr->spares[l2] = 0;
+ hashp->hdr->spares[l2 + 1] = 0;
+ hashp->hdr->ovfl_point = l2;
+ hashp->hdr->last_freed = PGNO_INVALID;
+
+ hashp->hdr->max_bucket = hashp->hdr->high_mask = nbuckets - 1;
+ hashp->hdr->low_mask = (nbuckets >> 1) - 1;
+ memcpy(hashp->hdr->uid, hashp->dbp->lock.fileid, DB_FILE_ID_LEN);
+}
+
+/********************** DESTROY/CLOSE ROUTINES ************************/
+
+
+/*
+ * Write modified pages to disk
+ *
+ * Returns:
+ * 0 == OK
+ * -1 ERROR
+ */
+static int
+__ham_sync(dbp, flags)
+ DB *dbp;
+ int flags;
+{
+ int ret;
+
+ DEBUG_LWRITE(dbp, NULL, "ham_sync", NULL, NULL, flags);
+ if ((ret = __db_syncchk(dbp, flags)) != 0)
+ return (ret);
+ if (F_ISSET(dbp, DB_AM_RDONLY))
+ return (0);
+
+ if ((ret = memp_fsync(dbp->mpf)) == DB_INCOMPLETE)
+ ret = 0;
+
+ return (ret);
+}
+
+/*******************************SEARCH ROUTINES *****************************/
+/*
+ * All the access routines return
+ *
+ * Returns:
+ * 0 on SUCCESS
+ * 1 to indicate an external ERROR (i.e. key not found, etc)
+ * -1 to indicate an internal ERROR (i.e. out of memory, etc)
+ */
+
+static int
+__ham_get(dbp, txn, key, data, flags)
+ DB *dbp;
+ DB_TXN *txn;
+ DBT *key;
+ DBT *data;
+ int flags;
+{
+ DB *ldbp;
+ DBC *cp;
+ HTAB *hashp;
+ HASH_CURSOR *hcp;
+ int ret, t_ret;
+
+ DEBUG_LREAD(dbp, txn, "ham_get", key, NULL, flags);
+ if ((ret = __db_getchk(dbp, key, data, flags)) != 0)
+ return (ret);
+
+ ldbp = dbp;
+ if (F_ISSET(dbp, DB_AM_THREAD) &&
+ (ret = __db_gethandle(dbp, __ham_hdup, &ldbp)) != 0)
+ return (ret);
+
+ hashp = (HTAB *)ldbp->internal;
+ SET_LOCKER(ldbp, txn);
+ GET_META(ldbp, hashp);
+ cp = TAILQ_FIRST(&ldbp->curs_queue);
+
+ hashp->hash_accesses++;
+ hcp = (HASH_CURSOR *)TAILQ_FIRST(&ldbp->curs_queue)->internal;
+ if ((ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_READ)) == 0)
+ if (F_ISSET(hcp, H_OK))
+ ret = __ham_dup_return(hashp, hcp, data, DB_FIRST);
+ else /* Key was not found */
+ ret = DB_NOTFOUND;
+
+ if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0)
+ ret = t_ret;
+ RELEASE_META(ldbp, hashp);
+ if (F_ISSET(dbp, DB_AM_THREAD))
+ __db_puthandle(ldbp);
+ return (ret);
+}
+
+static int
+__ham_put(dbp, txn, key, data, flags)
+ DB *dbp;
+ DB_TXN *txn;
+ DBT *key;
+ DBT *data;
+ int flags;
+{
+ DB *ldbp;
+ HTAB *hashp;
+ HASH_CURSOR *hcp;
+ DBT tmp_val, *myval;
+ int ret, t_ret;
+ u_int32_t nbytes;
+
+ DEBUG_LWRITE(dbp, txn, "ham_put", key, data, flags);
+ if ((ret = __db_putchk(dbp, key, data,
+ flags, F_ISSET(dbp, DB_AM_RDONLY), F_ISSET(dbp, DB_AM_DUP))) != 0)
+ return (ret);
+
+ ldbp = dbp;
+ if (F_ISSET(dbp, DB_AM_THREAD) &&
+ (ret = __db_gethandle(dbp, __ham_hdup, &ldbp)) != 0)
+ return (ret);
+
+ hashp = (HTAB *)ldbp->internal;
+ SET_LOCKER(ldbp, txn);
+ GET_META(ldbp, hashp);
+ hcp = TAILQ_FIRST(&ldbp->curs_queue)->internal;
+
+ nbytes = (ISBIG(hashp, key->size) ? HOFFPAGE_PSIZE :
+ HKEYDATA_PSIZE(key->size)) +
+ (ISBIG(hashp, data->size) ? HOFFPAGE_PSIZE :
+ HKEYDATA_PSIZE(data->size));
+
+ hashp->hash_accesses++;
+ ret = __ham_lookup(hashp, hcp, key, nbytes, DB_LOCK_WRITE);
+
+ if (ret == DB_NOTFOUND) {
+ ret = 0;
+ if (hcp->seek_found_page != PGNO_INVALID &&
+ hcp->seek_found_page != hcp->pgno) {
+ if ((ret = __ham_item_done(hashp, hcp, 0)) != 0)
+ goto out;
+ hcp->pgno = hcp->seek_found_page;
+ hcp->bndx = NDX_INVALID;
+ }
+
+ if (F_ISSET(data, DB_DBT_PARTIAL) && data->doff != 0) {
+ /*
+ * Doing a partial put, but the key does not exist
+ * and we are not beginning the write at 0. We
+ * must create a data item padded up to doff and
+ * then write the new bytes represented by val.
+ */
+ ret = __ham_init_dbt(&tmp_val, data->size + data->doff,
+ &hcp->big_data, &hcp->big_datalen);
+ if (ret == 0) {
+ memset(tmp_val.data, 0, data->doff);
+ memcpy((u_int8_t *)tmp_val.data + data->doff,
+ data->data, data->size);
+ myval = &tmp_val;
+ }
+ } else
+ myval = (DBT *)data;
+
+ if (ret == 0)
+ ret = __ham_add_el(hashp, hcp, key, myval, H_KEYDATA);
+ } else if (ret == 0 && F_ISSET(hcp, H_OK)) {
+ if (flags == DB_NOOVERWRITE)
+ ret = DB_KEYEXIST;
+ else if (F_ISSET(ldbp, DB_AM_DUP))
+ ret = __ham_add_dup(hashp, hcp, data, DB_KEYLAST);
+ else
+ ret = __ham_overwrite(hashp, hcp, data);
+ }
+
+ /* Free up all the cursor pages. */
+ if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
+ ret = t_ret;
+ /* Now check if we have to grow. */
+out: if (ret == 0 && F_ISSET(hcp, H_EXPAND)) {
+ ret = __ham_expand_table(hashp);
+ F_CLR(hcp, H_EXPAND);
+ }
+
+ if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
+ ret = t_ret;
+ RELEASE_META(ldbp, hashp);
+ if (F_ISSET(dbp, DB_AM_THREAD))
+ __db_puthandle(ldbp);
+ return (ret);
+}
+
+static int
+__ham_cursor(dbp, txnid, dbcp)
+ DB *dbp;
+ DB_TXN *txnid;
+ DBC **dbcp;
+{
+ int ret;
+
+ DEBUG_LWRITE(dbp, txnid, "ham_cursor", NULL, NULL, 0);
+ if ((ret = __ham_c_init(dbp, txnid, dbcp)) != 0)
+ return (ret);
+
+ DB_THREAD_LOCK(dbp);
+ TAILQ_INSERT_TAIL(&dbp->curs_queue, *dbcp, links);
+ DB_THREAD_UNLOCK(dbp);
+ return (ret);
+}
+
+static int
+__ham_c_init(dbp, txnid, dbcp)
+ DB *dbp;
+ DB_TXN *txnid;
+ DBC **dbcp;
+{
+ DBC *db_curs;
+ HASH_CURSOR *new_curs;
+
+ if ((db_curs = (DBC *)calloc(sizeof(DBC), 1)) == NULL)
+ return (ENOMEM);
+
+ if ((new_curs =
+ (HASH_CURSOR *)calloc(sizeof(struct cursor_t), 1)) == NULL) {
+ FREE(db_curs, sizeof(DBC));
+ return (ENOMEM);
+ }
+
+ db_curs->internal = new_curs;
+ db_curs->c_close = __ham_c_close;
+ db_curs->c_del = __ham_c_del;
+ db_curs->c_get = __ham_c_get;
+ db_curs->c_put = __ham_c_put;
+ db_curs->txn = txnid;
+ db_curs->dbp = dbp;
+
+ new_curs->db_cursor = db_curs;
+ __ham_item_init(new_curs);
+
+ if (dbcp != NULL)
+ *dbcp = db_curs;
+ return (0);
+}
+
+static int
+__ham_delete(dbp, txn, key, flags)
+ DB *dbp;
+ DB_TXN *txn;
+ DBT *key;
+ int flags;
+{
+ DB *ldbp;
+ HTAB *hashp;
+ HASH_CURSOR *hcp;
+ int ret, t_ret;
+
+ DEBUG_LWRITE(dbp, txn, "ham_delete", key, NULL, flags);
+ if ((ret = __db_delchk(dbp, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0)
+ return (ret);
+
+ ldbp = dbp;
+ if (F_ISSET(dbp, DB_AM_THREAD) &&
+ (ret = __db_gethandle(dbp, __ham_hdup, &ldbp)) != 0)
+ return (ret);
+ hashp = (HTAB *)ldbp->internal;
+ SET_LOCKER(ldbp, txn);
+ GET_META(ldbp, hashp);
+ hcp = TAILQ_FIRST(&ldbp->curs_queue)->internal;
+
+ hashp->hash_accesses++;
+ if ((ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_WRITE)) == 0)
+ if (F_ISSET(hcp, H_OK))
+ ret = __ham_del_pair(hashp, hcp);
+ else
+ ret = DB_NOTFOUND;
+
+ if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
+ ret = t_ret;
+ RELEASE_META(ldbp, hashp);
+ if (F_ISSET(dbp, DB_AM_THREAD))
+ __db_puthandle(ldbp);
+ return (ret);
+}
+
+/* ****************** CURSORS ********************************** */
+static int
+__ham_c_close(cursor)
+ DBC *cursor;
+{
+ DB *ldbp;
+ HTAB *hashp;
+ HASH_CURSOR *hcp;
+ int ret;
+
+ DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_close", NULL, NULL, 0);
+ /*
+ * If the pagep, dpagep, and lock fields of the cursor are all NULL,
+ * then there really isn't a need to get a handle here. However,
+ * the normal case is that at least one of those fields is non-NULL,
+ * and putting those checks in here would couple the ham_item_done
+ * functionality with cursor close which would be pretty disgusting.
+ * Instead, we pay the overhead here of always getting the handle.
+ */
+ ldbp = cursor->dbp;
+ if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
+ (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
+ return (ret);
+ hashp = (HTAB *)ldbp->internal;
+ hcp = (HASH_CURSOR *)cursor->internal;
+ ret = __ham_item_done(hashp, hcp, 0);
+
+ if (hcp->big_key)
+ FREE(hcp->big_key, hcp->big_keylen);
+ if (hcp->big_data)
+ FREE(hcp->big_data, hcp->big_datalen);
+
+ /*
+ * All cursors (except the default ones) are linked off the master.
+ * Therefore, when we close the cursor, we have to remove it from
+ * the master, not the local one. When we are closing the file in
+ * its entirety, then we clear the THREAD bit and the master and
+ * local are identical, so we remove the correct one.
+ */
+ DB_THREAD_LOCK(cursor->dbp);
+ TAILQ_REMOVE(&cursor->dbp->curs_queue, cursor, links);
+ DB_THREAD_UNLOCK(cursor->dbp);
+
+ if (F_ISSET(cursor->dbp, DB_AM_THREAD))
+ __db_puthandle(ldbp);
+
+ FREE(hcp, sizeof(HASH_CURSOR));
+ FREE(cursor, sizeof(DBC));
+ return (ret);
+}
+
+static int
+__ham_c_del(cursor, flags)
+ DBC *cursor;
+ int flags;
+{
+ DB *ldbp;
+ HTAB *hashp;
+ HASH_CURSOR *hcp;
+ HASH_CURSOR save_curs;
+ db_pgno_t ppgno, chg_pgno;
+ int ret, t_ret;
+
+ DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_del", NULL, NULL, flags);
+ ldbp = cursor->dbp;
+ if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
+ (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
+ return (ret);
+ hashp = (HTAB *)ldbp->internal;
+ hcp = (HASH_CURSOR *)cursor->internal;
+ save_curs = *hcp;
+ if ((ret = __db_cdelchk(ldbp, flags,
+ F_ISSET(ldbp, DB_AM_RDONLY), IS_VALID(hcp))) != 0)
+ return (ret);
+ if (F_ISSET(hcp, H_DELETED))
+ return (DB_NOTFOUND);
+
+ SET_LOCKER(hashp->dbp, cursor->txn);
+ GET_META(hashp->dbp, hashp);
+ hashp->hash_accesses++;
+ if ((ret = __ham_get_cpage(hashp, hcp, DB_LOCK_WRITE)) != 0)
+ goto out;
+ if (F_ISSET(hcp, H_ISDUP) && hcp->dpgno != PGNO_INVALID) {
+ ppgno = PREV_PGNO(hcp->dpagep);
+
+ /* Remove item from duplicate page. */
+ chg_pgno = hcp->dpgno;
+ if ((ret = __db_drem(hashp->dbp,
+ &hcp->dpagep, hcp->dndx, __ham_del_page)) != 0)
+ goto out;
+
+ /*
+ * There are 4 cases.
+ * 1. We removed an item on a page, but nothing else changed.
+ * 2. We removed the last item on a page, but there is a
+ * following page of duplicates.
+ * 3. We removed the last item on a page, this page was the
+ * last page in a duplicate set, but there were dups before
+ * it.
+ * 4. We removed the last item on a page, removing the last
+ * duplicate.
+ * In case 1 hcp->dpagep is unchanged.
+ * In case 2 hcp->dpagep comes back pointing to the next dup
+ * page.
+ * In case 3 hcp->dpagep comes back NULL.
+ * In case 4 hcp->dpagep comes back NULL.
+ */
+ if (hcp->dpagep == NULL) {
+ if (ppgno != PGNO_INVALID) { /* Case 3 */
+ hcp->dpgno = ppgno;
+ if ((ret = __ham_get_cpage(hashp, hcp,
+ DB_LOCK_READ)) != 0)
+ goto out;
+ hcp->dndx = NUM_ENT(hcp->dpagep);
+ F_SET(hcp, H_DELETED);
+ } else { /* Case 4 */
+ ret = __ham_del_pair(hashp, hcp);
+ hcp->dpgno = PGNO_INVALID;
+ /*
+ * Delpair updated the cursor queue, so we
+ * don't have to do that here.
+ */
+ chg_pgno = PGNO_INVALID;
+ }
+ } else if (PGNO(hcp->dpagep) != hcp->dpgno) {
+ hcp->dndx = 0; /* Case 2 */
+ hcp->dpgno = PGNO(hcp->dpagep);
+ if (ppgno == PGNO_INVALID)
+ memcpy(P_ENTRY(hcp->pagep,
+ H_DATAINDEX(hcp->bndx)) +
+ SSZ(HOFFDUP, pgno), &hcp->dpgno,
+ sizeof(db_pgno_t));
+ F_SET(hcp, H_DELETED);
+ } else /* Case 1 */
+ F_SET(hcp, H_DELETED);
+ if (chg_pgno != PGNO_INVALID)
+ __ham_c_update(hashp, hcp, chg_pgno, 0, 0, 1);
+ } else if (F_ISSET(hcp, H_ISDUP)) { /* on page */
+ if (hcp->dup_off == 0 && DUP_SIZE(hcp->dup_len) ==
+ LEN_HDATA(hcp->pagep, hashp->hdr->pagesize, hcp->bndx))
+ ret = __ham_del_pair(hashp, hcp);
+ else {
+ DBT repldbt;
+
+ repldbt.flags = 0;
+ F_SET(&repldbt, DB_DBT_PARTIAL);
+ repldbt.doff = hcp->dup_off;
+ repldbt.dlen = DUP_SIZE(hcp->dup_len);
+ repldbt.size = 0;
+ ret = __ham_replpair(hashp, hcp, &repldbt, 0);
+ hcp->dup_tlen -= DUP_SIZE(hcp->dup_len);
+ __ham_c_update(hashp, hcp, hcp->pgno,
+ DUP_SIZE(hcp->dup_len), 0, 1);
+ F_SET(hcp, H_DELETED);
+ }
+
+ } else
+ /* Not a duplicate */
+ ret = __ham_del_pair(hashp, hcp);
+
+out: if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
+ t_ret = ret;
+ if (ret != 0)
+ *hcp = save_curs;
+ RELEASE_META(hashp->dbp, hashp);
+ if (F_ISSET(cursor->dbp, DB_AM_THREAD))
+ __db_puthandle(ldbp);
+ return (ret);
+}
+
+static int
+__ham_c_get(cursor, key, data, flags)
+ DBC *cursor;
+ DBT *key;
+ DBT *data;
+ int flags;
+{
+ DB *ldbp;
+ HTAB *hashp;
+ HASH_CURSOR *hcp, save_curs;
+ int get_key, ret, t_ret;
+
+ DEBUG_LREAD(cursor->dbp, cursor->txn, "ham_c_get",
+ flags == DB_SET || flags == DB_SET_RANGE ? key : NULL,
+ NULL, flags);
+ ldbp = cursor->dbp;
+ if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
+ (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
+ return (ret);
+ hashp = (HTAB *)(ldbp->internal);
+ hcp = (HASH_CURSOR *)cursor->internal;
+ save_curs = *hcp;
+ if ((ret =
+ __db_cgetchk(hashp->dbp, key, data, flags, IS_VALID(hcp))) != 0)
+ return (ret);
+
+ SET_LOCKER(hashp->dbp, cursor->txn);
+ GET_META(hashp->dbp, hashp);
+ hashp->hash_accesses++;
+
+ hcp->seek_size = 0;
+
+ ret = 0;
+ get_key = 1;
+ switch (flags) {
+ case DB_PREV:
+ if (hcp->bucket != BUCKET_INVALID) {
+ ret = __ham_item_prev(hashp, hcp, DB_LOCK_READ);
+ break;
+ }
+ /* FALL THROUGH */
+ case DB_LAST:
+ ret = __ham_item_last(hashp, hcp, DB_LOCK_READ);
+ break;
+ case DB_FIRST:
+ ret = __ham_item_first(hashp, hcp, DB_LOCK_READ);
+ break;
+ case DB_NEXT:
+ if (hcp->bucket == BUCKET_INVALID)
+ hcp->bucket = 0;
+ ret = __ham_item_next(hashp, hcp, DB_LOCK_READ);
+ break;
+ case DB_SET:
+ case DB_SET_RANGE:
+ ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_READ);
+ get_key = 0;
+ break;
+ case DB_CURRENT:
+ if (F_ISSET(hcp, H_DELETED)) {
+ ret = DB_KEYEMPTY;
+ goto out;
+ }
+
+ ret = __ham_item(hashp, hcp, DB_LOCK_READ);
+ break;
+ }
+
+ /*
+ * Must always enter this loop to do error handling and
+ * check for big key/data pair.
+ */
+ while (1) {
+ if (ret != 0 && ret != DB_NOTFOUND)
+ goto out1;
+ else if (F_ISSET(hcp, H_OK)) {
+ /* Get the key. */
+ if (get_key && (ret = __db_ret(hashp->dbp, hcp->pagep,
+ H_KEYINDEX(hcp->bndx), key, &hcp->big_key,
+ &hcp->big_keylen)) != 0)
+ goto out1;
+
+ ret = __ham_dup_return(hashp, hcp, data, flags);
+ break;
+ } else if (!F_ISSET(hcp, H_NOMORE)) {
+ abort();
+ break;
+ }
+
+ /*
+ * Ran out of entries in a bucket; change buckets.
+ */
+ switch (flags) {
+ case DB_LAST:
+ case DB_PREV:
+ ret = __ham_item_done(hashp, hcp, 0);
+ if (hcp->bucket == 0) {
+ ret = DB_NOTFOUND;
+ goto out1;
+ }
+ hcp->bucket--;
+ hcp->bndx = NDX_INVALID;
+ if (ret == 0)
+ ret = __ham_item_prev(hashp,
+ hcp, DB_LOCK_READ);
+ break;
+ case DB_FIRST:
+ case DB_NEXT:
+ ret = __ham_item_done(hashp, hcp, 0);
+ hcp->bndx = NDX_INVALID;
+ hcp->bucket++;
+ hcp->pgno = PGNO_INVALID;
+ hcp->pagep = NULL;
+ if (hcp->bucket > hashp->hdr->max_bucket) {
+ ret = DB_NOTFOUND;
+ goto out1;
+ }
+ if (ret == 0)
+ ret = __ham_item_next(hashp,
+ hcp, DB_LOCK_READ);
+ break;
+ case DB_SET:
+ case DB_SET_RANGE:
+ /* Key not found. */
+ ret = DB_NOTFOUND;
+ goto out1;
+ }
+ }
+out1: if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0)
+ t_ret = ret;
+out: if (ret)
+ *hcp = save_curs;
+ RELEASE_META(hashp->dbp, hashp);
+ if (F_ISSET(cursor->dbp, DB_AM_THREAD))
+ __db_puthandle(ldbp);
+ return (ret);
+}
+
+static int
+__ham_c_put(cursor, key, data, flags)
+ DBC *cursor;
+ DBT *key;
+ DBT *data;
+ int flags;
+{
+ DB *ldbp;
+ HTAB *hashp;
+ HASH_CURSOR *hcp, save_curs;
+ int ret, t_ret;
+ u_int32_t nbytes;
+
+ DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_put",
+ flags == DB_KEYFIRST || flags == DB_KEYLAST ? key : NULL,
+ NULL, flags);
+ ldbp = cursor->dbp;
+ if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
+ (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
+ return (ret);
+ hashp = (HTAB *)(ldbp->internal);
+ hcp = (HASH_CURSOR *)cursor->internal;
+ save_curs = *hcp;
+
+ if ((ret = __db_cputchk(hashp->dbp, key, data, flags,
+ F_ISSET(ldbp, DB_AM_RDONLY), IS_VALID(hcp))) != 0)
+ return (ret);
+ if (F_ISSET(hcp, H_DELETED))
+ return (DB_NOTFOUND);
+
+ SET_LOCKER(hashp->dbp, cursor->txn);
+ GET_META(hashp->dbp, hashp);
+ ret = 0;
+
+ switch (flags) {
+ case DB_KEYLAST:
+ case DB_KEYFIRST:
+ nbytes = (ISBIG(hashp, key->size) ? HOFFPAGE_PSIZE :
+ HKEYDATA_PSIZE(key->size)) +
+ (ISBIG(hashp, data->size) ? HOFFPAGE_PSIZE :
+ HKEYDATA_PSIZE(data->size));
+ ret = __ham_lookup(hashp, hcp, key, nbytes, DB_LOCK_WRITE);
+ break;
+ case DB_BEFORE:
+ case DB_AFTER:
+ case DB_CURRENT:
+ ret = __ham_item(hashp, hcp, DB_LOCK_WRITE);
+ break;
+ }
+
+ if (ret == 0) {
+ if (flags == DB_CURRENT && !F_ISSET(ldbp, DB_AM_DUP))
+ ret = __ham_overwrite(hashp, hcp, data);
+ else
+ ret = __ham_add_dup(hashp, hcp, data, flags);
+ }
+
+ if (ret == 0 && F_ISSET(hcp, H_EXPAND)) {
+ ret = __ham_expand_table(hashp);
+ F_CLR(hcp, H_EXPAND);
+ }
+
+ if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
+ ret = t_ret;
+ if (ret != 0)
+ *hcp = save_curs;
+ RELEASE_META(hashp->dbp, hashp);
+ if (F_ISSET(cursor->dbp, DB_AM_THREAD))
+ __db_puthandle(ldbp);
+ return (ret);
+}
+
+/********************************* UTILITIES ************************/
+
+/*
+ * __ham_expand_table --
+ *
+ * PUBLIC: int __ham_expand_table __P((HTAB *));
+ */
+int
+__ham_expand_table(hashp)
+ HTAB *hashp;
+{
+ u_int32_t old_bucket, new_bucket;
+ u_int32_t spare_ndx;
+ int ret;
+
+ ret = 0;
+ DIRTY_META(hashp, ret);
+ if (ret)
+ return (ret);
+
+ if (DB_LOGGING(hashp->dbp)) {
+ DB_LSN new_lsn;
+
+ if ((ret = __ham_splitmeta_log(hashp->dbp->dbenv->lg_info,
+ (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
+ hashp->dbp->log_fileid,
+ hashp->hdr->max_bucket, hashp->hdr->ovfl_point,
+ hashp->hdr->spares[hashp->hdr->ovfl_point],
+ &hashp->hdr->lsn)) != 0)
+ return (ret);
+
+ hashp->hdr->lsn = new_lsn;
+ }
+
+ hashp->hash_expansions++;
+ new_bucket = ++hashp->hdr->max_bucket;
+ old_bucket = (hashp->hdr->max_bucket & hashp->hdr->low_mask);
+
+ /*
+ * If the split point is increasing (hdr.max_bucket's log base 2
+ * increases), max sure that we have enough extra pages, then
+ * copy the current contents of the spare split bucket to the
+ * next bucket.
+ */
+ spare_ndx = __db_log2(hashp->hdr->max_bucket + 1);
+ if (spare_ndx > hashp->hdr->ovfl_point) {
+ /*
+ * We are about to shift the split point. Make sure that
+ * if the next doubling is going to be big (more than 8
+ * pages), we have some extra pages around.
+ */
+ if (hashp->hdr->spares[hashp->hdr->ovfl_point] == 0 &&
+ new_bucket >= 8)
+ __ham_init_ovflpages(hashp);
+
+ hashp->hdr->spares[spare_ndx] =
+ hashp->hdr->spares[hashp->hdr->ovfl_point];
+ hashp->hdr->ovfl_point = spare_ndx;
+ }
+
+ if (new_bucket > hashp->hdr->high_mask) {
+ /* Starting a new doubling */
+ hashp->hdr->low_mask = hashp->hdr->high_mask;
+ hashp->hdr->high_mask = new_bucket | hashp->hdr->low_mask;
+ }
+
+ if (BUCKET_TO_PAGE(hashp, new_bucket) > MAX_PAGES(hashp)) {
+ __db_err(hashp->dbp->dbenv,
+ "hash: Cannot allocate new bucket. Pages exhausted.");
+ return (ENOSPC);
+ }
+
+ /* Relocate records to the new bucket */
+ return (__ham_split_page(hashp, old_bucket, new_bucket));
+}
+
+/*
+ * PUBLIC: u_int32_t __ham_call_hash __P((HTAB *, u_int8_t *, int32_t));
+ */
+u_int32_t
+__ham_call_hash(hashp, k, len)
+ HTAB *hashp;
+ u_int8_t *k;
+ int32_t len;
+{
+ u_int32_t n, bucket;
+
+ n = (u_int32_t)hashp->hash(k, len);
+ bucket = n & hashp->hdr->high_mask;
+ if (bucket > hashp->hdr->max_bucket)
+ bucket = bucket & hashp->hdr->low_mask;
+ return (bucket);
+}
+
+/*
+ * Check for duplicates, and call __db_ret appropriately. Release
+ * everything held by the cursor.
+ */
+static int
+__ham_dup_return(hashp, hcp, val, flags)
+ HTAB *hashp;
+ HASH_CURSOR *hcp;
+ DBT *val;
+ int flags;
+{
+ HKEYDATA *hk;
+ PAGE *pp;
+ DBT *myval, tmp_val;
+ db_indx_t ndx;
+ db_pgno_t pgno;
+ u_int8_t type;
+ int indx, ret;
+ db_indx_t len;
+
+ /* Check for duplicate and return the first one. */
+ ndx = H_DATAINDEX(hcp->bndx);
+ type = GET_HKEYDATA(hcp->pagep, ndx)->type;
+ pp = hcp->pagep;
+ myval = val;
+
+ /*
+ * There are 3 cases:
+ * 1. We are not in duplicate, simply call db_ret.
+ * 2. We are looking at keys and stumbled onto a duplicate.
+ * 3. We are in the middle of a duplicate set. (ISDUP set)
+ */
+
+ /*
+ * Here we check for the case where we just stumbled onto a
+ * duplicate. In this case, we do initialization and then
+ * let the normal duplicate code handle it.
+ */
+ if (!F_ISSET(hcp, H_ISDUP))
+ if (type == H_DUPLICATE) {
+ F_SET(hcp, H_ISDUP);
+ hcp->dup_tlen = LEN_HDATA(hcp->pagep,
+ hashp->hdr->pagesize, hcp->bndx);
+ hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
+ if (flags == DB_LAST || flags == DB_PREV) {
+ hcp->dndx = 0;
+ hcp->dup_off = 0;
+ do {
+ memcpy(&len, hk->data + hcp->dup_off,
+ sizeof(db_indx_t));
+ hcp->dup_off += DUP_SIZE(len);
+ hcp->dndx++;
+ } while (hcp->dup_off < hcp->dup_tlen);
+ hcp->dup_off -= DUP_SIZE(len);
+ hcp->dndx--;
+ } else {
+ memcpy(&len, hk->data, sizeof(db_indx_t));
+ hcp->dup_off = 0;
+ hcp->dndx = 0;
+ }
+ hcp->dup_len = len;
+ } else if (type == H_OFFDUP) {
+ F_SET(hcp, H_ISDUP);
+ memcpy(&pgno,
+ P_ENTRY(hcp->pagep, ndx) + SSZ(HOFFDUP, pgno),
+ sizeof(db_pgno_t));
+ if (flags == DB_LAST || flags == DB_PREV) {
+ indx = (int)hcp->dndx;
+ if ((ret = __db_dend(hashp->dbp,
+ pgno, &hcp->dpagep)) != 0)
+ return (ret);
+ hcp->dpgno = PGNO(hcp->dpagep);
+ hcp->dndx = NUM_ENT(hcp->dpagep) - 1;
+ } else if ((ret = __ham_next_cpage(hashp,
+ hcp, pgno, 0, H_ISDUP)) != 0)
+ return (ret);
+ }
+
+
+ /*
+ * Now, everything is initialized, grab a duplicate if
+ * necessary.
+ */
+ if (F_ISSET(hcp, H_ISDUP))
+ if (hcp->dpgno != PGNO_INVALID) {
+ pp = hcp->dpagep;
+ ndx = hcp->dndx;
+ } else {
+ /*
+ * Copy the DBT in case we are retrieving into
+ * user memory and we need the parameters for
+ * it.
+ */
+ memcpy(&tmp_val, val, sizeof(*val));
+ F_SET(&tmp_val, DB_DBT_PARTIAL);
+ tmp_val.dlen = hcp->dup_len;
+ tmp_val.doff = hcp->dup_off + sizeof(db_indx_t);
+ myval = &tmp_val;
+ }
+
+
+ /*
+ * Finally, if we had a duplicate, pp, ndx, and myval should be
+ * set appropriately.
+ */
+ if ((ret = __db_ret(hashp->dbp, pp, ndx, myval, &hcp->big_data,
+ &hcp->big_datalen)) != 0)
+ return (ret);
+
+ /*
+ * In case we sent a temporary off to db_ret, set the real
+ * return values.
+ */
+ val->data = myval->data;
+ val->size = myval->size;
+
+ return (0);
+}
+
+static int
+__ham_overwrite(hashp, hcp, nval)
+ HTAB *hashp;
+ HASH_CURSOR *hcp;
+ DBT *nval;
+{
+ DBT *myval, tmp_val;
+ HKEYDATA *hk;
+
+ if (F_ISSET(hashp->dbp, DB_AM_DUP))
+ return (__ham_add_dup(hashp, hcp, nval, DB_KEYLAST));
+ else if (!F_ISSET(nval, DB_DBT_PARTIAL)) {
+ /* Put/overwrite */
+ memcpy(&tmp_val, nval, sizeof(*nval));
+ F_SET(&tmp_val, DB_DBT_PARTIAL);
+ tmp_val.doff = 0;
+ hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
+ if (hk->type == H_OFFPAGE)
+ memcpy(&tmp_val.dlen,
+ (u_int8_t *)hk + SSZ(HOFFPAGE, tlen),
+ sizeof(u_int32_t));
+ else
+ tmp_val.dlen = LEN_HDATA(hcp->pagep,
+ hashp->hdr->pagesize,hcp->bndx);
+ myval = &tmp_val;
+ } else /* Regular partial put */
+ myval = nval;
+
+ return (__ham_replpair(hashp, hcp, myval, 0));
+}
+
+/*
+ * Given a key and a cursor, sets the cursor to the page/ndx on which
+ * the key resides. If the key is found, the cursor H_OK flag is set
+ * and the pagep, bndx, pgno (dpagep, dndx, dpgno) fields are set.
+ * If the key is not found, the H_OK flag is not set. If the sought
+ * field is non-0, the pagep, bndx, pgno (dpagep, dndx, dpgno) fields
+ * are set indicating where an add might take place. If it is 0,
+ * non of the cursor pointer field are valid.
+ */
+static int
+__ham_lookup(hashp, hcp, key, sought, mode)
+ HTAB *hashp;
+ HASH_CURSOR *hcp;
+ const DBT *key;
+ u_int32_t sought;
+ db_lockmode_t mode;
+{
+ HKEYDATA *hk;
+ db_pgno_t pgno;
+ u_int32_t tlen;
+ int match, ret, t_ret;
+
+ /*
+ * Set up cursor so that we're looking for space to add an item
+ * as we cycle through the pages looking for the key.
+ */
+ if ((ret = __ham_item_reset(hashp, hcp)) != 0)
+ return (ret);
+ hcp->seek_size = sought;
+
+ hcp->bucket = __ham_call_hash(hashp, (u_int8_t *)key->data, key->size);
+ while (1) {
+ if ((ret = __ham_item_next(hashp, hcp, mode)) != 0)
+ return (ret);
+
+ if (F_ISSET(hcp, H_NOMORE))
+ break;
+
+ hk = H_PAIRKEY(hcp->pagep, hcp->bndx);
+ switch (hk->type) {
+ case H_OFFPAGE:
+ memcpy(&tlen, (u_int8_t *)hk + SSZ(HOFFPAGE, tlen),
+ sizeof(u_int32_t));
+ if (tlen == key->size) {
+ memcpy(&pgno,
+ (u_int8_t *)hk + SSZ(HOFFPAGE, pgno),
+ sizeof(db_pgno_t));
+ match = __db_moff(hashp->dbp, key, pgno);
+ if (match == 0) {
+ F_SET(hcp, H_OK);
+ return (0);
+ }
+ }
+ break;
+ case H_KEYDATA:
+ if (key->size == LEN_HKEY(hcp->pagep,
+ hashp->hdr->pagesize, hcp->bndx) &&
+ memcmp(key->data, hk->data, key->size) == 0) {
+ F_SET(hcp, H_OK);
+ return (0);
+ }
+ break;
+ case H_DUPLICATE:
+ case H_OFFDUP:
+ /*
+ * These are errors because keys are never
+ * duplicated, only data items are.
+ */
+ return (__db_pgfmt(hashp->dbp, PGNO(hcp->pagep)));
+ }
+ hashp->hash_collisions++;
+ }
+
+ /*
+ * Item was not found, adjust cursor properly.
+ */
+
+ if (sought != 0)
+ return (ret);
+
+ if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0)
+ ret = t_ret;
+ return (ret);
+}
+
+/*
+ * Initialize a dbt using some possibly already allocated storage
+ * for items.
+ * PUBLIC: int __ham_init_dbt __P((DBT *, u_int32_t, void **, u_int32_t *));
+ */
+int
+__ham_init_dbt(dbt, size, bufp, sizep)
+ DBT *dbt;
+ u_int32_t size;
+ void **bufp;
+ u_int32_t *sizep;
+{
+ memset(dbt, 0, sizeof(*dbt));
+ if (*sizep < size) {
+ if ((*bufp = (void *)(*bufp == NULL ?
+ malloc(size) : realloc(*bufp, size))) == NULL) {
+ *sizep = 0;
+ return (ENOMEM);
+ }
+ *sizep = size;
+ }
+ dbt->data = *bufp;
+ dbt->size = size;
+ return (0);
+}
+
+/*
+ * Adjust the cursor after an insert or delete. The cursor passed is
+ * the one that was operated upon; we just need to check any of the
+ * others.
+ *
+ * len indicates the length of the item added/deleted
+ * add indicates if the item indicated by the cursor has just been
+ * added (add == 1) or deleted (add == 0).
+ * dup indicates if the addition occurred into a duplicate set.
+ *
+ * PUBLIC: void __ham_c_update __P((HTAB *,
+ * PUBLIC: HASH_CURSOR *, db_pgno_t, u_int32_t, int, int));
+ */
+void
+__ham_c_update(hashp, hcp, chg_pgno, len, add, dup)
+ HTAB *hashp;
+ HASH_CURSOR *hcp;
+ db_pgno_t chg_pgno;
+ u_int32_t len;
+ int add;
+ int dup;
+{
+ DBC *cp;
+ HTAB *hp;
+ HASH_CURSOR *lcp;
+ int page_deleted;
+
+ /*
+ * Regular adds are always at the end of a given page,
+ * so we never have to adjust anyone's cursor after
+ * a regular add.
+ */
+ if (!dup && add)
+ return;
+
+ page_deleted = chg_pgno != PGNO_INVALID &&
+ ((!dup && chg_pgno != hcp->pgno) ||
+ (dup && chg_pgno != hcp->dpgno));
+
+ hp = hcp->db_cursor->dbp->master->internal;
+ DB_THREAD_LOCK(hp->dbp);
+
+ for (cp = TAILQ_FIRST(&hp->dbp->curs_queue); cp != NULL;
+ cp = TAILQ_NEXT(cp, links)) {
+ if (cp->internal == hcp)
+ continue;
+
+ lcp = (HASH_CURSOR *)cp->internal;
+
+ if (!dup && lcp->pgno != chg_pgno)
+ continue;
+
+ if (dup && F_ISSET(hcp, H_DELETED) && lcp->pgno != chg_pgno)
+ continue;
+
+ if (dup && !F_ISSET(hcp, H_DELETED) && lcp->dpgno != chg_pgno)
+ continue;
+
+ if (page_deleted) {
+ if (dup) {
+ lcp->dpgno = hcp->dpgno;
+ lcp->dndx = hcp->dndx;
+ } else {
+ lcp->pgno = hcp->pgno;
+ lcp->bndx = hcp->bndx;
+ lcp->bucket = hcp->bucket;
+ }
+ F_CLR(lcp, H_ISDUP);
+ continue;
+ }
+
+ if (!dup && lcp->bndx > hcp->bndx)
+ lcp->bndx--;
+ else if (!dup && lcp->bndx == hcp->bndx)
+ F_SET(lcp, H_DELETED);
+ else if (dup && lcp->bndx == hcp->bndx) {
+ /* Assign dpgno in case there was page conversion. */
+ lcp->dpgno = hcp->dpgno;
+ if (add && lcp->dndx >= hcp->dndx )
+ lcp->dndx++;
+ else if (!add && lcp->dndx > hcp->dndx)
+ lcp->dndx--;
+ else if (!add && lcp->dndx == hcp->dndx)
+ F_SET(lcp, H_DELETED);
+
+ /* Now adjust on-page information. */
+ if (lcp->dpgno == PGNO_INVALID)
+ if (add) {
+ lcp->dup_tlen += len;
+ if (lcp->dndx > hcp->dndx)
+ lcp->dup_off += len;
+ } else {
+ lcp->dup_tlen -= len;
+ if (lcp->dndx > hcp->dndx)
+ lcp->dup_off -= len;
+ }
+ }
+ }
+ DB_THREAD_UNLOCK(hp->dbp);
+}
+
+/*
+ * __ham_hdup --
+ * This function gets called when we create a duplicate handle for a
+ * threaded DB. It should create the private part of the DB structure.
+ * PUBLIC: int __ham_hdup __P((DB *, DB *));
+ */
+int
+__ham_hdup(orig, new)
+ DB *orig, *new;
+{
+ HTAB *hashp;
+ DBC *curs;
+ int ret;
+
+ if ((hashp = (HTAB *)malloc(sizeof(HTAB))) == NULL)
+ return (ENOMEM);
+
+ new->internal = hashp;
+
+ hashp->dbp = new;
+ hashp->hlock = 0;
+ hashp->hdr = NULL;
+ hashp->hash = ((HTAB *)orig->internal)->hash;
+ if ((hashp->split_buf = (PAGE *)malloc(orig->pgsize)) == NULL)
+ return (ENOMEM);
+ hashp->local_errno = 0;
+ hashp->hash_accesses = 0;
+ hashp->hash_collisions = 0;
+ hashp->hash_expansions = 0;
+ hashp->hash_overflows = 0;
+ hashp->hash_bigpages = 0;
+ /* Initialize the cursor queue. */
+ ret = __ham_c_init(new, NULL, &curs);
+ TAILQ_INSERT_TAIL(&new->curs_queue, curs, links);
+ return (ret);
+}
diff --git a/db2/hash/hash.src b/db2/hash/hash.src
new file mode 100644
index 0000000000..04a98d3cb3
--- /dev/null
+++ b/db2/hash/hash.src
@@ -0,0 +1,211 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ * Sleepycat Software. All rights reserved.
+ */
+/*
+ * Copyright (c) 1995, 1996
+ * Margo Seltzer. All rights reserved.
+ */
+/*
+ * Copyright (c) 1995, 1996
+ * The President and Fellows of Harvard University. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)hash.src 10.1 (Sleepycat) 4/12/97
+ */
+
+#include "config.h"
+
+/*
+ * This is the source file used to create the logging functions for the
+ * hash package. Each access method (or set of routines wishing to register
+ * record types with the transaction system) should have a file like this.
+ * Each type of log record and its parameters is defined. The basic
+ * format of a record definition is:
+ *
+ * BEGIN <RECORD_TYPE>
+ * ARG|STRING|POINTER <variable name> <variable type> <printf format>
+ * ...
+ * END
+ * ARG the argument is a simple parameter of the type * specified.
+ * DBT the argument is a DBT (db.h) containing a length and pointer.
+ * PTR the argument is a pointer to the data type specified; the entire
+ * type should be logged.
+ *
+ * There are a set of shell scripts of the form xxx.sh that generate c
+ * code and or h files to process these. (This is probably better done
+ * in a single PERL script, but for now, this works.)
+ *
+ * The DB recovery system requires the following three fields appear in
+ * every record, and will assign them to the per-record-type structures
+ * as well as making them the first parameters to the appropriate logging
+ * call.
+ * rectype: record-type, identifies the structure and log/read call
+ * txnid: transaction id, a DBT in this implementation
+ * prev: the last LSN for this transaction
+ */
+
+/*
+ * Use the argument of PREFIX as the prefix for all record types,
+ * routines, id numbers, etc.
+ */
+PREFIX ham
+
+/*
+ * HASH-insdel: used for hash to insert/delete a pair of entries onto a master
+ * page. The pair might be regular key/data pairs or they might be the
+ * structures that refer to off page items, duplicates or offpage duplicates.
+ * opcode - PUTPAIR/DELPAIR + big masks
+ * fileid - identifies the file referenced
+ * pgno - page within file
+ * ndx - index on the page of the item being added (item index)
+ * pagelsn - lsn on the page before the update
+ * key - the key being inserted
+ * data - the data being inserted
+ */
+BEGIN insdel
+ARG opcode u_int32_t lu
+ARG fileid u_int32_t lu
+ARG pgno db_pgno_t lu
+ARG ndx u_int32_t lu
+POINTER pagelsn DB_LSN * lu
+DBT key DBT s
+DBT data DBT s
+END
+
+/*
+ * Used to add and remove overflow pages.
+ * prev_pgno is the previous page that is going to get modified to
+ * point to this one. If this is the first page in a chain
+ * then prev_pgno should be PGNO_INVALID.
+ * new_pgno is the page being allocated.
+ * next_pgno is the page that follows this one. On allocation,
+ * this should be PGNO_INVALID. For deletes, it may exist.
+ * pagelsn is the old lsn on the page.
+ */
+BEGIN newpage
+ARG opcode u_int32_t lu
+ARG fileid u_int32_t lu
+ARG prev_pgno db_pgno_t lu
+POINTER prevlsn DB_LSN * lu
+ARG new_pgno db_pgno_t lu
+POINTER pagelsn DB_LSN * lu
+ARG next_pgno db_pgno_t lu
+POINTER nextlsn DB_LSN * lu
+END
+
+/*
+ * Splitting requires two types of log messages. The first
+ * logs the meta-data of the split. The second logs the
+ * data on the original page. To redo the split, we have
+ * to visit the new page (pages) and add the items back
+ * on the page if they are not yet there.
+ * For the meta-data split
+ * bucket: max_bucket in table before split
+ * ovflpoint: overflow point before split.
+ * spares: spares[ovflpoint] before split.
+ */
+BEGIN splitmeta
+ARG fileid u_int32_t lu
+ARG bucket u_int32_t lu
+ARG ovflpoint u_int32_t lu
+ARG spares u_int32_t lu
+POINTER metalsn DB_LSN * lu
+END
+
+BEGIN splitdata
+ARG fileid u_int32_t lu
+ARG opcode u_int32_t lu
+ARG pgno db_pgno_t lu
+DBT pageimage DBT s
+POINTER pagelsn DB_LSN * lu
+END
+
+/*
+ * HASH-replace: is used for hash to handle partial puts that only
+ * affect a single master page.
+ * fileid - identifies the file referenced
+ * pgno - page within file
+ * ndx - index on the page of the item being modified (item index)
+ * pagelsn - lsn on the page before the update
+ * off - offset in the old item where the new item is going.
+ * olditem - DBT that describes the part of the item being replaced.
+ * newitem - DBT of the new item.
+ * makedup - this was a replacement that made an item a duplicate.
+ */
+BEGIN replace
+ARG fileid u_int32_t lu
+ARG pgno db_pgno_t lu
+ARG ndx u_int32_t lu
+POINTER pagelsn DB_LSN * lu
+ARG off int32_t ld
+DBT olditem DBT s
+DBT newitem DBT s
+ARG makedup u_int32_t lu
+END
+
+/*
+ * HASH-newpgno: is used to record getting/deleting a new page number.
+ * This doesn't require much data modification, just modifying the
+ * meta-data.
+ * pgno is the page being allocated/freed.
+ * free_pgno is the next_pgno on the free list.
+ * old_type was the type of a page being deallocated.
+ * old_pgno was the next page number before the deallocation. We use it
+ * to indicate whether we incremented the spares count or not
+ * during this allocation.
+ */
+BEGIN newpgno
+ARG opcode u_int32_t lu
+ARG fileid u_int32_t lu
+ARG pgno db_pgno_t lu
+ARG free_pgno db_pgno_t lu
+ARG old_type u_int32_t lu
+ARG old_pgno db_pgno_t lu
+ARG new_type u_int32_t lu
+POINTER pagelsn DB_LSN * lu
+POINTER metalsn DB_LSN * lu
+END
+
+/*
+ * ovfl: initialize a set of overflow pages.
+ */
+BEGIN ovfl
+ARG fileid u_int32_t lu
+ARG start_pgno db_pgno_t lu
+ARG npages u_int32_t lu
+ARG free_pgno db_pgno_t lu
+POINTER metalsn DB_LSN * lu
+END
diff --git a/db2/hash/hash_auto.c b/db2/hash/hash_auto.c
new file mode 100644
index 0000000000..f8ab80c8ee
--- /dev/null
+++ b/db2/hash/hash_auto.c
@@ -0,0 +1,1343 @@
+/* Do not edit: automatically built by dist/db_gen.sh. */
+#include "config.h"
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <ctype.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_page.h"
+#include "db_dispatch.h"
+#include "hash.h"
+#include "db_am.h"
+#include "common_ext.h"
+
+/*
+ * PUBLIC: int __ham_insdel_log
+ * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC: u_int32_t, u_int32_t, db_pgno_t, u_int32_t,
+ * PUBLIC: DB_LSN *, DBT *, DBT *));
+ */
+int __ham_insdel_log(logp, txnid, ret_lsnp, flags,
+ opcode, fileid, pgno, ndx, pagelsn, key,
+ data)
+ DB_LOG *logp;
+ DB_TXN *txnid;
+ DB_LSN *ret_lsnp;
+ u_int32_t flags;
+ u_int32_t opcode;
+ u_int32_t fileid;
+ db_pgno_t pgno;
+ u_int32_t ndx;
+ DB_LSN * pagelsn;
+ DBT *key;
+ DBT *data;
+{
+ DBT logrec;
+ DB_LSN *lsnp, null_lsn;
+ u_int32_t zero;
+ u_int32_t rectype, txn_num;
+ int ret;
+ u_int8_t *bp;
+
+ rectype = DB_ham_insdel;
+ txn_num = txnid == NULL ? 0 : txnid->txnid;
+ if (txnid == NULL) {
+ null_lsn.file = 0;
+ null_lsn.offset = 0;
+ lsnp = &null_lsn;
+ } else
+ lsnp = &txnid->last_lsn;
+ logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+ + sizeof(opcode)
+ + sizeof(fileid)
+ + sizeof(pgno)
+ + sizeof(ndx)
+ + sizeof(*pagelsn)
+ + sizeof(u_int32_t) + (key == NULL ? 0 : key->size)
+ + sizeof(u_int32_t) + (data == NULL ? 0 : data->size);
+ if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+ return (ENOMEM);
+
+ bp = logrec.data;
+ memcpy(bp, &rectype, sizeof(rectype));
+ bp += sizeof(rectype);
+ memcpy(bp, &txn_num, sizeof(txn_num));
+ bp += sizeof(txn_num);
+ memcpy(bp, lsnp, sizeof(DB_LSN));
+ bp += sizeof(DB_LSN);
+ memcpy(bp, &opcode, sizeof(opcode));
+ bp += sizeof(opcode);
+ memcpy(bp, &fileid, sizeof(fileid));
+ bp += sizeof(fileid);
+ memcpy(bp, &pgno, sizeof(pgno));
+ bp += sizeof(pgno);
+ memcpy(bp, &ndx, sizeof(ndx));
+ bp += sizeof(ndx);
+ if (pagelsn != NULL)
+ memcpy(bp, pagelsn, sizeof(*pagelsn));
+ else
+ memset(bp, 0, sizeof(*pagelsn));
+ bp += sizeof(*pagelsn);
+ if (key == NULL) {
+ zero = 0;
+ memcpy(bp, &zero, sizeof(u_int32_t));
+ bp += sizeof(u_int32_t);
+ } else {
+ memcpy(bp, &key->size, sizeof(key->size));
+ bp += sizeof(key->size);
+ memcpy(bp, key->data, key->size);
+ bp += key->size;
+ }
+ if (data == NULL) {
+ zero = 0;
+ memcpy(bp, &zero, sizeof(u_int32_t));
+ bp += sizeof(u_int32_t);
+ } else {
+ memcpy(bp, &data->size, sizeof(data->size));
+ bp += sizeof(data->size);
+ memcpy(bp, data->data, data->size);
+ bp += data->size;
+ }
+#ifdef DEBUG
+ if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+ fprintf(stderr, "Error in log record length");
+#endif
+ ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+ if (txnid != NULL)
+ txnid->last_lsn = *ret_lsnp;
+ free(logrec.data);
+ return (ret);
+}
+
+/*
+ * PUBLIC: int __ham_insdel_print
+ * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+int
+__ham_insdel_print(notused1, dbtp, lsnp, notused3, notused4)
+ DB_LOG *notused1;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ int notused3;
+ void *notused4;
+{
+ __ham_insdel_args *argp;
+ u_int32_t i;
+ int c, ret;
+
+ i = 0;
+ c = 0;
+ notused1 = NULL;
+ notused3 = 0;
+ notused4 = NULL;
+
+ if((ret = __ham_insdel_read(dbtp->data, &argp)) != 0)
+ return (ret);
+ printf("[%lu][%lu]ham_insdel: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+ (u_long)lsnp->file,
+ (u_long)lsnp->offset,
+ (u_long)argp->type,
+ (u_long)argp->txnid->txnid,
+ (u_long)argp->prev_lsn.file,
+ (u_long)argp->prev_lsn.offset);
+ printf("\topcode: %lu\n", (u_long)argp->opcode);
+ printf("\tfileid: %lu\n", (u_long)argp->fileid);
+ printf("\tpgno: %lu\n", (u_long)argp->pgno);
+ printf("\tndx: %lu\n", (u_long)argp->ndx);
+ printf("\tpagelsn: [%lu][%lu]\n",
+ (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset);
+ printf("\tkey: ");
+ for (i = 0; i < argp->key.size; i++) {
+ c = ((char *)argp->key.data)[i];
+ if (isprint(c) || c == 0xa)
+ putchar(c);
+ else
+ printf("%#x ", c);
+ }
+ printf("\n");
+ printf("\tdata: ");
+ for (i = 0; i < argp->data.size; i++) {
+ c = ((char *)argp->data.data)[i];
+ if (isprint(c) || c == 0xa)
+ putchar(c);
+ else
+ printf("%#x ", c);
+ }
+ printf("\n");
+ printf("\n");
+ free(argp);
+ return (0);
+}
+
+/*
+ * PUBLIC: int __ham_insdel_read __P((void *, __ham_insdel_args **));
+ */
+int
+__ham_insdel_read(recbuf, argpp)
+ void *recbuf;
+ __ham_insdel_args **argpp;
+{
+ __ham_insdel_args *argp;
+ u_int8_t *bp;
+
+ argp = (__ham_insdel_args *)malloc(sizeof(__ham_insdel_args) +
+ sizeof(DB_TXN));
+ if (argp == NULL)
+ return (ENOMEM);
+ argp->txnid = (DB_TXN *)&argp[1];
+ bp = recbuf;
+ memcpy(&argp->type, bp, sizeof(argp->type));
+ bp += sizeof(argp->type);
+ memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
+ bp += sizeof(argp->txnid->txnid);
+ memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
+ bp += sizeof(DB_LSN);
+ memcpy(&argp->opcode, bp, sizeof(argp->opcode));
+ bp += sizeof(argp->opcode);
+ memcpy(&argp->fileid, bp, sizeof(argp->fileid));
+ bp += sizeof(argp->fileid);
+ memcpy(&argp->pgno, bp, sizeof(argp->pgno));
+ bp += sizeof(argp->pgno);
+ memcpy(&argp->ndx, bp, sizeof(argp->ndx));
+ bp += sizeof(argp->ndx);
+ memcpy(&argp->pagelsn, bp, sizeof(argp->pagelsn));
+ bp += sizeof(argp->pagelsn);
+ memcpy(&argp->key.size, bp, sizeof(u_int32_t));
+ bp += sizeof(u_int32_t);
+ argp->key.data = bp;
+ bp += argp->key.size;
+ memcpy(&argp->data.size, bp, sizeof(u_int32_t));
+ bp += sizeof(u_int32_t);
+ argp->data.data = bp;
+ bp += argp->data.size;
+ *argpp = argp;
+ return (0);
+}
+
+/*
+ * PUBLIC: int __ham_newpage_log
+ * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC: u_int32_t, u_int32_t, db_pgno_t, DB_LSN *,
+ * PUBLIC: db_pgno_t, DB_LSN *, db_pgno_t, DB_LSN *));
+ */
+int __ham_newpage_log(logp, txnid, ret_lsnp, flags,
+ opcode, fileid, prev_pgno, prevlsn, new_pgno, pagelsn,
+ next_pgno, nextlsn)
+ DB_LOG *logp;
+ DB_TXN *txnid;
+ DB_LSN *ret_lsnp;
+ u_int32_t flags;
+ u_int32_t opcode;
+ u_int32_t fileid;
+ db_pgno_t prev_pgno;
+ DB_LSN * prevlsn;
+ db_pgno_t new_pgno;
+ DB_LSN * pagelsn;
+ db_pgno_t next_pgno;
+ DB_LSN * nextlsn;
+{
+ DBT logrec;
+ DB_LSN *lsnp, null_lsn;
+ u_int32_t rectype, txn_num;
+ int ret;
+ u_int8_t *bp;
+
+ rectype = DB_ham_newpage;
+ txn_num = txnid == NULL ? 0 : txnid->txnid;
+ if (txnid == NULL) {
+ null_lsn.file = 0;
+ null_lsn.offset = 0;
+ lsnp = &null_lsn;
+ } else
+ lsnp = &txnid->last_lsn;
+ logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+ + sizeof(opcode)
+ + sizeof(fileid)
+ + sizeof(prev_pgno)
+ + sizeof(*prevlsn)
+ + sizeof(new_pgno)
+ + sizeof(*pagelsn)
+ + sizeof(next_pgno)
+ + sizeof(*nextlsn);
+ if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+ return (ENOMEM);
+
+ bp = logrec.data;
+ memcpy(bp, &rectype, sizeof(rectype));
+ bp += sizeof(rectype);
+ memcpy(bp, &txn_num, sizeof(txn_num));
+ bp += sizeof(txn_num);
+ memcpy(bp, lsnp, sizeof(DB_LSN));
+ bp += sizeof(DB_LSN);
+ memcpy(bp, &opcode, sizeof(opcode));
+ bp += sizeof(opcode);
+ memcpy(bp, &fileid, sizeof(fileid));
+ bp += sizeof(fileid);
+ memcpy(bp, &prev_pgno, sizeof(prev_pgno));
+ bp += sizeof(prev_pgno);
+ if (prevlsn != NULL)
+ memcpy(bp, prevlsn, sizeof(*prevlsn));
+ else
+ memset(bp, 0, sizeof(*prevlsn));
+ bp += sizeof(*prevlsn);
+ memcpy(bp, &new_pgno, sizeof(new_pgno));
+ bp += sizeof(new_pgno);
+ if (pagelsn != NULL)
+ memcpy(bp, pagelsn, sizeof(*pagelsn));
+ else
+ memset(bp, 0, sizeof(*pagelsn));
+ bp += sizeof(*pagelsn);
+ memcpy(bp, &next_pgno, sizeof(next_pgno));
+ bp += sizeof(next_pgno);
+ if (nextlsn != NULL)
+ memcpy(bp, nextlsn, sizeof(*nextlsn));
+ else
+ memset(bp, 0, sizeof(*nextlsn));
+ bp += sizeof(*nextlsn);
+#ifdef DEBUG
+ if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+ fprintf(stderr, "Error in log record length");
+#endif
+ ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+ if (txnid != NULL)
+ txnid->last_lsn = *ret_lsnp;
+ free(logrec.data);
+ return (ret);
+}
+
+/*
+ * PUBLIC: int __ham_newpage_print
+ * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+int
+__ham_newpage_print(notused1, dbtp, lsnp, notused3, notused4)
+ DB_LOG *notused1;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ int notused3;
+ void *notused4;
+{
+ __ham_newpage_args *argp;
+ u_int32_t i;
+ int c, ret;
+
+ i = 0;
+ c = 0;
+ notused1 = NULL;
+ notused3 = 0;
+ notused4 = NULL;
+
+ if((ret = __ham_newpage_read(dbtp->data, &argp)) != 0)
+ return (ret);
+ printf("[%lu][%lu]ham_newpage: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+ (u_long)lsnp->file,
+ (u_long)lsnp->offset,
+ (u_long)argp->type,
+ (u_long)argp->txnid->txnid,
+ (u_long)argp->prev_lsn.file,
+ (u_long)argp->prev_lsn.offset);
+ printf("\topcode: %lu\n", (u_long)argp->opcode);
+ printf("\tfileid: %lu\n", (u_long)argp->fileid);
+ printf("\tprev_pgno: %lu\n", (u_long)argp->prev_pgno);
+ printf("\tprevlsn: [%lu][%lu]\n",
+ (u_long)argp->prevlsn.file, (u_long)argp->prevlsn.offset);
+ printf("\tnew_pgno: %lu\n", (u_long)argp->new_pgno);
+ printf("\tpagelsn: [%lu][%lu]\n",
+ (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset);
+ printf("\tnext_pgno: %lu\n", (u_long)argp->next_pgno);
+ printf("\tnextlsn: [%lu][%lu]\n",
+ (u_long)argp->nextlsn.file, (u_long)argp->nextlsn.offset);
+ printf("\n");
+ free(argp);
+ return (0);
+}
+
+/*
+ * PUBLIC: int __ham_newpage_read __P((void *, __ham_newpage_args **));
+ */
+int
+__ham_newpage_read(recbuf, argpp)
+ void *recbuf;
+ __ham_newpage_args **argpp;
+{
+ __ham_newpage_args *argp;
+ u_int8_t *bp;
+
+ argp = (__ham_newpage_args *)malloc(sizeof(__ham_newpage_args) +
+ sizeof(DB_TXN));
+ if (argp == NULL)
+ return (ENOMEM);
+ argp->txnid = (DB_TXN *)&argp[1];
+ bp = recbuf;
+ memcpy(&argp->type, bp, sizeof(argp->type));
+ bp += sizeof(argp->type);
+ memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
+ bp += sizeof(argp->txnid->txnid);
+ memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
+ bp += sizeof(DB_LSN);
+ memcpy(&argp->opcode, bp, sizeof(argp->opcode));
+ bp += sizeof(argp->opcode);
+ memcpy(&argp->fileid, bp, sizeof(argp->fileid));
+ bp += sizeof(argp->fileid);
+ memcpy(&argp->prev_pgno, bp, sizeof(argp->prev_pgno));
+ bp += sizeof(argp->prev_pgno);
+ memcpy(&argp->prevlsn, bp, sizeof(argp->prevlsn));
+ bp += sizeof(argp->prevlsn);
+ memcpy(&argp->new_pgno, bp, sizeof(argp->new_pgno));
+ bp += sizeof(argp->new_pgno);
+ memcpy(&argp->pagelsn, bp, sizeof(argp->pagelsn));
+ bp += sizeof(argp->pagelsn);
+ memcpy(&argp->next_pgno, bp, sizeof(argp->next_pgno));
+ bp += sizeof(argp->next_pgno);
+ memcpy(&argp->nextlsn, bp, sizeof(argp->nextlsn));
+ bp += sizeof(argp->nextlsn);
+ *argpp = argp;
+ return (0);
+}
+
+/*
+ * PUBLIC: int __ham_splitmeta_log
+ * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC: u_int32_t, u_int32_t, u_int32_t, u_int32_t,
+ * PUBLIC: DB_LSN *));
+ */
+int __ham_splitmeta_log(logp, txnid, ret_lsnp, flags,
+ fileid, bucket, ovflpoint, spares, metalsn)
+ DB_LOG *logp;
+ DB_TXN *txnid;
+ DB_LSN *ret_lsnp;
+ u_int32_t flags;
+ u_int32_t fileid;
+ u_int32_t bucket;
+ u_int32_t ovflpoint;
+ u_int32_t spares;
+ DB_LSN * metalsn;
+{
+ DBT logrec;
+ DB_LSN *lsnp, null_lsn;
+ u_int32_t rectype, txn_num;
+ int ret;
+ u_int8_t *bp;
+
+ rectype = DB_ham_splitmeta;
+ txn_num = txnid == NULL ? 0 : txnid->txnid;
+ if (txnid == NULL) {
+ null_lsn.file = 0;
+ null_lsn.offset = 0;
+ lsnp = &null_lsn;
+ } else
+ lsnp = &txnid->last_lsn;
+ logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+ + sizeof(fileid)
+ + sizeof(bucket)
+ + sizeof(ovflpoint)
+ + sizeof(spares)
+ + sizeof(*metalsn);
+ if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+ return (ENOMEM);
+
+ bp = logrec.data;
+ memcpy(bp, &rectype, sizeof(rectype));
+ bp += sizeof(rectype);
+ memcpy(bp, &txn_num, sizeof(txn_num));
+ bp += sizeof(txn_num);
+ memcpy(bp, lsnp, sizeof(DB_LSN));
+ bp += sizeof(DB_LSN);
+ memcpy(bp, &fileid, sizeof(fileid));
+ bp += sizeof(fileid);
+ memcpy(bp, &bucket, sizeof(bucket));
+ bp += sizeof(bucket);
+ memcpy(bp, &ovflpoint, sizeof(ovflpoint));
+ bp += sizeof(ovflpoint);
+ memcpy(bp, &spares, sizeof(spares));
+ bp += sizeof(spares);
+ if (metalsn != NULL)
+ memcpy(bp, metalsn, sizeof(*metalsn));
+ else
+ memset(bp, 0, sizeof(*metalsn));
+ bp += sizeof(*metalsn);
+#ifdef DEBUG
+ if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+ fprintf(stderr, "Error in log record length");
+#endif
+ ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+ if (txnid != NULL)
+ txnid->last_lsn = *ret_lsnp;
+ free(logrec.data);
+ return (ret);
+}
+
+/*
+ * PUBLIC: int __ham_splitmeta_print
+ * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+int
+__ham_splitmeta_print(notused1, dbtp, lsnp, notused3, notused4)
+ DB_LOG *notused1;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ int notused3;
+ void *notused4;
+{
+ __ham_splitmeta_args *argp;
+ u_int32_t i;
+ int c, ret;
+
+ i = 0;
+ c = 0;
+ notused1 = NULL;
+ notused3 = 0;
+ notused4 = NULL;
+
+ if((ret = __ham_splitmeta_read(dbtp->data, &argp)) != 0)
+ return (ret);
+ printf("[%lu][%lu]ham_splitmeta: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+ (u_long)lsnp->file,
+ (u_long)lsnp->offset,
+ (u_long)argp->type,
+ (u_long)argp->txnid->txnid,
+ (u_long)argp->prev_lsn.file,
+ (u_long)argp->prev_lsn.offset);
+ printf("\tfileid: %lu\n", (u_long)argp->fileid);
+ printf("\tbucket: %lu\n", (u_long)argp->bucket);
+ printf("\tovflpoint: %lu\n", (u_long)argp->ovflpoint);
+ printf("\tspares: %lu\n", (u_long)argp->spares);
+ printf("\tmetalsn: [%lu][%lu]\n",
+ (u_long)argp->metalsn.file, (u_long)argp->metalsn.offset);
+ printf("\n");
+ free(argp);
+ return (0);
+}
+
+/*
+ * PUBLIC: int __ham_splitmeta_read __P((void *, __ham_splitmeta_args **));
+ */
+int
+__ham_splitmeta_read(recbuf, argpp)
+ void *recbuf;
+ __ham_splitmeta_args **argpp;
+{
+ __ham_splitmeta_args *argp;
+ u_int8_t *bp;
+
+ argp = (__ham_splitmeta_args *)malloc(sizeof(__ham_splitmeta_args) +
+ sizeof(DB_TXN));
+ if (argp == NULL)
+ return (ENOMEM);
+ argp->txnid = (DB_TXN *)&argp[1];
+ bp = recbuf;
+ memcpy(&argp->type, bp, sizeof(argp->type));
+ bp += sizeof(argp->type);
+ memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
+ bp += sizeof(argp->txnid->txnid);
+ memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
+ bp += sizeof(DB_LSN);
+ memcpy(&argp->fileid, bp, sizeof(argp->fileid));
+ bp += sizeof(argp->fileid);
+ memcpy(&argp->bucket, bp, sizeof(argp->bucket));
+ bp += sizeof(argp->bucket);
+ memcpy(&argp->ovflpoint, bp, sizeof(argp->ovflpoint));
+ bp += sizeof(argp->ovflpoint);
+ memcpy(&argp->spares, bp, sizeof(argp->spares));
+ bp += sizeof(argp->spares);
+ memcpy(&argp->metalsn, bp, sizeof(argp->metalsn));
+ bp += sizeof(argp->metalsn);
+ *argpp = argp;
+ return (0);
+}
+
+/*
+ * PUBLIC: int __ham_splitdata_log
+ * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC: u_int32_t, u_int32_t, db_pgno_t, DBT *,
+ * PUBLIC: DB_LSN *));
+ */
+int __ham_splitdata_log(logp, txnid, ret_lsnp, flags,
+ fileid, opcode, pgno, pageimage, pagelsn)
+ DB_LOG *logp;
+ DB_TXN *txnid;
+ DB_LSN *ret_lsnp;
+ u_int32_t flags;
+ u_int32_t fileid;
+ u_int32_t opcode;
+ db_pgno_t pgno;
+ DBT *pageimage;
+ DB_LSN * pagelsn;
+{
+ DBT logrec;
+ DB_LSN *lsnp, null_lsn;
+ u_int32_t zero;
+ u_int32_t rectype, txn_num;
+ int ret;
+ u_int8_t *bp;
+
+ rectype = DB_ham_splitdata;
+ txn_num = txnid == NULL ? 0 : txnid->txnid;
+ if (txnid == NULL) {
+ null_lsn.file = 0;
+ null_lsn.offset = 0;
+ lsnp = &null_lsn;
+ } else
+ lsnp = &txnid->last_lsn;
+ logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+ + sizeof(fileid)
+ + sizeof(opcode)
+ + sizeof(pgno)
+ + sizeof(u_int32_t) + (pageimage == NULL ? 0 : pageimage->size)
+ + sizeof(*pagelsn);
+ if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+ return (ENOMEM);
+
+ bp = logrec.data;
+ memcpy(bp, &rectype, sizeof(rectype));
+ bp += sizeof(rectype);
+ memcpy(bp, &txn_num, sizeof(txn_num));
+ bp += sizeof(txn_num);
+ memcpy(bp, lsnp, sizeof(DB_LSN));
+ bp += sizeof(DB_LSN);
+ memcpy(bp, &fileid, sizeof(fileid));
+ bp += sizeof(fileid);
+ memcpy(bp, &opcode, sizeof(opcode));
+ bp += sizeof(opcode);
+ memcpy(bp, &pgno, sizeof(pgno));
+ bp += sizeof(pgno);
+ if (pageimage == NULL) {
+ zero = 0;
+ memcpy(bp, &zero, sizeof(u_int32_t));
+ bp += sizeof(u_int32_t);
+ } else {
+ memcpy(bp, &pageimage->size, sizeof(pageimage->size));
+ bp += sizeof(pageimage->size);
+ memcpy(bp, pageimage->data, pageimage->size);
+ bp += pageimage->size;
+ }
+ if (pagelsn != NULL)
+ memcpy(bp, pagelsn, sizeof(*pagelsn));
+ else
+ memset(bp, 0, sizeof(*pagelsn));
+ bp += sizeof(*pagelsn);
+#ifdef DEBUG
+ if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+ fprintf(stderr, "Error in log record length");
+#endif
+ ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+ if (txnid != NULL)
+ txnid->last_lsn = *ret_lsnp;
+ free(logrec.data);
+ return (ret);
+}
+
+/*
+ * PUBLIC: int __ham_splitdata_print
+ * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+int
+__ham_splitdata_print(notused1, dbtp, lsnp, notused3, notused4)
+ DB_LOG *notused1;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ int notused3;
+ void *notused4;
+{
+ __ham_splitdata_args *argp;
+ u_int32_t i;
+ int c, ret;
+
+ i = 0;
+ c = 0;
+ notused1 = NULL;
+ notused3 = 0;
+ notused4 = NULL;
+
+ if((ret = __ham_splitdata_read(dbtp->data, &argp)) != 0)
+ return (ret);
+ printf("[%lu][%lu]ham_splitdata: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+ (u_long)lsnp->file,
+ (u_long)lsnp->offset,
+ (u_long)argp->type,
+ (u_long)argp->txnid->txnid,
+ (u_long)argp->prev_lsn.file,
+ (u_long)argp->prev_lsn.offset);
+ printf("\tfileid: %lu\n", (u_long)argp->fileid);
+ printf("\topcode: %lu\n", (u_long)argp->opcode);
+ printf("\tpgno: %lu\n", (u_long)argp->pgno);
+ printf("\tpageimage: ");
+ for (i = 0; i < argp->pageimage.size; i++) {
+ c = ((char *)argp->pageimage.data)[i];
+ if (isprint(c) || c == 0xa)
+ putchar(c);
+ else
+ printf("%#x ", c);
+ }
+ printf("\n");
+ printf("\tpagelsn: [%lu][%lu]\n",
+ (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset);
+ printf("\n");
+ free(argp);
+ return (0);
+}
+
+/*
+ * PUBLIC: int __ham_splitdata_read __P((void *, __ham_splitdata_args **));
+ */
+int
+__ham_splitdata_read(recbuf, argpp)
+ void *recbuf;
+ __ham_splitdata_args **argpp;
+{
+ __ham_splitdata_args *argp;
+ u_int8_t *bp;
+
+ argp = (__ham_splitdata_args *)malloc(sizeof(__ham_splitdata_args) +
+ sizeof(DB_TXN));
+ if (argp == NULL)
+ return (ENOMEM);
+ argp->txnid = (DB_TXN *)&argp[1];
+ bp = recbuf;
+ memcpy(&argp->type, bp, sizeof(argp->type));
+ bp += sizeof(argp->type);
+ memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
+ bp += sizeof(argp->txnid->txnid);
+ memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
+ bp += sizeof(DB_LSN);
+ memcpy(&argp->fileid, bp, sizeof(argp->fileid));
+ bp += sizeof(argp->fileid);
+ memcpy(&argp->opcode, bp, sizeof(argp->opcode));
+ bp += sizeof(argp->opcode);
+ memcpy(&argp->pgno, bp, sizeof(argp->pgno));
+ bp += sizeof(argp->pgno);
+ memcpy(&argp->pageimage.size, bp, sizeof(u_int32_t));
+ bp += sizeof(u_int32_t);
+ argp->pageimage.data = bp;
+ bp += argp->pageimage.size;
+ memcpy(&argp->pagelsn, bp, sizeof(argp->pagelsn));
+ bp += sizeof(argp->pagelsn);
+ *argpp = argp;
+ return (0);
+}
+
+/*
+ * PUBLIC: int __ham_replace_log
+ * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC: u_int32_t, db_pgno_t, u_int32_t, DB_LSN *,
+ * PUBLIC: int32_t, DBT *, DBT *, u_int32_t));
+ */
+int __ham_replace_log(logp, txnid, ret_lsnp, flags,
+ fileid, pgno, ndx, pagelsn, off, olditem,
+ newitem, makedup)
+ DB_LOG *logp;
+ DB_TXN *txnid;
+ DB_LSN *ret_lsnp;
+ u_int32_t flags;
+ u_int32_t fileid;
+ db_pgno_t pgno;
+ u_int32_t ndx;
+ DB_LSN * pagelsn;
+ int32_t off;
+ DBT *olditem;
+ DBT *newitem;
+ u_int32_t makedup;
+{
+ DBT logrec;
+ DB_LSN *lsnp, null_lsn;
+ u_int32_t zero;
+ u_int32_t rectype, txn_num;
+ int ret;
+ u_int8_t *bp;
+
+ rectype = DB_ham_replace;
+ txn_num = txnid == NULL ? 0 : txnid->txnid;
+ if (txnid == NULL) {
+ null_lsn.file = 0;
+ null_lsn.offset = 0;
+ lsnp = &null_lsn;
+ } else
+ lsnp = &txnid->last_lsn;
+ logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+ + sizeof(fileid)
+ + sizeof(pgno)
+ + sizeof(ndx)
+ + sizeof(*pagelsn)
+ + sizeof(off)
+ + sizeof(u_int32_t) + (olditem == NULL ? 0 : olditem->size)
+ + sizeof(u_int32_t) + (newitem == NULL ? 0 : newitem->size)
+ + sizeof(makedup);
+ if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+ return (ENOMEM);
+
+ bp = logrec.data;
+ memcpy(bp, &rectype, sizeof(rectype));
+ bp += sizeof(rectype);
+ memcpy(bp, &txn_num, sizeof(txn_num));
+ bp += sizeof(txn_num);
+ memcpy(bp, lsnp, sizeof(DB_LSN));
+ bp += sizeof(DB_LSN);
+ memcpy(bp, &fileid, sizeof(fileid));
+ bp += sizeof(fileid);
+ memcpy(bp, &pgno, sizeof(pgno));
+ bp += sizeof(pgno);
+ memcpy(bp, &ndx, sizeof(ndx));
+ bp += sizeof(ndx);
+ if (pagelsn != NULL)
+ memcpy(bp, pagelsn, sizeof(*pagelsn));
+ else
+ memset(bp, 0, sizeof(*pagelsn));
+ bp += sizeof(*pagelsn);
+ memcpy(bp, &off, sizeof(off));
+ bp += sizeof(off);
+ if (olditem == NULL) {
+ zero = 0;
+ memcpy(bp, &zero, sizeof(u_int32_t));
+ bp += sizeof(u_int32_t);
+ } else {
+ memcpy(bp, &olditem->size, sizeof(olditem->size));
+ bp += sizeof(olditem->size);
+ memcpy(bp, olditem->data, olditem->size);
+ bp += olditem->size;
+ }
+ if (newitem == NULL) {
+ zero = 0;
+ memcpy(bp, &zero, sizeof(u_int32_t));
+ bp += sizeof(u_int32_t);
+ } else {
+ memcpy(bp, &newitem->size, sizeof(newitem->size));
+ bp += sizeof(newitem->size);
+ memcpy(bp, newitem->data, newitem->size);
+ bp += newitem->size;
+ }
+ memcpy(bp, &makedup, sizeof(makedup));
+ bp += sizeof(makedup);
+#ifdef DEBUG
+ if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+ fprintf(stderr, "Error in log record length");
+#endif
+ ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+ if (txnid != NULL)
+ txnid->last_lsn = *ret_lsnp;
+ free(logrec.data);
+ return (ret);
+}
+
+/*
+ * PUBLIC: int __ham_replace_print
+ * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+int
+__ham_replace_print(notused1, dbtp, lsnp, notused3, notused4)
+ DB_LOG *notused1;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ int notused3;
+ void *notused4;
+{
+ __ham_replace_args *argp;
+ u_int32_t i;
+ int c, ret;
+
+ i = 0;
+ c = 0;
+ notused1 = NULL;
+ notused3 = 0;
+ notused4 = NULL;
+
+ if((ret = __ham_replace_read(dbtp->data, &argp)) != 0)
+ return (ret);
+ printf("[%lu][%lu]ham_replace: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+ (u_long)lsnp->file,
+ (u_long)lsnp->offset,
+ (u_long)argp->type,
+ (u_long)argp->txnid->txnid,
+ (u_long)argp->prev_lsn.file,
+ (u_long)argp->prev_lsn.offset);
+ printf("\tfileid: %lu\n", (u_long)argp->fileid);
+ printf("\tpgno: %lu\n", (u_long)argp->pgno);
+ printf("\tndx: %lu\n", (u_long)argp->ndx);
+ printf("\tpagelsn: [%lu][%lu]\n",
+ (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset);
+ printf("\toff: %ld\n", (long)argp->off);
+ printf("\tolditem: ");
+ for (i = 0; i < argp->olditem.size; i++) {
+ c = ((char *)argp->olditem.data)[i];
+ if (isprint(c) || c == 0xa)
+ putchar(c);
+ else
+ printf("%#x ", c);
+ }
+ printf("\n");
+ printf("\tnewitem: ");
+ for (i = 0; i < argp->newitem.size; i++) {
+ c = ((char *)argp->newitem.data)[i];
+ if (isprint(c) || c == 0xa)
+ putchar(c);
+ else
+ printf("%#x ", c);
+ }
+ printf("\n");
+ printf("\tmakedup: %lu\n", (u_long)argp->makedup);
+ printf("\n");
+ free(argp);
+ return (0);
+}
+
+/*
+ * PUBLIC: int __ham_replace_read __P((void *, __ham_replace_args **));
+ */
+int
+__ham_replace_read(recbuf, argpp)
+ void *recbuf;
+ __ham_replace_args **argpp;
+{
+ __ham_replace_args *argp;
+ u_int8_t *bp;
+
+ argp = (__ham_replace_args *)malloc(sizeof(__ham_replace_args) +
+ sizeof(DB_TXN));
+ if (argp == NULL)
+ return (ENOMEM);
+ argp->txnid = (DB_TXN *)&argp[1];
+ bp = recbuf;
+ memcpy(&argp->type, bp, sizeof(argp->type));
+ bp += sizeof(argp->type);
+ memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
+ bp += sizeof(argp->txnid->txnid);
+ memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
+ bp += sizeof(DB_LSN);
+ memcpy(&argp->fileid, bp, sizeof(argp->fileid));
+ bp += sizeof(argp->fileid);
+ memcpy(&argp->pgno, bp, sizeof(argp->pgno));
+ bp += sizeof(argp->pgno);
+ memcpy(&argp->ndx, bp, sizeof(argp->ndx));
+ bp += sizeof(argp->ndx);
+ memcpy(&argp->pagelsn, bp, sizeof(argp->pagelsn));
+ bp += sizeof(argp->pagelsn);
+ memcpy(&argp->off, bp, sizeof(argp->off));
+ bp += sizeof(argp->off);
+ memcpy(&argp->olditem.size, bp, sizeof(u_int32_t));
+ bp += sizeof(u_int32_t);
+ argp->olditem.data = bp;
+ bp += argp->olditem.size;
+ memcpy(&argp->newitem.size, bp, sizeof(u_int32_t));
+ bp += sizeof(u_int32_t);
+ argp->newitem.data = bp;
+ bp += argp->newitem.size;
+ memcpy(&argp->makedup, bp, sizeof(argp->makedup));
+ bp += sizeof(argp->makedup);
+ *argpp = argp;
+ return (0);
+}
+
+/*
+ * PUBLIC: int __ham_newpgno_log
+ * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC: u_int32_t, u_int32_t, db_pgno_t, db_pgno_t,
+ * PUBLIC: u_int32_t, db_pgno_t, u_int32_t, DB_LSN *,
+ * PUBLIC: DB_LSN *));
+ */
+int __ham_newpgno_log(logp, txnid, ret_lsnp, flags,
+ opcode, fileid, pgno, free_pgno, old_type, old_pgno,
+ new_type, pagelsn, metalsn)
+ DB_LOG *logp;
+ DB_TXN *txnid;
+ DB_LSN *ret_lsnp;
+ u_int32_t flags;
+ u_int32_t opcode;
+ u_int32_t fileid;
+ db_pgno_t pgno;
+ db_pgno_t free_pgno;
+ u_int32_t old_type;
+ db_pgno_t old_pgno;
+ u_int32_t new_type;
+ DB_LSN * pagelsn;
+ DB_LSN * metalsn;
+{
+ DBT logrec;
+ DB_LSN *lsnp, null_lsn;
+ u_int32_t rectype, txn_num;
+ int ret;
+ u_int8_t *bp;
+
+ rectype = DB_ham_newpgno;
+ txn_num = txnid == NULL ? 0 : txnid->txnid;
+ if (txnid == NULL) {
+ null_lsn.file = 0;
+ null_lsn.offset = 0;
+ lsnp = &null_lsn;
+ } else
+ lsnp = &txnid->last_lsn;
+ logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+ + sizeof(opcode)
+ + sizeof(fileid)
+ + sizeof(pgno)
+ + sizeof(free_pgno)
+ + sizeof(old_type)
+ + sizeof(old_pgno)
+ + sizeof(new_type)
+ + sizeof(*pagelsn)
+ + sizeof(*metalsn);
+ if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+ return (ENOMEM);
+
+ bp = logrec.data;
+ memcpy(bp, &rectype, sizeof(rectype));
+ bp += sizeof(rectype);
+ memcpy(bp, &txn_num, sizeof(txn_num));
+ bp += sizeof(txn_num);
+ memcpy(bp, lsnp, sizeof(DB_LSN));
+ bp += sizeof(DB_LSN);
+ memcpy(bp, &opcode, sizeof(opcode));
+ bp += sizeof(opcode);
+ memcpy(bp, &fileid, sizeof(fileid));
+ bp += sizeof(fileid);
+ memcpy(bp, &pgno, sizeof(pgno));
+ bp += sizeof(pgno);
+ memcpy(bp, &free_pgno, sizeof(free_pgno));
+ bp += sizeof(free_pgno);
+ memcpy(bp, &old_type, sizeof(old_type));
+ bp += sizeof(old_type);
+ memcpy(bp, &old_pgno, sizeof(old_pgno));
+ bp += sizeof(old_pgno);
+ memcpy(bp, &new_type, sizeof(new_type));
+ bp += sizeof(new_type);
+ if (pagelsn != NULL)
+ memcpy(bp, pagelsn, sizeof(*pagelsn));
+ else
+ memset(bp, 0, sizeof(*pagelsn));
+ bp += sizeof(*pagelsn);
+ if (metalsn != NULL)
+ memcpy(bp, metalsn, sizeof(*metalsn));
+ else
+ memset(bp, 0, sizeof(*metalsn));
+ bp += sizeof(*metalsn);
+#ifdef DEBUG
+ if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+ fprintf(stderr, "Error in log record length");
+#endif
+ ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+ if (txnid != NULL)
+ txnid->last_lsn = *ret_lsnp;
+ free(logrec.data);
+ return (ret);
+}
+
+/*
+ * PUBLIC: int __ham_newpgno_print
+ * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+int
+__ham_newpgno_print(notused1, dbtp, lsnp, notused3, notused4)
+ DB_LOG *notused1;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ int notused3;
+ void *notused4;
+{
+ __ham_newpgno_args *argp;
+ u_int32_t i;
+ int c, ret;
+
+ i = 0;
+ c = 0;
+ notused1 = NULL;
+ notused3 = 0;
+ notused4 = NULL;
+
+ if((ret = __ham_newpgno_read(dbtp->data, &argp)) != 0)
+ return (ret);
+ printf("[%lu][%lu]ham_newpgno: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+ (u_long)lsnp->file,
+ (u_long)lsnp->offset,
+ (u_long)argp->type,
+ (u_long)argp->txnid->txnid,
+ (u_long)argp->prev_lsn.file,
+ (u_long)argp->prev_lsn.offset);
+ printf("\topcode: %lu\n", (u_long)argp->opcode);
+ printf("\tfileid: %lu\n", (u_long)argp->fileid);
+ printf("\tpgno: %lu\n", (u_long)argp->pgno);
+ printf("\tfree_pgno: %lu\n", (u_long)argp->free_pgno);
+ printf("\told_type: %lu\n", (u_long)argp->old_type);
+ printf("\told_pgno: %lu\n", (u_long)argp->old_pgno);
+ printf("\tnew_type: %lu\n", (u_long)argp->new_type);
+ printf("\tpagelsn: [%lu][%lu]\n",
+ (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset);
+ printf("\tmetalsn: [%lu][%lu]\n",
+ (u_long)argp->metalsn.file, (u_long)argp->metalsn.offset);
+ printf("\n");
+ free(argp);
+ return (0);
+}
+
+/*
+ * PUBLIC: int __ham_newpgno_read __P((void *, __ham_newpgno_args **));
+ */
+int
+__ham_newpgno_read(recbuf, argpp)
+ void *recbuf;
+ __ham_newpgno_args **argpp;
+{
+ __ham_newpgno_args *argp;
+ u_int8_t *bp;
+
+ argp = (__ham_newpgno_args *)malloc(sizeof(__ham_newpgno_args) +
+ sizeof(DB_TXN));
+ if (argp == NULL)
+ return (ENOMEM);
+ argp->txnid = (DB_TXN *)&argp[1];
+ bp = recbuf;
+ memcpy(&argp->type, bp, sizeof(argp->type));
+ bp += sizeof(argp->type);
+ memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
+ bp += sizeof(argp->txnid->txnid);
+ memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
+ bp += sizeof(DB_LSN);
+ memcpy(&argp->opcode, bp, sizeof(argp->opcode));
+ bp += sizeof(argp->opcode);
+ memcpy(&argp->fileid, bp, sizeof(argp->fileid));
+ bp += sizeof(argp->fileid);
+ memcpy(&argp->pgno, bp, sizeof(argp->pgno));
+ bp += sizeof(argp->pgno);
+ memcpy(&argp->free_pgno, bp, sizeof(argp->free_pgno));
+ bp += sizeof(argp->free_pgno);
+ memcpy(&argp->old_type, bp, sizeof(argp->old_type));
+ bp += sizeof(argp->old_type);
+ memcpy(&argp->old_pgno, bp, sizeof(argp->old_pgno));
+ bp += sizeof(argp->old_pgno);
+ memcpy(&argp->new_type, bp, sizeof(argp->new_type));
+ bp += sizeof(argp->new_type);
+ memcpy(&argp->pagelsn, bp, sizeof(argp->pagelsn));
+ bp += sizeof(argp->pagelsn);
+ memcpy(&argp->metalsn, bp, sizeof(argp->metalsn));
+ bp += sizeof(argp->metalsn);
+ *argpp = argp;
+ return (0);
+}
+
+/*
+ * PUBLIC: int __ham_ovfl_log
+ * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC: u_int32_t, db_pgno_t, u_int32_t, db_pgno_t,
+ * PUBLIC: DB_LSN *));
+ */
+int __ham_ovfl_log(logp, txnid, ret_lsnp, flags,
+ fileid, start_pgno, npages, free_pgno, metalsn)
+ DB_LOG *logp;
+ DB_TXN *txnid;
+ DB_LSN *ret_lsnp;
+ u_int32_t flags;
+ u_int32_t fileid;
+ db_pgno_t start_pgno;
+ u_int32_t npages;
+ db_pgno_t free_pgno;
+ DB_LSN * metalsn;
+{
+ DBT logrec;
+ DB_LSN *lsnp, null_lsn;
+ u_int32_t rectype, txn_num;
+ int ret;
+ u_int8_t *bp;
+
+ rectype = DB_ham_ovfl;
+ txn_num = txnid == NULL ? 0 : txnid->txnid;
+ if (txnid == NULL) {
+ null_lsn.file = 0;
+ null_lsn.offset = 0;
+ lsnp = &null_lsn;
+ } else
+ lsnp = &txnid->last_lsn;
+ logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+ + sizeof(fileid)
+ + sizeof(start_pgno)
+ + sizeof(npages)
+ + sizeof(free_pgno)
+ + sizeof(*metalsn);
+ if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+ return (ENOMEM);
+
+ bp = logrec.data;
+ memcpy(bp, &rectype, sizeof(rectype));
+ bp += sizeof(rectype);
+ memcpy(bp, &txn_num, sizeof(txn_num));
+ bp += sizeof(txn_num);
+ memcpy(bp, lsnp, sizeof(DB_LSN));
+ bp += sizeof(DB_LSN);
+ memcpy(bp, &fileid, sizeof(fileid));
+ bp += sizeof(fileid);
+ memcpy(bp, &start_pgno, sizeof(start_pgno));
+ bp += sizeof(start_pgno);
+ memcpy(bp, &npages, sizeof(npages));
+ bp += sizeof(npages);
+ memcpy(bp, &free_pgno, sizeof(free_pgno));
+ bp += sizeof(free_pgno);
+ if (metalsn != NULL)
+ memcpy(bp, metalsn, sizeof(*metalsn));
+ else
+ memset(bp, 0, sizeof(*metalsn));
+ bp += sizeof(*metalsn);
+#ifdef DEBUG
+ if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+ fprintf(stderr, "Error in log record length");
+#endif
+ ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+ if (txnid != NULL)
+ txnid->last_lsn = *ret_lsnp;
+ free(logrec.data);
+ return (ret);
+}
+
+/*
+ * PUBLIC: int __ham_ovfl_print
+ * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+int
+__ham_ovfl_print(notused1, dbtp, lsnp, notused3, notused4)
+ DB_LOG *notused1;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ int notused3;
+ void *notused4;
+{
+ __ham_ovfl_args *argp;
+ u_int32_t i;
+ int c, ret;
+
+ i = 0;
+ c = 0;
+ notused1 = NULL;
+ notused3 = 0;
+ notused4 = NULL;
+
+ if((ret = __ham_ovfl_read(dbtp->data, &argp)) != 0)
+ return (ret);
+ printf("[%lu][%lu]ham_ovfl: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+ (u_long)lsnp->file,
+ (u_long)lsnp->offset,
+ (u_long)argp->type,
+ (u_long)argp->txnid->txnid,
+ (u_long)argp->prev_lsn.file,
+ (u_long)argp->prev_lsn.offset);
+ printf("\tfileid: %lu\n", (u_long)argp->fileid);
+ printf("\tstart_pgno: %lu\n", (u_long)argp->start_pgno);
+ printf("\tnpages: %lu\n", (u_long)argp->npages);
+ printf("\tfree_pgno: %lu\n", (u_long)argp->free_pgno);
+ printf("\tmetalsn: [%lu][%lu]\n",
+ (u_long)argp->metalsn.file, (u_long)argp->metalsn.offset);
+ printf("\n");
+ free(argp);
+ return (0);
+}
+
+/*
+ * PUBLIC: int __ham_ovfl_read __P((void *, __ham_ovfl_args **));
+ */
+int
+__ham_ovfl_read(recbuf, argpp)
+ void *recbuf;
+ __ham_ovfl_args **argpp;
+{
+ __ham_ovfl_args *argp;
+ u_int8_t *bp;
+
+ argp = (__ham_ovfl_args *)malloc(sizeof(__ham_ovfl_args) +
+ sizeof(DB_TXN));
+ if (argp == NULL)
+ return (ENOMEM);
+ argp->txnid = (DB_TXN *)&argp[1];
+ bp = recbuf;
+ memcpy(&argp->type, bp, sizeof(argp->type));
+ bp += sizeof(argp->type);
+ memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
+ bp += sizeof(argp->txnid->txnid);
+ memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
+ bp += sizeof(DB_LSN);
+ memcpy(&argp->fileid, bp, sizeof(argp->fileid));
+ bp += sizeof(argp->fileid);
+ memcpy(&argp->start_pgno, bp, sizeof(argp->start_pgno));
+ bp += sizeof(argp->start_pgno);
+ memcpy(&argp->npages, bp, sizeof(argp->npages));
+ bp += sizeof(argp->npages);
+ memcpy(&argp->free_pgno, bp, sizeof(argp->free_pgno));
+ bp += sizeof(argp->free_pgno);
+ memcpy(&argp->metalsn, bp, sizeof(argp->metalsn));
+ bp += sizeof(argp->metalsn);
+ *argpp = argp;
+ return (0);
+}
+
+/*
+ * PUBLIC: int __ham_init_print __P((DB_ENV *));
+ */
+int
+__ham_init_print(dbenv)
+ DB_ENV *dbenv;
+{
+ int ret;
+
+ if ((ret = __db_add_recovery(dbenv,
+ __ham_insdel_print, DB_ham_insdel)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery(dbenv,
+ __ham_newpage_print, DB_ham_newpage)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery(dbenv,
+ __ham_splitmeta_print, DB_ham_splitmeta)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery(dbenv,
+ __ham_splitdata_print, DB_ham_splitdata)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery(dbenv,
+ __ham_replace_print, DB_ham_replace)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery(dbenv,
+ __ham_newpgno_print, DB_ham_newpgno)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery(dbenv,
+ __ham_ovfl_print, DB_ham_ovfl)) != 0)
+ return (ret);
+ return (0);
+}
+
+/*
+ * PUBLIC: int __ham_init_recover __P((DB_ENV *));
+ */
+int
+__ham_init_recover(dbenv)
+ DB_ENV *dbenv;
+{
+ int ret;
+
+ if ((ret = __db_add_recovery(dbenv,
+ __ham_insdel_recover, DB_ham_insdel)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery(dbenv,
+ __ham_newpage_recover, DB_ham_newpage)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery(dbenv,
+ __ham_splitmeta_recover, DB_ham_splitmeta)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery(dbenv,
+ __ham_splitdata_recover, DB_ham_splitdata)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery(dbenv,
+ __ham_replace_recover, DB_ham_replace)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery(dbenv,
+ __ham_newpgno_recover, DB_ham_newpgno)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery(dbenv,
+ __ham_ovfl_recover, DB_ham_ovfl)) != 0)
+ return (ret);
+ return (0);
+}
+
diff --git a/db2/hash/hash_conv.c b/db2/hash/hash_conv.c
new file mode 100644
index 0000000000..22901af950
--- /dev/null
+++ b/db2/hash/hash_conv.c
@@ -0,0 +1,101 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ * Sleepycat Software. All rights reserved.
+ */
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)hash_conv.c 10.3 (Sleepycat) 6/21/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "db_swap.h"
+#include "hash.h"
+
+/*
+ * __h_pgin, __ham_pgout --
+ * Convert host-specific page layout to/from the host-independent
+ * format stored on disk.
+ *
+ * PUBLIC: int __ham_pgin __P((db_pgno_t, void *, DBT *));
+ * PUBLIC: int __ham_pgout __P((db_pgno_t, void *, DBT *));
+ */
+int
+__ham_pgin(pg, pp, cookie)
+ db_pgno_t pg;
+ void *pp;
+ DBT *cookie;
+{
+ DB_PGINFO *pginfo;
+ u_int32_t tpgno;
+
+ pginfo = (DB_PGINFO *)cookie->data;
+ tpgno = PGNO((PAGE *)pp);
+ if (pginfo->needswap)
+ M_32_SWAP(tpgno);
+
+ if (pg != PGNO_METADATA && pg != tpgno) {
+ P_INIT(pp, pginfo->db_pagesize,
+ pg, PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
+ return (0);
+ }
+
+ if (!pginfo->needswap)
+ return (0);
+ return (pg == PGNO_METADATA ? __ham_mswap(pp) : __db_pgin(pg, pp));
+}
+
+int
+__ham_pgout(pg, pp, cookie)
+ db_pgno_t pg;
+ void *pp;
+ DBT *cookie;
+{
+ DB_PGINFO *pginfo;
+
+ pginfo = (DB_PGINFO *)cookie->data;
+ if (!pginfo->needswap)
+ return (0);
+ return (pg == PGNO_METADATA ? __ham_mswap(pp) : __db_pgout(pg, pp));
+}
+
+/*
+ * __ham_mswap --
+ * Swap the bytes on the hash metadata page.
+ *
+ * PUBLIC: int __ham_mswap __P((void *));
+ */
+int
+__ham_mswap(pg)
+ void *pg;
+{
+ u_int8_t *p;
+ int i;
+
+ p = (u_int8_t *)pg;
+ SWAP32(p); /* lsn part 1 */
+ SWAP32(p); /* lsn part 2 */
+ SWAP32(p); /* pgno */
+ SWAP32(p); /* magic */
+ SWAP32(p); /* version */
+ SWAP32(p); /* pagesize */
+ SWAP32(p); /* ovfl_point */
+ SWAP32(p); /* last_freed */
+ SWAP32(p); /* max_bucket */
+ SWAP32(p); /* high_mask */
+ SWAP32(p); /* low_mask */
+ SWAP32(p); /* ffactor */
+ SWAP32(p); /* nelem */
+ SWAP32(p); /* h_charkey */
+ SWAP32(p); /* flags */
+ for (i = 0; i < NCACHED; ++i)
+ SWAP32(p); /* spares */
+ return (0);
+}
diff --git a/db2/hash/hash_debug.c b/db2/hash/hash_debug.c
new file mode 100644
index 0000000000..979ddd7b87
--- /dev/null
+++ b/db2/hash/hash_debug.c
@@ -0,0 +1,96 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ * Sleepycat Software. All rights reserved.
+ */
+/*
+ * Copyright (c) 1995
+ * The President and Fellows of Harvard University. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jeremy Rassen.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)hash_debug.c 10.2 (Sleepycat) 6/21/97";
+#endif /* not lint */
+
+#ifdef DEBUG
+/*
+ * PACKAGE: hashing
+ *
+ * DESCRIPTION:
+ * Debug routines.
+ *
+ * ROUTINES:
+ *
+ * External
+ * __dump_bucket
+ */
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <stdio.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "hash.h"
+
+/*
+ * __ham_dump_bucket --
+ *
+ * PUBLIC: #ifdef DEBUG
+ * PUBLIC: void __ham_dump_bucket __P((HTAB *, u_int32_t));
+ * PUBLIC: #endif
+ */
+void
+__ham_dump_bucket(hashp, bucket)
+ HTAB *hashp;
+ u_int32_t bucket;
+{
+ PAGE *p;
+ db_pgno_t pgno;
+ int ret;
+
+ for (pgno = BUCKET_TO_PAGE(hashp, bucket); pgno != PGNO_INVALID;) {
+ if ((ret = memp_fget(hashp->dbp->mpf, &pgno, 0, &p)) != 0)
+ break;
+ (void)__db_prpage(p, 1);
+ pgno = p->next_pgno;
+ (void)memp_fput(hashp->dbp->mpf, p, 0);
+ }
+}
+#endif /* DEBUG */
diff --git a/db2/hash/hash_dup.c b/db2/hash/hash_dup.c
new file mode 100644
index 0000000000..059eec6f92
--- /dev/null
+++ b/db2/hash/hash_dup.c
@@ -0,0 +1,544 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ * Sleepycat Software. All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)hash_dup.c 10.5 (Sleepycat) 7/27/97";
+#endif /* not lint */
+
+/*
+ * PACKAGE: hashing
+ *
+ * DESCRIPTION:
+ * Manipulation of duplicates for the hash package.
+ *
+ * ROUTINES:
+ *
+ * External
+ * __add_dup
+ * Internal
+ */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "db_swap.h"
+#include "hash.h"
+
+static int __ham_check_move __P((HTAB *, HASH_CURSOR *, int32_t));
+static int __ham_dup_convert __P((HTAB *, HASH_CURSOR *));
+static int __ham_make_dup __P((const DBT *, DBT *d, void **, u_int32_t *));
+
+/*
+ * Called from hash_access to add a duplicate key. nval is the new
+ * value that we want to add. The flags correspond to the flag values
+ * to cursor_put indicating where to add the new element.
+ * There are 4 cases.
+ * Case 1: The existing duplicate set already resides on a separate page.
+ * We can use common code for this.
+ * Case 2: The element is small enough to just be added to the existing set.
+ * Case 3: The element is large enough to be a big item, so we're going to
+ * have to push the set onto a new page.
+ * Case 4: The element is large enough to push the duplicate set onto a
+ * separate page.
+ *
+ * PUBLIC: int __ham_add_dup __P((HTAB *, HASH_CURSOR *, DBT *, int));
+ */
+int
+__ham_add_dup(hashp, hcp, nval, flags)
+ HTAB *hashp;
+ HASH_CURSOR *hcp;
+ DBT *nval;
+ int flags;
+{
+ DBT pval, tmp_val;
+ HKEYDATA *hk;
+ u_int32_t del_len, new_size;
+ int ret;
+
+ if (flags == DB_CURRENT && hcp->dpgno == PGNO_INVALID)
+ del_len = hcp->dup_len;
+ else
+ del_len = 0;
+
+ if ((ret = __ham_check_move(hashp, hcp,
+ (int32_t)DUP_SIZE(nval->size) - (int32_t)del_len)) != 0)
+ return (ret);
+
+ /*
+ * Check if resulting duplicate set is going to need to go
+ * onto a separate duplicate page. If so, convert the
+ * duplicate set and add the new one. After conversion,
+ * hcp->dndx is the first free ndx or the index of the
+ * current pointer into the duplicate set.
+ */
+ hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
+ new_size = DUP_SIZE(nval->size) - del_len + LEN_HKEYDATA(hcp->pagep,
+ hashp->hdr->pagesize, H_DATAINDEX(hcp->bndx));
+
+ /*
+ * We convert to off-page duplicates if the item is a big item,
+ * the addition of the new item will make the set large, or
+ * if there isn't enough room on this page to add the next item.
+ */
+ if (hk->type != H_OFFDUP &&
+ (hk->type == H_OFFPAGE || ISBIG(hashp, new_size) ||
+ DUP_SIZE(nval->size) - del_len > P_FREESPACE(hcp->pagep))) {
+
+ if ((ret = __ham_dup_convert(hashp, hcp)) != 0)
+ return (ret);
+ else
+ hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
+ }
+
+ /* There are two separate cases here: on page and off page. */
+ if (hk->type != H_OFFDUP) {
+ if (hk->type != H_DUPLICATE) {
+ hk->type = H_DUPLICATE;
+ pval.flags = 0;
+ pval.data = hk->data;
+ pval.size = LEN_HDATA(hcp->pagep, hashp->hdr->pagesize,
+ hcp->bndx);
+ if ((ret = __ham_make_dup(&pval, &tmp_val, &hcp->big_data,
+ &hcp->big_datalen)) != 0 ||
+ (ret = __ham_replpair(hashp, hcp, &tmp_val, 1)) != 0)
+ return (ret);
+ }
+
+ /* Now make the new entry a duplicate. */
+ if ((ret = __ham_make_dup(nval,
+ &tmp_val, &hcp->big_data, &hcp->big_datalen)) != 0)
+ return (ret);
+
+ tmp_val.dlen = 0;
+ switch (flags) { /* On page. */
+ case DB_KEYFIRST:
+ tmp_val.doff = 0;
+ break;
+ case DB_KEYLAST:
+ tmp_val.doff = LEN_HDATA(hcp->pagep,
+ hashp->hdr->pagesize, hcp->bndx);
+ break;
+ case DB_CURRENT:
+ tmp_val.doff = hcp->dup_off;
+ tmp_val.dlen = DUP_SIZE(hcp->dup_len);
+ break;
+ case DB_BEFORE:
+ tmp_val.doff = hcp->dup_off;
+ break;
+ case DB_AFTER:
+ tmp_val.doff = hcp->dup_off + DUP_SIZE(hcp->dup_len);
+ break;
+ }
+ /* Add the duplicate. */
+ ret = __ham_replpair(hashp, hcp, &tmp_val, 0);
+ if (ret == 0)
+ ret = __ham_dirty_page(hashp, hcp->pagep);
+ __ham_c_update(hashp, hcp, hcp->pgno, tmp_val.size, 1, 1);
+ return (ret);
+ }
+
+ /* If we get here, then we're on duplicate pages. */
+ if (hcp->dpgno == PGNO_INVALID) {
+ memcpy(&hcp->dpgno,
+ (u_int8_t *)hk + SSZ(HOFFDUP, pgno), sizeof(db_pgno_t));
+ hcp->dndx = 0;
+ }
+
+ switch (flags) {
+ case DB_KEYFIRST:
+ /*
+ * The only way that we are already on a dup page is
+ * if we just converted the on-page representation.
+ * In that case, we've only got one page of duplicates.
+ */
+ if (hcp->dpagep == NULL && (ret =
+ __db_dend(hashp->dbp, hcp->dpgno, &hcp->dpagep)) != 0)
+ return (ret);
+ hcp->dndx = 0;
+ break;
+ case DB_KEYLAST:
+ if (hcp->dpagep == NULL && (ret =
+ __db_dend(hashp->dbp, hcp->dpgno, &hcp->dpagep)) != 0)
+ return (ret);
+ hcp->dpgno = PGNO(hcp->dpagep);
+ hcp->dndx = NUM_ENT(hcp->dpagep);
+ break;
+ case DB_CURRENT:
+ if ((ret = __db_ditem(hashp->dbp, hcp->dpagep, hcp->dndx,
+ BKEYDATA_SIZE(GET_BKEYDATA(hcp->dpagep, hcp->dndx)->len)))
+ != 0)
+ return (ret);
+ break;
+ case DB_BEFORE: /* The default behavior is correct. */
+ break;
+ case DB_AFTER:
+ hcp->dndx++;
+ break;
+ }
+
+ ret = __db_dput(hashp->dbp,
+ nval, &hcp->dpagep, &hcp->dndx, __ham_overflow_page);
+ hcp->pgno = PGNO(hcp->pagep);
+ __ham_c_update(hashp, hcp, hcp->pgno, nval->size, 1, 1);
+ return (ret);
+}
+
+/*
+ * Convert an on-page set of duplicates to an offpage set of duplicates.
+ */
+static int
+__ham_dup_convert(hashp, hcp)
+ HTAB *hashp;
+ HASH_CURSOR *hcp;
+{
+ BOVERFLOW bo;
+ DBT dbt;
+ HOFFPAGE ho;
+ db_indx_t dndx, len;
+ int ret;
+ u_int8_t *p, *pend;
+
+ /*
+ * Create a new page for the duplicates.
+ */
+ if ((ret =
+ __ham_overflow_page(hashp->dbp, P_DUPLICATE, &hcp->dpagep)) != 0)
+ return (ret);
+ hcp->dpagep->type = P_DUPLICATE;
+ hcp->dpgno = PGNO(hcp->dpagep);
+
+ /*
+ * Now put the duplicates onto the new page.
+ */
+ dbt.flags = 0;
+ switch (((HKEYDATA *)H_PAIRDATA(hcp->pagep, hcp->bndx))->type) {
+ case H_KEYDATA:
+ /* Simple case, one key on page; move it to dup page. */
+ dndx = 0;
+ dbt.size =
+ LEN_HDATA(hcp->pagep, hashp->hdr->pagesize, hcp->bndx);
+ dbt.data =
+ ((HKEYDATA *)H_PAIRDATA(hcp->pagep, hcp->bndx))->data;
+ ret = __db_pitem(hashp->dbp, hcp->dpagep,
+ (u_int32_t)dndx, BKEYDATA_SIZE(dbt.size), NULL, &dbt);
+ if (ret == 0)
+ __ham_dirty_page(hashp, hcp->dpagep);
+ break;
+ case H_OFFPAGE:
+ /* Simple case, one key on page; move it to dup page. */
+ dndx = 0;
+ memcpy(&ho,
+ P_ENTRY(hcp->pagep, H_DATAINDEX(hcp->bndx)), HOFFPAGE_SIZE);
+ bo.deleted = 0;
+ bo.type = ho.type;
+ bo.pgno = ho.pgno;
+ bo.tlen = ho.tlen;
+ dbt.size = BOVERFLOW_SIZE;
+ dbt.data = &bo;
+
+ ret = __db_pitem(hashp->dbp, hcp->dpagep,
+ (u_int32_t)dndx, dbt.size, &dbt, NULL);
+ if (ret == 0)
+ __ham_dirty_page(hashp, hcp->dpagep);
+ break;
+ case H_DUPLICATE:
+ p = ((HKEYDATA *)H_PAIRDATA(hcp->pagep, hcp->bndx))->data;
+ pend = p +
+ LEN_HDATA(hcp->pagep, hashp->hdr->pagesize, hcp->bndx);
+
+ for (dndx = 0; p < pend; dndx++) {
+ memcpy(&len, p, sizeof(db_indx_t));
+ dbt.size = len;
+ p += sizeof(db_indx_t);
+ dbt.data = p;
+ p += len + sizeof(db_indx_t);
+ ret = __db_dput(hashp->dbp, &dbt,
+ &hcp->dpagep, &dndx, __ham_overflow_page);
+ if (ret != 0)
+ break;
+ }
+ break;
+ default:
+ ret = __db_pgfmt(hashp->dbp, (u_long)hcp->pgno);
+ }
+ if (ret == 0) {
+ /*
+ * Now attach this to the source page in place of
+ * the old duplicate item.
+ */
+ __ham_move_offpage(hashp, hcp->pagep,
+ (u_int32_t)H_DATAINDEX(hcp->bndx), hcp->dpgno);
+
+ /* Can probably just do a "put" here. */
+ ret = __ham_dirty_page(hashp, hcp->pagep);
+ } else {
+ (void)__ham_del_page(hashp->dbp, hcp->dpagep);
+ hcp->dpagep = NULL;
+ }
+ return (ret);
+}
+
+static int
+__ham_make_dup(notdup, dup, bufp, sizep)
+ const DBT *notdup;
+ DBT *dup;
+ void **bufp;
+ u_int32_t *sizep;
+{
+ db_indx_t tsize, item_size;
+ int ret;
+ u_int8_t *p;
+
+ item_size = (db_indx_t)notdup->size;
+ tsize = DUP_SIZE(item_size);
+ if ((ret = __ham_init_dbt(dup, tsize, bufp, sizep)) != 0)
+ return (ret);
+
+ dup->dlen = 0;
+ dup->flags = notdup->flags;
+ F_SET(dup, DB_DBT_PARTIAL);
+
+ p = dup->data;
+ memcpy(p, &item_size, sizeof(db_indx_t));
+ p += sizeof(db_indx_t);
+ memcpy(p, notdup->data, notdup->size);
+ p += notdup->size;
+ memcpy(p, &item_size, sizeof(db_indx_t));
+
+ dup->doff = 0;
+ dup->dlen = notdup->size;
+
+ return (0);
+}
+
+static int
+__ham_check_move(hashp, hcp, add_len)
+ HTAB *hashp;
+ HASH_CURSOR *hcp;
+ int32_t add_len;
+{
+ DBT k, d;
+ DB_LSN new_lsn;
+ HKEYDATA *hk;
+ PAGE *next_pagep;
+ db_pgno_t next_pgno;
+ int rectype, ret;
+ u_int32_t new_datalen, old_len;
+
+ /*
+ * Check if we can do whatever we need to on this page. If not,
+ * then we'll have to move the current element to a new page.
+ */
+
+ hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
+
+ /*
+ * If the item is already off page duplicates or an offpage item,
+ * then we know we can do whatever we need to do in-place
+ */
+ if (hk->type == H_OFFDUP || hk->type == H_OFFPAGE)
+ return (0);
+
+ old_len =
+ LEN_HITEM(hcp->pagep, hashp->hdr->pagesize, H_DATAINDEX(hcp->bndx));
+ new_datalen = old_len - HKEYDATA_SIZE(0) + add_len;
+
+ /*
+ * We need to add a new page under two conditions:
+ * 1. The addition makes the total data length cross the BIG
+ * threshold and the OFFDUP structure won't fit on this page.
+ * 2. The addition does not make the total data cross the
+ * threshold, but the new data won't fit on the page.
+ * If neither of these is true, then we can return.
+ */
+ if (ISBIG(hashp, new_datalen) && (old_len > HOFFDUP_SIZE ||
+ HOFFDUP_SIZE - old_len <= P_FREESPACE(hcp->pagep)))
+ return (0);
+
+ if (!ISBIG(hashp, new_datalen) &&
+ add_len <= (int32_t)P_FREESPACE(hcp->pagep))
+ return (0);
+
+ /*
+ * If we get here, then we need to move the item to a new page.
+ * Check if there are more pages in the chain.
+ */
+
+ new_datalen = ISBIG(hashp, new_datalen) ?
+ HOFFDUP_SIZE : HKEYDATA_SIZE(new_datalen);
+
+ next_pagep = NULL;
+ for (next_pgno = NEXT_PGNO(hcp->pagep); next_pgno != PGNO_INVALID;
+ next_pgno = NEXT_PGNO(next_pagep)) {
+ if (next_pagep != NULL &&
+ (ret = __ham_put_page(hashp->dbp, next_pagep, 0)) != 0)
+ return (ret);
+
+ if ((ret = __ham_get_page(hashp->dbp, next_pgno, &next_pagep)) != 0)
+ return (ret);
+
+ if (P_FREESPACE(next_pagep) >= new_datalen)
+ break;
+ }
+
+ /* No more pages, add one. */
+ if (next_pagep == NULL &&
+ (ret = __ham_add_ovflpage(hashp, hcp->pagep, 0, &next_pagep)) != 0)
+ return (ret);
+
+ /* Add new page at the end of the chain. */
+ if (P_FREESPACE(next_pagep) < new_datalen &&
+ (ret = __ham_add_ovflpage(hashp, next_pagep, 1, &next_pagep)) != 0)
+ return (ret);
+
+ /* Copy the item to the new page. */
+ if (DB_LOGGING(hashp->dbp)) {
+ rectype = PUTPAIR;
+ k.flags = 0;
+ d.flags = 0;
+ if (H_PAIRKEY(hcp->pagep, hcp->bndx)->type == H_OFFPAGE) {
+ rectype |= PAIR_KEYMASK;
+ k.data = H_PAIRKEY(hcp->pagep, hcp->bndx);
+ k.size = HOFFPAGE_SIZE;
+ } else {
+ k.data = H_PAIRKEY(hcp->pagep, hcp->bndx)->data;
+ k.size = LEN_HKEY(hcp->pagep,
+ hashp->hdr->pagesize, hcp->bndx);
+ }
+
+ if (hk->type == H_OFFPAGE) {
+ rectype |= PAIR_DATAMASK;
+ d.data = H_PAIRDATA(hcp->pagep, hcp->bndx);
+ d.size = HOFFPAGE_SIZE;
+ } else {
+ d.data = H_PAIRDATA(hcp->pagep, hcp->bndx)->data;
+ d.size = LEN_HDATA(hcp->pagep,
+ hashp->hdr->pagesize, hcp->bndx);
+ }
+
+
+ if ((ret = __ham_insdel_log(hashp->dbp->dbenv->lg_info,
+ (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, rectype,
+ hashp->dbp->log_fileid, PGNO(next_pagep),
+ (u_int32_t)H_NUMPAIRS(next_pagep), &LSN(next_pagep),
+ &k, &d)) != 0)
+ return (ret);
+
+ /* Move lsn onto page. */
+ LSN(next_pagep) = new_lsn; /* Structure assignment. */
+ }
+
+ __ham_copy_item(hashp, hcp->pagep, H_KEYINDEX(hcp->bndx), next_pagep);
+ __ham_copy_item(hashp, hcp->pagep, H_DATAINDEX(hcp->bndx), next_pagep);
+
+ /* Now delete the pair from the current page. */
+ ret = __ham_del_pair(hashp, hcp);
+
+ (void)__ham_put_page(hashp->dbp, hcp->pagep, 1);
+ hcp->pagep = next_pagep;
+ hcp->pgno = PGNO(hcp->pagep);
+ hcp->bndx = H_NUMPAIRS(hcp->pagep) - 1;
+ F_SET(hcp, H_EXPAND);
+ return (ret);
+}
+
+/*
+ * Replace an onpage set of duplicates with the OFFDUP structure that
+ * references the duplicate page.
+ * XXX This is really just a special case of __onpage_replace; we should
+ * probably combine them.
+ * PUBLIC: void __ham_move_offpage __P((HTAB *, PAGE *, u_int32_t, db_pgno_t));
+ */
+void
+__ham_move_offpage(hashp, pagep, ndx, pgno)
+ HTAB *hashp;
+ PAGE *pagep;
+ u_int32_t ndx;
+ db_pgno_t pgno;
+{
+ DBT new_dbt;
+ DBT old_dbt;
+ HOFFDUP od;
+ db_indx_t i;
+ int32_t shrink;
+ u_int8_t *src;
+
+ od.type = H_OFFDUP;
+ od.pgno = pgno;
+
+ if (DB_LOGGING(hashp->dbp)) {
+ new_dbt.data = &od;
+ new_dbt.size = HOFFDUP_SIZE;
+ old_dbt.data = P_ENTRY(pagep, ndx);
+ old_dbt.size = LEN_HITEM(pagep, hashp->hdr->pagesize, ndx);
+ (void)__ham_replace_log(hashp->dbp->dbenv->lg_info,
+ (DB_TXN *)hashp->dbp->txn, &LSN(pagep), 0,
+ hashp->dbp->log_fileid, PGNO(pagep), (u_int32_t)ndx,
+ &LSN(pagep), -1, &old_dbt, &new_dbt, 0);
+ }
+
+ shrink =
+ LEN_HITEM(pagep, hashp->hdr->pagesize, ndx) - HOFFDUP_SIZE;
+
+ if (shrink != 0) {
+ /* Copy data. */
+ src = (u_int8_t *)(pagep) + HOFFSET(pagep);
+ memmove(src + shrink, src, pagep->inp[ndx] - HOFFSET(pagep));
+ HOFFSET(pagep) += shrink;
+
+ /* Update index table. */
+ for (i = ndx; i < NUM_ENT(pagep); i++)
+ pagep->inp[i] += shrink;
+ }
+
+ /* Now copy the offdup entry onto the page. */
+ memcpy(P_ENTRY(pagep, ndx), &od, HOFFDUP_SIZE);
+}
diff --git a/db2/hash/hash_func.c b/db2/hash/hash_func.c
new file mode 100644
index 0000000000..2ef47afb57
--- /dev/null
+++ b/db2/hash/hash_func.c
@@ -0,0 +1,219 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ * Sleepycat Software. All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993
+ * Margo Seltzer. All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)hash_func.c 10.6 (Sleepycat) 7/26/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "hash.h"
+
+/*
+ * __ham_func2 --
+ * Phong Vo's linear congruential hash.
+ *
+ * PUBLIC: u_int32_t __ham_func2 __P((const void *, u_int32_t));
+ */
+#define dcharhash(h, c) ((h) = 0x63c63cd9*(h) + 0x9c39c33d + (c))
+
+u_int32_t
+__ham_func2(key, len)
+ const void *key;
+ u_int32_t len;
+{
+ const u_int8_t *e, *k;
+ u_int32_t h;
+ u_int8_t c;
+
+ k = key;
+ e = k + len;
+ for (h = 0; k != e;) {
+ c = *k++;
+ if (!c && k > e)
+ break;
+ dcharhash(h, c);
+ }
+ return (h);
+}
+
+/*
+ * __ham_func3 --
+ * Ozan Yigit's original sdbm hash.
+ *
+ * Ugly, but fast. Break the string up into 8 byte units. On the first time
+ * through the loop get the "leftover bytes" (strlen % 8). On every other
+ * iteration, perform 8 HASHC's so we handle all 8 bytes. Essentially, this
+ * saves us 7 cmp & branch instructions.
+ *
+ * PUBLIC: u_int32_t __ham_func3 __P((const void *, u_int32_t));
+ */
+u_int32_t
+__ham_func3(key, len)
+ const void *key;
+ u_int32_t len;
+{
+ const u_int8_t *k;
+ u_int32_t n, loop;
+
+ if (len == 0)
+ return (0);
+
+#define HASHC n = *k++ + 65599 * n
+ n = 0;
+ k = key;
+
+ loop = (len + 8 - 1) >> 3;
+ switch (len & (8 - 1)) {
+ case 0:
+ do {
+ HASHC;
+ case 7:
+ HASHC;
+ case 6:
+ HASHC;
+ case 5:
+ HASHC;
+ case 4:
+ HASHC;
+ case 3:
+ HASHC;
+ case 2:
+ HASHC;
+ case 1:
+ HASHC;
+ } while (--loop);
+ }
+ return (n);
+}
+
+/*
+ * __ham_func4 --
+ * Chris Torek's hash function. Although this function performs only
+ * slightly worse than __ham_func5 on strings, it performs horribly on
+ * numbers.
+ *
+ * PUBLIC: u_int32_t __ham_func4 __P((const void *, u_int32_t));
+ */
+u_int32_t
+__ham_func4(key, len)
+ const void *key;
+ u_int32_t len;
+{
+ const u_int8_t *k;
+ u_int32_t h, loop;
+
+ if (len == 0)
+ return (0);
+
+#define HASH4a h = (h << 5) - h + *k++;
+#define HASH4b h = (h << 5) + h + *k++;
+#define HASH4 HASH4b
+ h = 0;
+ k = key;
+
+ loop = (len + 8 - 1) >> 3;
+ switch (len & (8 - 1)) {
+ case 0:
+ do {
+ HASH4;
+ case 7:
+ HASH4;
+ case 6:
+ HASH4;
+ case 5:
+ HASH4;
+ case 4:
+ HASH4;
+ case 3:
+ HASH4;
+ case 2:
+ HASH4;
+ case 1:
+ HASH4;
+ } while (--loop);
+ }
+ return (h);
+}
+
+/*
+ * Fowler/Noll/Vo hash
+ *
+ * The basis of the hash algorithm was taken from an idea sent by email to the
+ * IEEE Posix P1003.2 mailing list from Phong Vo (kpv@research.att.com) and
+ * Glenn Fowler (gsf@research.att.com). Landon Curt Noll (chongo@toad.com)
+ * later improved on their algorithm.
+ *
+ * The magic is in the interesting relationship between the special prime
+ * 16777619 (2^24 + 403) and 2^32 and 2^8.
+ *
+ * This hash produces the fewest collisions of any function that we've seen so
+ * far, and works well on both numbers and strings.
+ *
+ * PUBLIC: u_int32_t __ham_func5 __P((const void *, u_int32_t));
+ */
+u_int32_t
+__ham_func5(key, len)
+ const void *key;
+ u_int32_t len;
+{
+ const u_int8_t *k, *e;
+ u_int32_t h;
+
+ k = key;
+ e = k + len;
+ for (h = 0; k < e; ++k) {
+ h *= 16777619;
+ h ^= *k;
+ }
+ return (h);
+}
diff --git a/db2/hash/hash_page.c b/db2/hash/hash_page.c
new file mode 100644
index 0000000000..68c31b14f9
--- /dev/null
+++ b/db2/hash/hash_page.c
@@ -0,0 +1,1775 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ * Sleepycat Software. All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994
+ * Margo Seltzer. All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)hash_page.c 10.18 (Sleepycat) 8/21/97";
+#endif /* not lint */
+
+
+/*
+ * PACKAGE: hashing
+ *
+ * DESCRIPTION:
+ * Page manipulation for hashing package.
+ *
+ * ROUTINES:
+ *
+ * External
+ * __get_page
+ * __add_ovflpage
+ * __overflow_page
+ * Internal
+ * open_temp
+ */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "db_swap.h"
+#include "hash.h"
+
+static int __ham_lock_bucket __P((DB *, HASH_CURSOR *, db_lockmode_t));
+
+#ifdef DEBUG_SLOW
+static void account_page(HTAB *, db_pgno_t, int);
+#endif
+
+/*
+ * PUBLIC: int __ham_item __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
+ */
+int
+__ham_item(hashp, cursorp, mode)
+ HTAB *hashp;
+ HASH_CURSOR *cursorp;
+ db_lockmode_t mode;
+{
+ db_pgno_t next_pgno;
+ int ret;
+
+ if (F_ISSET(cursorp, H_DELETED))
+ return (EINVAL);
+ F_CLR(cursorp, H_OK | H_NOMORE);
+
+ /* Check if we need to get a page for this cursor. */
+ if ((ret = __ham_get_cpage(hashp, cursorp, mode)) != 0)
+ return (ret);
+
+ /* Check if we are looking for space in which to insert an item. */
+ if (cursorp->seek_size && cursorp->seek_found_page == PGNO_INVALID
+ && cursorp->seek_size < P_FREESPACE(cursorp->pagep))
+ cursorp->seek_found_page = cursorp->pgno;
+
+ /* Check if we need to go on to the next page. */
+ if (F_ISSET(cursorp, H_ISDUP) && cursorp->dpgno == PGNO_INVALID)
+ /*
+ * ISDUP is set, and offset is at the beginning of the datum.
+ * We need to grab the length of the datum, then set the datum
+ * pointer to be the beginning of the datum.
+ */
+ memcpy(&cursorp->dup_len,
+ H_PAIRDATA(cursorp->pagep, cursorp->bndx)->data +
+ cursorp->dup_off, sizeof(db_indx_t));
+ else if (F_ISSET(cursorp, H_ISDUP)) {
+ /* Make sure we're not about to run off the page. */
+ if (cursorp->dpagep == NULL && (ret = __ham_get_page(hashp->dbp,
+ cursorp->dpgno, &cursorp->dpagep)) != 0)
+ return (ret);
+
+ if (cursorp->dndx >= NUM_ENT(cursorp->dpagep)) {
+ if (NEXT_PGNO(cursorp->dpagep) == PGNO_INVALID) {
+ if ((ret = __ham_put_page(hashp->dbp,
+ cursorp->dpagep, 0)) != 0)
+ return (ret);
+ F_CLR(cursorp, H_ISDUP);
+ cursorp->dpagep = NULL;
+ cursorp->dpgno = PGNO_INVALID;
+ cursorp->dndx = NDX_INVALID;
+ cursorp->bndx++;
+ } else if ((ret = __ham_next_cpage(hashp, cursorp,
+ NEXT_PGNO(cursorp->dpagep), 0, H_ISDUP)) != 0)
+ return (ret);
+ }
+ }
+
+ if (cursorp->bndx >= (db_indx_t)H_NUMPAIRS(cursorp->pagep)) {
+ /* Fetch next page. */
+ if (NEXT_PGNO(cursorp->pagep) == PGNO_INVALID) {
+ F_SET(cursorp, H_NOMORE);
+ if (cursorp->dpagep != NULL &&
+ (ret = __ham_put_page(hashp->dbp,
+ cursorp->dpagep, 0)) != 0)
+ return (ret);
+ cursorp->dpgno = PGNO_INVALID;
+ return (DB_NOTFOUND);
+ }
+ next_pgno = NEXT_PGNO(cursorp->pagep);
+ cursorp->bndx = 0;
+ if ((ret = __ham_next_cpage(hashp,
+ cursorp, next_pgno, 0, 0)) != 0)
+ return (ret);
+ }
+
+ F_SET(cursorp, H_OK);
+ return (0);
+}
+
+/*
+ * PUBLIC: int __ham_item_reset __P((HTAB *, HASH_CURSOR *));
+ */
+int
+__ham_item_reset(hashp, cursorp)
+ HTAB *hashp;
+ HASH_CURSOR *cursorp;
+{
+ int ret;
+
+ if (cursorp->pagep)
+ ret = __ham_put_page(hashp->dbp, cursorp->pagep, 0);
+ else
+ ret = 0;
+
+ __ham_item_init(cursorp);
+ return (ret);
+}
+
+/*
+ * PUBLIC: void __ham_item_init __P((HASH_CURSOR *));
+ */
+void
+__ham_item_init(cursorp)
+ HASH_CURSOR *cursorp;
+{
+ cursorp->pagep = NULL;
+ cursorp->bucket = BUCKET_INVALID;
+ cursorp->lock = 0;
+ cursorp->bndx = NDX_INVALID;
+ cursorp->pgno = PGNO_INVALID;
+ cursorp->dpgno = PGNO_INVALID;
+ cursorp->dndx = NDX_INVALID;
+ cursorp->dpagep = NULL;
+ cursorp->flags = 0;
+ cursorp->seek_size = 0;
+ cursorp->seek_found_page = PGNO_INVALID;
+}
+
+/*
+ * PUBLIC: int __ham_item_done __P((HTAB *, HASH_CURSOR *, int));
+ */
+int
+__ham_item_done(hashp, cursorp, dirty)
+ HTAB *hashp;
+ HASH_CURSOR *cursorp;
+ int dirty;
+{
+ int ret, t_ret;
+
+ t_ret = ret = 0;
+
+ if (cursorp->pagep)
+ ret = __ham_put_page(hashp->dbp, cursorp->pagep,
+ dirty && cursorp->dpagep == NULL);
+ cursorp->pagep = NULL;
+
+ if (cursorp->dpagep)
+ t_ret = __ham_put_page(hashp->dbp, cursorp->dpagep, dirty);
+ cursorp->dpagep = NULL;
+
+ if (ret == 0 && t_ret != 0)
+ ret = t_ret;
+
+ /*
+ * If we are running with transactions, then we must
+ * not relinquish locks explicitly.
+ */
+ if (cursorp->lock && hashp->dbp->txn == NULL)
+ t_ret = lock_put(hashp->dbp->dbenv->lk_info, cursorp->lock);
+ cursorp->lock = 0;
+
+
+ /*
+ * We don't throw out the page number since we might want to
+ * continue getting on this page.
+ */
+ return (ret != 0 ? ret : t_ret);
+}
+
+/*
+ * Returns the last item in a bucket.
+ *
+ * PUBLIC: int __ham_item_last __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
+ */
+int
+__ham_item_last(hashp, cursorp, mode)
+ HTAB *hashp;
+ HASH_CURSOR *cursorp;
+ db_lockmode_t mode;
+{
+ int ret;
+
+ if ((ret = __ham_item_reset(hashp, cursorp)) != 0)
+ return (ret);
+
+ cursorp->bucket = hashp->hdr->max_bucket;
+ F_SET(cursorp, H_OK);
+ return (__ham_item_prev(hashp, cursorp, mode));
+}
+/*
+ * PUBLIC: int __ham_item_first __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
+ */
+int
+__ham_item_first(hashp, cursorp, mode)
+ HTAB *hashp;
+ HASH_CURSOR *cursorp;
+ db_lockmode_t mode;
+{
+ int ret;
+
+ if ((ret = __ham_item_reset(hashp, cursorp)) != 0)
+ return (ret);
+ F_SET(cursorp, H_OK);
+ cursorp->bucket = 0;
+ return (__ham_item_next(hashp, cursorp, mode));
+}
+
+/*
+ * Returns a pointer to key/data pair on a page. In the case of bigkeys,
+ * just returns the page number and index of the bigkey pointer pair.
+ *
+ * PUBLIC: int __ham_item_prev __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
+ */
+int
+__ham_item_prev(hashp, cursorp, mode)
+ HTAB *hashp;
+ HASH_CURSOR *cursorp;
+ db_lockmode_t mode;
+{
+ db_pgno_t next_pgno;
+ int ret;
+
+ /*
+ * There are N cases for backing up in a hash file.
+ * Case 1: In the middle of a page, no duplicates, just dec the index.
+ * Case 2: In the middle of a duplicate set, back up one.
+ * Case 3: At the beginning of a duplicate set, get out of set and
+ * back up to next key.
+ * Case 4: At the beginning of a page; go to previous page.
+ * Case 5: At the beginning of a bucket; go to prev bucket.
+ */
+ F_CLR(cursorp, H_OK | H_NOMORE | H_DELETED);
+
+ /*
+ * First handle the duplicates. Either you'll get the key here
+ * or you'll exit the duplicate set and drop into the code below
+ * to handle backing up through keys.
+ */
+ if (F_ISSET(cursorp, H_ISDUP)) {
+ if (cursorp->dpgno == PGNO_INVALID) {
+ /* Duplicates are on-page. */
+ if (cursorp->dup_off != 0)
+ if ((ret = __ham_get_cpage(hashp,
+ cursorp, mode)) != 0)
+ return (ret);
+ else {
+ HASH_CURSOR *h;
+ h = cursorp;
+ memcpy(&h->dup_len,
+ H_PAIRDATA(h->pagep, h->bndx)->data
+ + h->dup_off - sizeof(db_indx_t),
+ sizeof(db_indx_t));
+ cursorp->dup_off -=
+ DUP_SIZE(cursorp->dup_len);
+ cursorp->dndx--;
+ return (__ham_item(hashp,
+ cursorp, mode));
+ }
+ } else if (cursorp->dndx > 0) { /* Duplicates are off-page. */
+ cursorp->dndx--;
+ return (__ham_item(hashp, cursorp, mode));
+ } else if ((ret = __ham_get_cpage(hashp, cursorp, mode)) != 0)
+ return (ret);
+ else if (PREV_PGNO(cursorp->dpagep) == PGNO_INVALID) {
+ F_CLR(cursorp, H_ISDUP); /* End of dups */
+ cursorp->dpgno = PGNO_INVALID;
+ if (cursorp->dpagep != NULL)
+ (void)__ham_put_page(hashp->dbp,
+ cursorp->dpagep, 0);
+ cursorp->dpagep = NULL;
+ } else if ((ret = __ham_next_cpage(hashp, cursorp,
+ PREV_PGNO(cursorp->dpagep), 0, H_ISDUP)) != 0)
+ return (ret);
+ else {
+ cursorp->dndx = NUM_ENT(cursorp->pagep) - 1;
+ return (__ham_item(hashp, cursorp, mode));
+ }
+ }
+
+ /*
+ * If we get here, we are not in a duplicate set, and just need
+ * to back up the cursor. There are still three cases:
+ * midpage, beginning of page, beginning of bucket.
+ */
+
+ if (cursorp->bndx == 0) { /* Beginning of page. */
+ if ((ret = __ham_get_cpage(hashp, cursorp, mode)) != 0)
+ return (ret);
+ cursorp->pgno = PREV_PGNO(cursorp->pagep);
+ if (cursorp->pgno == PGNO_INVALID) {
+ /* Beginning of bucket. */
+ F_SET(cursorp, H_NOMORE);
+ return (DB_NOTFOUND);
+ } else if ((ret = __ham_next_cpage(hashp,
+ cursorp, cursorp->pgno, 0, 0)) != 0)
+ return (ret);
+ else
+ cursorp->bndx = H_NUMPAIRS(cursorp->pagep);
+ }
+
+ /*
+ * Either we've got the cursor set up to be decremented, or we
+ * have to find the end of a bucket.
+ */
+ if (cursorp->bndx == NDX_INVALID) {
+ if (cursorp->pagep == NULL)
+ next_pgno = BUCKET_TO_PAGE(hashp, cursorp->bucket);
+ else
+ goto got_page;
+
+ do {
+ if ((ret = __ham_next_cpage(hashp,
+ cursorp, next_pgno, 0, 0)) != 0)
+ return (ret);
+got_page: next_pgno = NEXT_PGNO(cursorp->pagep);
+ cursorp->bndx = H_NUMPAIRS(cursorp->pagep);
+ } while (next_pgno != PGNO_INVALID);
+
+ if (cursorp->bndx == 0) {
+ /* Bucket was empty. */
+ F_SET(cursorp, H_NOMORE);
+ return (DB_NOTFOUND);
+ }
+ }
+
+ cursorp->bndx--;
+
+ return (__ham_item(hashp, cursorp, mode));
+}
+
+/*
+ * Sets the cursor to the next key/data pair on a page.
+ *
+ * PUBLIC: int __ham_item_next __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
+ */
+int
+__ham_item_next(hashp, cursorp, mode)
+ HTAB *hashp;
+ HASH_CURSOR *cursorp;
+ db_lockmode_t mode;
+{
+ /*
+ * Deleted on-page duplicates are a weird case. If we delete the last
+ * one, then our cursor is at the very end of a duplicate set and
+ * we actually need to go on to the next key.
+ */
+ if (F_ISSET(cursorp, H_DELETED)) {
+ if (cursorp->bndx != NDX_INVALID &&
+ F_ISSET(cursorp, H_ISDUP) &&
+ cursorp->dpgno == PGNO_INVALID &&
+ cursorp->dup_tlen == cursorp->dup_off) {
+ F_CLR(cursorp, H_ISDUP);
+ cursorp->dpgno = PGNO_INVALID;
+ cursorp->bndx++;
+ }
+ F_CLR(cursorp, H_DELETED);
+ } else if (cursorp->bndx == NDX_INVALID) {
+ cursorp->bndx = 0;
+ cursorp->dpgno = PGNO_INVALID;
+ F_CLR(cursorp, H_ISDUP);
+ } else if (F_ISSET(cursorp, H_ISDUP) && cursorp->dpgno != PGNO_INVALID)
+ cursorp->dndx++;
+ else if (F_ISSET(cursorp, H_ISDUP)) {
+ cursorp->dndx++;
+ cursorp->dup_off += DUP_SIZE(cursorp->dup_len);
+ if (cursorp->dup_off >= cursorp->dup_tlen) {
+ F_CLR(cursorp, H_ISDUP);
+ cursorp->dpgno = PGNO_INVALID;
+ cursorp->bndx++;
+ }
+ } else
+ cursorp->bndx++;
+
+ return (__ham_item(hashp, cursorp, mode));
+}
+
+/*
+ * PUBLIC: void __ham_putitem __P((PAGE *p, const DBT *, int));
+ *
+ * This is a little bit sleazy in that we're overloading the meaning
+ * of the H_OFFPAGE type here. When we recover deletes, we have the
+ * entire entry instead of having only the DBT, so we'll pass type
+ * H_OFFPAGE to mean, "copy the whole entry" as opposed to constructing
+ * an H_KEYDATA around it.
+ */
+void
+__ham_putitem(p, dbt, type)
+ PAGE *p;
+ const DBT *dbt;
+ int type;
+{
+ u_int16_t n, off;
+
+ n = NUM_ENT(p);
+
+ /* Put the item element on the page. */
+ if (type == H_OFFPAGE) {
+ off = HOFFSET(p) - dbt->size;
+ HOFFSET(p) = p->inp[n] = off;
+ memcpy(P_ENTRY(p, n), dbt->data, dbt->size);
+ } else {
+ off = HOFFSET(p) - HKEYDATA_SIZE(dbt->size);
+ HOFFSET(p) = p->inp[n] = off;
+ PUT_HKEYDATA(GET_HKEYDATA(p, n), dbt->data, dbt->size, type);
+ }
+
+ /* Adjust page info. */
+ NUM_ENT(p) += 1;
+}
+
+
+/*
+ * PUBLIC: int __ham_del_pair __P((HTAB *, HASH_CURSOR *));
+ * XXX TODO: if the item is an offdup, delete the other pages and
+ * then remove the pair. If the offpage page is 0, then you can
+ * just remove the pair.
+ */
+int
+__ham_del_pair(hashp, cursorp)
+ HTAB *hashp;
+ HASH_CURSOR *cursorp;
+{
+ DBT data_dbt, key_dbt;
+ DB_ENV *dbenv;
+ DB_LSN new_lsn, *n_lsn;
+ PAGE *p;
+ db_indx_t ndx;
+ db_pgno_t chg_pgno, pgno;
+ int ret, tret;
+
+ dbenv = hashp->dbp->dbenv;
+ ndx = cursorp->bndx;
+ if (cursorp->pagep == NULL && (ret =
+ __ham_get_page(hashp->dbp, cursorp->pgno, &cursorp->pagep)) != 0)
+ return (ret);
+
+ p = cursorp->pagep;
+
+ /*
+ * We optimize for the normal case which is when neither the key nor
+ * the data are large. In this case, we write a single log record
+ * and do the delete. If either is large, we'll call __big_delete
+ * to remove the big item and then update the page to remove the
+ * entry referring to the big item.
+ */
+ ret = 0;
+ if (H_PAIRKEY(p, ndx)->type == H_OFFPAGE) {
+ memcpy(&pgno, (u_int8_t *)GET_HOFFPAGE(p, H_KEYINDEX(ndx)) +
+ SSZ(HOFFPAGE, pgno), sizeof(db_pgno_t));
+ ret = __db_doff(hashp->dbp, pgno, __ham_del_page);
+ }
+
+ if (ret == 0)
+ switch (H_PAIRDATA(p, ndx)->type) {
+ case H_OFFPAGE:
+ memcpy(&pgno,
+ (u_int8_t *)GET_HOFFPAGE(p, H_DATAINDEX(ndx)) +
+ SSZ(HOFFPAGE, pgno), sizeof(db_pgno_t));
+ ret = __db_doff(hashp->dbp, pgno, __ham_del_page);
+ break;
+ case H_OFFDUP:
+ memcpy(&pgno,
+ (u_int8_t *)GET_HOFFDUP(p, H_DATAINDEX(ndx)) +
+ SSZ(HOFFDUP, pgno), sizeof(db_pgno_t));
+ ret = __db_ddup(hashp->dbp, pgno, __ham_del_page);
+ break;
+ }
+
+ if (ret)
+ return (ret);
+
+ /* Now log the delete off this page. */
+ if (DB_LOGGING(hashp->dbp)) {
+ key_dbt.data = P_ENTRY(p, H_KEYINDEX(ndx));
+ key_dbt.size =
+ LEN_HITEM(p, hashp->hdr->pagesize, H_KEYINDEX(ndx));
+ data_dbt.data = P_ENTRY(p, H_DATAINDEX(ndx));
+ data_dbt.size =
+ LEN_HITEM(p, hashp->hdr->pagesize, H_DATAINDEX(ndx));
+
+ if ((ret = __ham_insdel_log(dbenv->lg_info,
+ (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, DELPAIR,
+ hashp->dbp->log_fileid, PGNO(p), (u_int32_t)ndx,
+ &LSN(p), &key_dbt, &data_dbt)) != 0)
+ return (ret);
+
+ /* Move lsn onto page. */
+ LSN(p) = new_lsn;
+ }
+
+ __ham_dpair(hashp->dbp, p, ndx);
+
+ /*
+ * If we are locking, we will not maintain this.
+ * XXXX perhaps we can retain incremental numbers and apply them
+ * later.
+ */
+ if (!F_ISSET(hashp->dbp, DB_AM_LOCKING))
+ --hashp->hdr->nelem;
+
+ /*
+ * Check if the page is empty. There are two cases. If it's
+ * empty and it's not the first chain in the bucket (i.e., the
+ * bucket page) then we can simply remove it. If it is the first
+ * chain in the bucket, then we need to copy the second page into
+ * it and remove the second page.
+ */
+ if (NUM_ENT(p) == 0 && PREV_PGNO(p) == PGNO_INVALID &&
+ NEXT_PGNO(p) != PGNO_INVALID) {
+ PAGE *n_pagep, *nn_pagep;
+ db_pgno_t tmp_pgno;
+
+ /*
+ * First page in chain is empty and we know that there
+ * are more pages in the chain.
+ * XXX Need to log this.
+ */
+ if ((ret =
+ __ham_get_page(hashp->dbp, NEXT_PGNO(p), &n_pagep)) != 0)
+ return (ret);
+
+ if (NEXT_PGNO(n_pagep) != PGNO_INVALID) {
+ if ((ret =
+ __ham_get_page(hashp->dbp, NEXT_PGNO(n_pagep),
+ &nn_pagep)) != 0) {
+ (void) __ham_put_page(hashp->dbp, n_pagep, 0);
+ return (ret);
+ }
+ PREV_PGNO(nn_pagep) = PGNO(p);
+ (void)__ham_put_page(hashp->dbp, nn_pagep, 1);
+ }
+
+ tmp_pgno = PGNO(p);
+ memcpy(p, n_pagep, hashp->hdr->pagesize);
+ PGNO(p) = tmp_pgno;
+ PREV_PGNO(p) = PGNO_INVALID;
+
+ /*
+ * Cursor is advanced to the beginning of the next page.
+ */
+ cursorp->bndx = NDX_INVALID;
+ cursorp->pgno = PGNO(p);
+ chg_pgno = PGNO(p);
+ if ((ret = __ham_dirty_page(hashp, p)) != 0 ||
+ (ret = __ham_del_page(hashp->dbp, n_pagep)) != 0)
+ return (ret);
+ } else if (NUM_ENT(p) == 0 && PREV_PGNO(p) != PGNO_INVALID) {
+ PAGE *n_pagep, *p_pagep;
+
+ if ((ret =
+ __ham_get_page(hashp->dbp, PREV_PGNO(p), &p_pagep)) != 0)
+ return (ret);
+
+ if (NEXT_PGNO(p) != PGNO_INVALID) {
+ if ((ret = __ham_get_page(hashp->dbp,
+ NEXT_PGNO(p), &n_pagep)) != 0) {
+ (void)__ham_put_page(hashp->dbp, p_pagep, 0);
+ return (ret);
+ }
+ n_lsn = &LSN(n_pagep);
+ } else {
+ n_pagep = NULL;
+ n_lsn = NULL;
+ }
+
+ NEXT_PGNO(p_pagep) = NEXT_PGNO(p);
+ if (n_pagep != NULL)
+ PREV_PGNO(n_pagep) = PGNO(p_pagep);
+
+ if (DB_LOGGING(hashp->dbp)) {
+ if ((ret = __ham_newpage_log(dbenv->lg_info,
+ (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, DELOVFL,
+ hashp->dbp->log_fileid, PREV_PGNO(p), &LSN(p_pagep),
+ PGNO(p), &LSN(p), NEXT_PGNO(p), n_lsn)) != 0)
+ return (ret);
+
+ /* Move lsn onto page. */
+ LSN(p_pagep) = new_lsn; /* Structure assignment. */
+ if (n_pagep)
+ LSN(n_pagep) = new_lsn;
+ LSN(p) = new_lsn;
+ }
+ cursorp->pgno = NEXT_PGNO(p);
+ cursorp->bndx = 0;
+ /*
+ * Since we are about to delete the cursor page and we have
+ * just moved the cursor, we need to make sure that the
+ * old page pointer isn't left hanging around in the cursor.
+ */
+ cursorp->pagep = NULL;
+ chg_pgno = PGNO(p);
+ ret = __ham_del_page(hashp->dbp, p);
+ if ((tret = __ham_put_page(hashp->dbp, p_pagep, 1)) != 0 &&
+ ret == 0)
+ ret = tret;
+ if (n_pagep != NULL &&
+ (tret = __ham_put_page(hashp->dbp, n_pagep, 1)) != 0 &&
+ ret == 0)
+ ret = tret;
+ if (ret != 0)
+ return (ret);
+ } else {
+ /*
+ * Mark item deleted so that we don't try to return it, and
+ * so that we update the cursor correctly on the next call
+ * to next.
+ */
+ F_SET(cursorp, H_DELETED);
+ chg_pgno = cursorp->pgno;
+ ret = __ham_dirty_page(hashp, p);
+ }
+ __ham_c_update(hashp, cursorp, chg_pgno, 0, 0, 0);
+
+ F_CLR(cursorp, H_OK);
+ return (ret);
+}
+/*
+ * PUBLIC: int __ham_replpair __P((HTAB *, HASH_CURSOR *, DBT *, u_int32_t));
+ * Given the key data indicated by the cursor, replace part/all of it
+ * according to the fields in the dbt.
+ */
+int
+__ham_replpair(hashp, hcp, dbt, make_dup)
+ HTAB *hashp;
+ HASH_CURSOR *hcp;
+ DBT *dbt;
+ u_int32_t make_dup;
+{
+ DBT old_dbt, tmp;
+ DB_LSN new_lsn;
+ HKEYDATA *hk;
+ u_int32_t len;
+ int32_t change;
+ int is_big, ret, type;
+ u_int8_t *beg, *dest, *end, *src;
+
+ /*
+ * Big item replacements are handled in generic code.
+ * Items that fit on the current page fall into 4 classes.
+ * 1. On-page element, same size
+ * 2. On-page element, new is bigger (fits)
+ * 3. On-page element, new is bigger (does not fit)
+ * 4. On-page element, old is bigger
+ * Numbers 1, 2, and 4 are essentially the same (and should
+ * be the common case). We handle case 3 as a delete and
+ * add.
+ */
+
+ /*
+ * We need to compute the number of bytes that we are adding or
+ * removing from the entry. Normally, we can simply substract
+ * the number of bytes we are replacing (dbt->dlen) from the
+ * number of bytes we are inserting (dbt->size). However, if
+ * we are doing a partial put off the end of a record, then this
+ * formula doesn't work, because we are essentially adding
+ * new bytes.
+ */
+ change = dbt->size - dbt->dlen;
+
+ hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
+ is_big = hk->type == H_OFFPAGE;
+
+ if (is_big)
+ memcpy(&len, (u_int8_t *)hk + SSZ(HOFFPAGE, tlen),
+ sizeof(u_int32_t));
+ else
+ len = LEN_HKEYDATA(hcp->pagep,
+ hashp->dbp->pgsize, H_DATAINDEX(hcp->bndx));
+
+ if (dbt->doff + dbt->dlen > len)
+ change += dbt->doff + dbt->dlen - len;
+
+
+ if (change > (int)P_FREESPACE(hcp->pagep) || is_big) {
+ /*
+ * Case 3 -- two subcases.
+ * A. This is not really a partial operation, but an overwrite.
+ * Simple del and add works.
+ * B. This is a partial and we need to construct the data that
+ * we are really inserting (yuck).
+ * In both cases, we need to grab the key off the page (in
+ * some cases we could do this outside of this routine; for
+ * cleanliness we do it here. If you happen to be on a big
+ * key, this could be a performance hit).
+ */
+ tmp.flags = 0;
+ F_SET(&tmp, DB_DBT_MALLOC | DB_DBT_INTERNAL);
+ if ((ret =
+ __db_ret(hashp->dbp, hcp->pagep, H_KEYINDEX(hcp->bndx),
+ &tmp, &hcp->big_key, &hcp->big_keylen)) != 0)
+ return (ret);
+
+ type = hk->type;
+ if (dbt->doff == 0 && dbt->dlen == len) {
+ ret = __ham_del_pair(hashp, hcp);
+ if (ret == 0)
+ ret = __ham_add_el(hashp, hcp, &tmp, dbt, type);
+ } else { /* Case B */
+ DBT tdata;
+ tdata.flags = 0;
+ F_SET(&tdata, DB_DBT_MALLOC | DB_DBT_INTERNAL);
+
+ if ((ret = __db_ret(hashp->dbp, hcp->pagep,
+ H_DATAINDEX(hcp->bndx), &tdata, &hcp->big_data,
+ &hcp->big_datalen)) != 0)
+ goto err;
+
+ /* Now we can delete the item. */
+ if ((ret = __ham_del_pair(hashp, hcp)) != 0) {
+ free(tdata.data);
+ goto err;
+ }
+
+ /* Now shift old data around to make room for new. */
+ if (change > 0) {
+ tdata.data = (void *)
+ realloc(tdata.data, tdata.size + change);
+ memset((u_int8_t *)tdata.data + tdata.size,
+ 0, change);
+ }
+ if (tdata.data == NULL)
+ return (ENOMEM);
+ end = (u_int8_t *)tdata.data + tdata.size;
+
+ src = (u_int8_t *)tdata.data + dbt->doff + dbt->dlen;
+ if (src < end && tdata.size > dbt->doff + dbt->dlen) {
+ len = tdata.size - dbt->doff - dbt->dlen;
+ dest = src + change;
+ memmove(dest, src, len);
+ }
+ memcpy((u_int8_t *)tdata.data + dbt->doff,
+ dbt->data, dbt->size);
+ tdata.size += change;
+
+ /* Now add the pair. */
+ ret = __ham_add_el(hashp, hcp, &tmp, &tdata, type);
+ free(tdata.data);
+ }
+err: free(tmp.data);
+ return (ret);
+ }
+
+ /*
+ * Set up pointer into existing data. Do it before the log
+ * message so we can use it inside of the log setup.
+ */
+ beg = H_PAIRDATA(hcp->pagep, hcp->bndx)->data;
+ beg += dbt->doff;
+
+ /*
+ * If we are going to have to move bytes at all, figure out
+ * all the parameters here. Then log the call before moving
+ * anything around.
+ */
+ if (DB_LOGGING(hashp->dbp)) {
+ old_dbt.data = beg;
+ old_dbt.size = dbt->dlen;
+ if ((ret = __ham_replace_log(hashp->dbp->dbenv->lg_info,
+ (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
+ hashp->dbp->log_fileid, PGNO(hcp->pagep),
+ (u_int32_t)H_DATAINDEX(hcp->bndx), &LSN(hcp->pagep),
+ (u_int32_t)dbt->doff, &old_dbt, dbt, make_dup)) != 0)
+ return (ret);
+
+ LSN(hcp->pagep) = new_lsn; /* Structure assignment. */
+ }
+
+ __ham_onpage_replace(hcp->pagep, hashp->dbp->pgsize,
+ (u_int32_t)H_DATAINDEX(hcp->bndx), (int32_t)dbt->doff, change, dbt);
+
+ return (0);
+}
+
+/*
+ * Replace data on a page with new data, possibly growing or shrinking what's
+ * there. This is called on two different occasions. On one (from replpair)
+ * we are interested in changing only the data. On the other (from recovery)
+ * we are replacing the entire data (header and all) with a new element. In
+ * the latter case, the off argument is negative.
+ * pagep: the page that we're changing
+ * ndx: page index of the element that is growing/shrinking.
+ * off: Offset at which we are beginning the replacement.
+ * change: the number of bytes (+ or -) that the element is growing/shrinking.
+ * dbt: the new data that gets written at beg.
+ * PUBLIC: void __ham_onpage_replace __P((PAGE *, size_t, u_int32_t, int32_t,
+ * PUBLIC: int32_t, DBT *));
+ */
+void
+__ham_onpage_replace(pagep, pgsize, ndx, off, change, dbt)
+ PAGE *pagep;
+ size_t pgsize;
+ u_int32_t ndx;
+ int32_t off;
+ int32_t change;
+ DBT *dbt;
+{
+ db_indx_t i;
+ int32_t len;
+ u_int8_t *src, *dest;
+ int zero_me;
+
+ if (change != 0) {
+ zero_me = 0;
+ src = (u_int8_t *)(pagep) + HOFFSET(pagep);
+ if (off < 0)
+ len = pagep->inp[ndx] - HOFFSET(pagep);
+ else if ((u_int32_t)off >= LEN_HKEYDATA(pagep, pgsize, ndx)) {
+ len = GET_HKEYDATA(pagep, ndx)->data +
+ LEN_HKEYDATA(pagep, pgsize, ndx) - src;
+ zero_me = 1;
+ } else
+ len = (GET_HKEYDATA(pagep, ndx)->data + off) - src;
+ dest = src - change;
+ memmove(dest, src, len);
+ if (zero_me)
+ memset(dest + len, 0, change);
+
+ /* Now update the indices. */
+ for (i = ndx; i < NUM_ENT(pagep); i++)
+ pagep->inp[i] -= change;
+ HOFFSET(pagep) -= change;
+ }
+ if (off >= 0)
+ memcpy(GET_HKEYDATA(pagep, ndx)->data + off,
+ dbt->data, dbt->size);
+ else
+ memcpy(P_ENTRY(pagep, ndx), dbt->data, dbt->size);
+}
+
+/*
+ * PUBLIC: int __ham_split_page __P((HTAB *, u_int32_t, u_int32_t));
+ */
+int
+__ham_split_page(hashp, obucket, nbucket)
+ HTAB *hashp;
+ u_int32_t obucket, nbucket;
+{
+ DBT key, val, page_dbt;
+ DB_ENV *dbenv;
+ DB_LSN new_lsn;
+ PAGE **pp, *old_pagep, *temp_pagep, *new_pagep;
+ db_indx_t n;
+ db_pgno_t bucket_pgno, next_pgno;
+ u_int32_t big_len, len;
+ int ret, tret;
+ void *big_buf;
+
+ dbenv = hashp->dbp->dbenv;
+ temp_pagep = old_pagep = new_pagep = NULL;
+
+ bucket_pgno = BUCKET_TO_PAGE(hashp, obucket);
+ if ((ret = __ham_get_page(hashp->dbp, bucket_pgno, &old_pagep)) != 0)
+ return (ret);
+ if ((ret = __ham_new_page(hashp, BUCKET_TO_PAGE(hashp, nbucket), P_HASH,
+ &new_pagep)) != 0)
+ goto err;
+
+ temp_pagep = hashp->split_buf;
+ memcpy(temp_pagep, old_pagep, hashp->hdr->pagesize);
+
+ if (DB_LOGGING(hashp->dbp)) {
+ page_dbt.size = hashp->hdr->pagesize;
+ page_dbt.data = old_pagep;
+ if ((ret = __ham_splitdata_log(dbenv->lg_info,
+ (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
+ hashp->dbp->log_fileid, SPLITOLD, PGNO(old_pagep),
+ &page_dbt, &LSN(old_pagep))) != 0)
+ goto err;
+ }
+
+ P_INIT(old_pagep, hashp->hdr->pagesize, PGNO(old_pagep), PGNO_INVALID,
+ PGNO_INVALID, 0, P_HASH);
+
+ if (DB_LOGGING(hashp->dbp))
+ LSN(old_pagep) = new_lsn; /* Structure assignment. */
+
+ big_len = 0;
+ big_buf = NULL;
+ val.flags = key.flags = 0;
+ while (temp_pagep != NULL) {
+ for (n = 0; n < (db_indx_t)H_NUMPAIRS(temp_pagep); n++) {
+ if ((ret =
+ __db_ret(hashp->dbp, temp_pagep, H_KEYINDEX(n),
+ &key, &big_buf, &big_len)) != 0)
+ goto err;
+
+ if (__ham_call_hash(hashp, key.data, key.size)
+ == obucket)
+ pp = &old_pagep;
+ else
+ pp = &new_pagep;
+
+ /*
+ * Figure out how many bytes we need on the new
+ * page to store the key/data pair.
+ */
+
+ len = LEN_HITEM(temp_pagep, hashp->hdr->pagesize,
+ H_DATAINDEX(n)) +
+ LEN_HITEM(temp_pagep, hashp->hdr->pagesize,
+ H_KEYINDEX(n)) +
+ 2 * sizeof(db_indx_t);
+
+ if (P_FREESPACE(*pp) < len) {
+ if (DB_LOGGING(hashp->dbp)) {
+ page_dbt.size = hashp->hdr->pagesize;
+ page_dbt.data = *pp;
+ if ((ret = __ham_splitdata_log(
+ dbenv->lg_info,
+ (DB_TXN *)hashp->dbp->txn,
+ &new_lsn, 0,
+ hashp->dbp->log_fileid, SPLITNEW,
+ PGNO(*pp), &page_dbt,
+ &LSN(*pp))) != 0)
+ goto err;
+ LSN(*pp) = new_lsn;
+ }
+ if ((ret = __ham_add_ovflpage(hashp,
+ *pp, 1, pp)) != 0)
+ goto err;
+ }
+ __ham_copy_item(hashp, temp_pagep, H_KEYINDEX(n), *pp);
+ __ham_copy_item(hashp, temp_pagep, H_DATAINDEX(n), *pp);
+ }
+ next_pgno = NEXT_PGNO(temp_pagep);
+
+ /* Clear temp_page; if it's a link overflow page, free it. */
+ if (PGNO(temp_pagep) != bucket_pgno && (ret =
+ __ham_del_page(hashp->dbp, temp_pagep)) != 0)
+ goto err;
+
+ if (next_pgno == PGNO_INVALID)
+ temp_pagep = NULL;
+ else if ((ret =
+ __ham_get_page(hashp->dbp, next_pgno, &temp_pagep)) != 0)
+ goto err;
+
+ if (temp_pagep != NULL && DB_LOGGING(hashp->dbp)) {
+ page_dbt.size = hashp->hdr->pagesize;
+ page_dbt.data = temp_pagep;
+ if ((ret = __ham_splitdata_log(dbenv->lg_info,
+ (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
+ hashp->dbp->log_fileid, SPLITOLD, PGNO(temp_pagep),
+ &page_dbt, &LSN(temp_pagep))) != 0)
+ goto err;
+ LSN(temp_pagep) = new_lsn;
+ }
+ }
+ if (big_buf != NULL)
+ free(big_buf);
+
+ /*
+ * If the original bucket spanned multiple pages, then we've got
+ * a pointer to a page that used to be on the bucket chain. It
+ * should be deleted.
+ */
+ if (temp_pagep != NULL && PGNO(temp_pagep) != bucket_pgno &&
+ (ret = __ham_del_page(hashp->dbp, temp_pagep)) != 0)
+ goto err;
+
+ /*
+ * Write new buckets out.
+ */
+ if (DB_LOGGING(hashp->dbp)) {
+ page_dbt.size = hashp->hdr->pagesize;
+ page_dbt.data = old_pagep;
+ if ((ret = __ham_splitdata_log(dbenv->lg_info,
+ (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
+ hashp->dbp->log_fileid, SPLITNEW, PGNO(old_pagep),
+ &page_dbt, &LSN(old_pagep))) != 0)
+ goto err;
+ LSN(old_pagep) = new_lsn;
+
+ page_dbt.data = new_pagep;
+ if ((ret = __ham_splitdata_log(dbenv->lg_info,
+ (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
+ hashp->dbp->log_fileid, SPLITNEW, PGNO(new_pagep),
+ &page_dbt, &LSN(new_pagep))) != 0)
+ goto err;
+ LSN(new_pagep) = new_lsn;
+ }
+ ret = __ham_put_page(hashp->dbp, old_pagep, 1);
+ if ((tret = __ham_put_page(hashp->dbp, new_pagep, 1)) != 0 &&
+ ret == 0)
+ ret = tret;
+
+err: if (0) {
+ if (old_pagep != NULL)
+ (void)__ham_put_page(hashp->dbp, old_pagep, 1);
+ if (new_pagep != NULL)
+ (void)__ham_put_page(hashp->dbp, new_pagep, 1);
+ if (temp_pagep != NULL && PGNO(temp_pagep) != bucket_pgno)
+ (void)__ham_put_page(hashp->dbp, temp_pagep, 1);
+ }
+ return (ret);
+}
+
+/*
+ * Add the given pair to the page. The page in question may already be
+ * held (i.e. it was already gotten). If it is, then the page is passed
+ * in via the pagep parameter. On return, pagep will contain the page
+ * to which we just added something. This allows us to link overflow
+ * pages and return the new page having correctly put the last page.
+ *
+ * PUBLIC: int __ham_add_el __P((HTAB *, HASH_CURSOR *, const DBT *, const DBT *,
+ * PUBLIC: int));
+ */
+int
+__ham_add_el(hashp, hcp, key, val, type)
+ HTAB *hashp;
+ HASH_CURSOR *hcp;
+ const DBT *key, *val;
+ int type;
+{
+ DBT *pkey, *pdata, key_dbt, data_dbt;
+ DB_LSN new_lsn;
+ HOFFPAGE doff, koff;
+ db_pgno_t next_pgno;
+ u_int32_t data_size, key_size, pairsize;
+ int do_expand, is_keybig, is_databig, rectype, ret;
+ int key_type, data_type;
+
+ do_expand = 0;
+
+ if (hcp->pagep == NULL && (ret = __ham_get_page(hashp->dbp,
+ hcp->seek_found_page != PGNO_INVALID ? hcp->seek_found_page :
+ hcp->pgno, &hcp->pagep)) != 0)
+ return (ret);
+
+ key_size = HKEYDATA_PSIZE(key->size);
+ data_size = HKEYDATA_PSIZE(val->size);
+ is_keybig = ISBIG(hashp, key->size);
+ is_databig = ISBIG(hashp, val->size);
+ if (is_keybig)
+ key_size = HOFFPAGE_PSIZE;
+ if (is_databig)
+ data_size = HOFFPAGE_PSIZE;
+
+ pairsize = key_size + data_size;
+
+ /* Advance to first page in chain with room for item. */
+ while (H_NUMPAIRS(hcp->pagep) && NEXT_PGNO(hcp->pagep) !=
+ PGNO_INVALID) {
+ /*
+ * This may not be the end of the chain, but the pair may fit
+ * anyway. Check if it's a bigpair that fits or a regular
+ * pair that fits.
+ */
+ if (P_FREESPACE(hcp->pagep) >= pairsize)
+ break;
+ next_pgno = NEXT_PGNO(hcp->pagep);
+ if ((ret =
+ __ham_next_cpage(hashp, hcp, next_pgno, 0, 0)) != 0)
+ return (ret);
+ }
+
+ /*
+ * Check if we need to allocate a new page.
+ */
+ if (P_FREESPACE(hcp->pagep) < pairsize) {
+ do_expand = 1;
+ if ((ret = __ham_add_ovflpage(hashp,
+ hcp->pagep, 1, &hcp->pagep)) != 0)
+ return (ret);
+ hcp->pgno = PGNO(hcp->pagep);
+ }
+
+ /*
+ * Update cursor.
+ */
+ hcp->bndx = H_NUMPAIRS(hcp->pagep);
+ F_CLR(hcp, H_DELETED);
+ if (is_keybig) {
+ if ((ret = __db_poff(hashp->dbp,
+ key, &koff.pgno, __ham_overflow_page)) != 0)
+ return (ret);
+ koff.type = H_OFFPAGE;
+ koff.tlen = key->size;
+ key_dbt.data = &koff;
+ key_dbt.size = sizeof(koff);
+ pkey = &key_dbt;
+ key_type = H_OFFPAGE;
+ } else {
+ pkey = (DBT *)key;
+ key_type = H_KEYDATA;
+ }
+
+ if (is_databig) {
+ if ((ret = __db_poff(hashp->dbp,
+ val, &doff.pgno, __ham_overflow_page)) != 0)
+ return (ret);
+ doff.type = H_OFFPAGE;
+ doff.tlen = val->size;
+ data_dbt.data = &doff;
+ data_dbt.size = sizeof(doff);
+ pdata = &data_dbt;
+ data_type = H_OFFPAGE;
+ } else {
+ pdata = (DBT *)val;
+ data_type = type;
+ }
+
+ if (DB_LOGGING(hashp->dbp)) {
+ rectype = PUTPAIR;
+ if (is_databig)
+ rectype |= PAIR_DATAMASK;
+ if (is_keybig)
+ rectype |= PAIR_KEYMASK;
+
+ if ((ret = __ham_insdel_log(hashp->dbp->dbenv->lg_info,
+ (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, rectype,
+ hashp->dbp->log_fileid, PGNO(hcp->pagep),
+ (u_int32_t)H_NUMPAIRS(hcp->pagep),
+ &LSN(hcp->pagep), pkey, pdata)) != 0)
+ return (ret);
+
+ /* Move lsn onto page. */
+ LSN(hcp->pagep) = new_lsn; /* Structure assignment. */
+ }
+
+ __ham_putitem(hcp->pagep, pkey, key_type);
+ __ham_putitem(hcp->pagep, pdata, data_type);
+
+ /*
+ * For splits, we are going to update item_info's page number
+ * field, so that we can easily return to the same page the
+ * next time we come in here. For other operations, this shouldn't
+ * matter, since odds are this is the last thing that happens before
+ * we return to the user program.
+ */
+ hcp->pgno = PGNO(hcp->pagep);
+
+ /*
+ * XXX Maybe keep incremental numbers here
+ */
+ if (!F_ISSET(hashp->dbp, DB_AM_LOCKING))
+ hashp->hdr->nelem++;
+
+ if (do_expand || (hashp->hdr->ffactor != 0 &&
+ (u_int32_t)H_NUMPAIRS(hcp->pagep) > hashp->hdr->ffactor))
+ F_SET(hcp, H_EXPAND);
+ return (0);
+}
+
+
+/*
+ * Special __putitem call used in splitting -- copies one entry to
+ * another. Works for all types of hash entries (H_OFFPAGE, H_KEYDATA,
+ * H_DUPLICATE, H_OFFDUP). Since we log splits at a high level, we
+ * do not need to do any logging here.
+ * PUBLIC: void __ham_copy_item __P((HTAB *, PAGE *, int, PAGE *));
+ */
+void
+__ham_copy_item(hashp, src_page, src_ndx, dest_page)
+ HTAB *hashp;
+ PAGE *src_page;
+ int src_ndx;
+ PAGE *dest_page;
+{
+ u_int32_t len;
+ void *src, *dest;
+
+ /*
+ * Copy the key and data entries onto this new page.
+ */
+ src = P_ENTRY(src_page, src_ndx);
+
+ /* Set up space on dest. */
+ len = LEN_HITEM(src_page, hashp->hdr->pagesize, src_ndx);
+ HOFFSET(dest_page) -= len;
+ dest_page->inp[NUM_ENT(dest_page)] = HOFFSET(dest_page);
+ dest = P_ENTRY(dest_page, NUM_ENT(dest_page));
+ NUM_ENT(dest_page)++;
+
+ memcpy(dest, src, len);
+}
+
+/*
+ *
+ * Returns:
+ * pointer on success
+ * NULL on error
+ *
+ * PUBLIC: int __ham_add_ovflpage __P((HTAB *, PAGE *, int, PAGE **));
+ */
+int
+__ham_add_ovflpage(hashp, pagep, release, pp)
+ HTAB *hashp;
+ PAGE *pagep;
+ int release;
+ PAGE **pp;
+{
+ DB_ENV *dbenv;
+ DB_LSN new_lsn;
+ PAGE *new_pagep;
+ int ret;
+
+ dbenv = hashp->dbp->dbenv;
+
+ if ((ret = __ham_overflow_page(hashp->dbp, P_HASH, &new_pagep)) != 0)
+ return (ret);
+
+ if (DB_LOGGING(hashp->dbp)) {
+ if ((ret = __ham_newpage_log(dbenv->lg_info,
+ (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, PUTOVFL,
+ hashp->dbp->log_fileid, PGNO(pagep), &LSN(pagep),
+ PGNO(new_pagep), &LSN(new_pagep), PGNO_INVALID, NULL)) != 0)
+ return (ret);
+
+ /* Move lsn onto page. */
+ LSN(pagep) = LSN(new_pagep) = new_lsn;
+ }
+ NEXT_PGNO(pagep) = PGNO(new_pagep);
+ PREV_PGNO(new_pagep) = PGNO(pagep);
+
+ if (release)
+ ret = __ham_put_page(hashp->dbp, pagep, 1);
+
+ hashp->hash_overflows++;
+ *pp = new_pagep;
+ return (ret);
+}
+
+
+/*
+ * PUBLIC: int __ham_new_page __P((HTAB *, u_int32_t, u_int32_t, PAGE **));
+ */
+int
+__ham_new_page(hashp, addr, type, pp)
+ HTAB *hashp;
+ u_int32_t addr, type;
+ PAGE **pp;
+{
+ PAGE *pagep;
+ int ret;
+
+ if ((ret = memp_fget(hashp->dbp->mpf,
+ &addr, DB_MPOOL_CREATE, &pagep)) != 0)
+ return (ret);
+
+#ifdef DEBUG_SLOW
+ account_page(hashp, addr, 1);
+#endif
+ /* This should not be necessary because page-in should do it. */
+ P_INIT(pagep,
+ hashp->hdr->pagesize, addr, PGNO_INVALID, PGNO_INVALID, 0, type);
+
+ *pp = pagep;
+ return (0);
+}
+
+/*
+ * PUBLIC: int __ham_del_page __P((DB *, PAGE *));
+ */
+int
+__ham_del_page(dbp, pagep)
+ DB *dbp;
+ PAGE *pagep;
+{
+ DB_LSN new_lsn;
+ HTAB *hashp;
+ int ret;
+
+ hashp = (HTAB *)dbp->internal;
+ ret = 0;
+ DIRTY_META(hashp, ret);
+ if (ret != 0) {
+ if (ret != EAGAIN)
+ __db_err(hashp->dbp->dbenv,
+ "free_ovflpage: unable to lock meta data page %s\n",
+ strerror(ret));
+ /*
+ * If we are going to return an error, then we should free
+ * the page, so it doesn't stay pinned forever.
+ */
+ (void)__ham_put_page(hashp->dbp, pagep, 0);
+ return (ret);
+ }
+
+ if (DB_LOGGING(hashp->dbp)) {
+ if ((ret = __ham_newpgno_log(hashp->dbp->dbenv->lg_info,
+ (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, DELPGNO,
+ hashp->dbp->log_fileid, PGNO(pagep), hashp->hdr->last_freed,
+ (u_int32_t)TYPE(pagep), NEXT_PGNO(pagep), P_INVALID,
+ &LSN(pagep), &hashp->hdr->lsn)) != 0)
+ return (ret);
+
+ hashp->hdr->lsn = new_lsn;
+ LSN(pagep) = new_lsn;
+ }
+
+#ifdef DEBUG
+ {
+ db_pgno_t __pgno;
+ DB_LSN __lsn;
+ __pgno = pagep->pgno;
+ __lsn = pagep->lsn;
+ memset(pagep, 0xff, dbp->pgsize);
+ pagep->pgno = __pgno;
+ pagep->lsn = __lsn;
+ }
+#endif
+ TYPE(pagep) = P_INVALID;
+ NEXT_PGNO(pagep) = hashp->hdr->last_freed;
+ hashp->hdr->last_freed = PGNO(pagep);
+
+ return (__ham_put_page(hashp->dbp, pagep, 1));
+}
+
+
+/*
+ * PUBLIC: int __ham_put_page __P((DB *, PAGE *, int32_t));
+ */
+int
+__ham_put_page(dbp, pagep, is_dirty)
+ DB *dbp;
+ PAGE *pagep;
+ int32_t is_dirty;
+{
+#ifdef DEBUG_SLOW
+ account_page((HTAB *)dbp->cookie,
+ ((BKT *)((char *)pagep - sizeof(BKT)))->pgno, -1);
+#endif
+ return (memp_fput(dbp->mpf, pagep, (is_dirty ? DB_MPOOL_DIRTY : 0)));
+}
+
+/*
+ * __ham_dirty_page --
+ * Mark a page dirty.
+ *
+ * PUBLIC: int __ham_dirty_page __P((HTAB *, PAGE *));
+ */
+int
+__ham_dirty_page(hashp, pagep)
+ HTAB *hashp;
+ PAGE *pagep;
+{
+ return (memp_fset(hashp->dbp->mpf, pagep, DB_MPOOL_DIRTY));
+}
+
+/*
+ * PUBLIC: int __ham_get_page __P((DB *, db_pgno_t, PAGE **));
+ */
+int
+__ham_get_page(dbp, addr, pagep)
+ DB *dbp;
+ db_pgno_t addr;
+ PAGE **pagep;
+{
+ int ret;
+
+ ret = memp_fget(dbp->mpf, &addr, DB_MPOOL_CREATE, pagep);
+#ifdef DEBUG_SLOW
+ if (*pagep != NULL)
+ account_page((HTAB *)dbp->internal, addr, 1);
+#endif
+ return (ret);
+}
+
+/*
+ * PUBLIC: int __ham_overflow_page __P((DB *, u_int32_t, PAGE **));
+ */
+int
+__ham_overflow_page(dbp, type, pp)
+ DB *dbp;
+ u_int32_t type;
+ PAGE **pp;
+{
+ DB_LSN *lsnp, new_lsn;
+ HTAB *hashp;
+ PAGE *p;
+ db_pgno_t new_addr, next_free, newalloc_flag;
+ u_int32_t offset, splitnum;
+ int ret;
+
+ hashp = (HTAB *)dbp->internal;
+
+ ret = 0;
+ DIRTY_META(hashp, ret);
+ if (ret != 0)
+ return (ret);
+
+ /*
+ * This routine is split up into two parts. First we have
+ * to figure out the address of the new page that we are
+ * allocating. Then we have to log the allocation. Only
+ * after the log do we get to complete allocation of the
+ * new page.
+ */
+ new_addr = hashp->hdr->last_freed;
+ if (new_addr != PGNO_INVALID) {
+ if ((ret = __ham_get_page(hashp->dbp, new_addr, &p)) != 0)
+ return (ret);
+ next_free = NEXT_PGNO(p);
+ lsnp = &LSN(p);
+ newalloc_flag = 0;
+ } else {
+ splitnum = hashp->hdr->ovfl_point;
+ hashp->hdr->spares[splitnum]++;
+ offset = hashp->hdr->spares[splitnum] -
+ (splitnum ? hashp->hdr->spares[splitnum - 1] : 0);
+ new_addr = PGNO_OF(hashp, hashp->hdr->ovfl_point, offset);
+ if (new_addr > MAX_PAGES(hashp)) {
+ __db_err(hashp->dbp->dbenv, "hash: out of file pages");
+ hashp->hdr->spares[splitnum]--;
+ return (ENOMEM);
+ }
+ next_free = PGNO_INVALID;
+ p = NULL;
+ lsnp = NULL;
+ newalloc_flag = 1;
+ }
+
+ if (DB_LOGGING(hashp->dbp)) {
+ if ((ret = __ham_newpgno_log(hashp->dbp->dbenv->lg_info,
+ (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, ALLOCPGNO,
+ hashp->dbp->log_fileid, new_addr, next_free,
+ 0, newalloc_flag, type, lsnp, &hashp->hdr->lsn)) != 0)
+ return (ret);
+
+ hashp->hdr->lsn = new_lsn;
+ if (lsnp != NULL)
+ *lsnp = new_lsn;
+ }
+
+ if (p != NULL) {
+ /* We just took something off the free list, initialize it. */
+ hashp->hdr->last_freed = next_free;
+ P_INIT(p, hashp->hdr->pagesize, PGNO(p), PGNO_INVALID,
+ PGNO_INVALID, 0, (u_int8_t)type);
+ } else {
+ /* Get the new page. */
+ if ((ret = __ham_new_page(hashp, new_addr, type, &p)) != 0)
+ return (ret);
+ }
+ if (DB_LOGGING(hashp->dbp))
+ LSN(p) = new_lsn;
+
+ *pp = p;
+ return (0);
+}
+
+#ifdef DEBUG
+/*
+ * PUBLIC: #ifdef DEBUG
+ * PUBLIC: int bucket_to_page __P((HTAB *, int));
+ * PUBLIC: #endif
+ */
+int
+bucket_to_page(hashp, n)
+ HTAB *hashp;
+ int n;
+{
+ int ret_val;
+
+ ret_val = n + 1;
+ if (n != 0)
+ ret_val += hashp->hdr->spares[__db_log2(n + 1) - 1];
+ return (ret_val);
+}
+#endif
+
+
+/*
+ * Create a bunch of overflow pages at the current split point.
+ * PUBLIC: void __ham_init_ovflpages __P((HTAB *));
+ */
+void
+__ham_init_ovflpages(hp)
+ HTAB *hp;
+{
+ DB_LSN new_lsn;
+ PAGE *p;
+ db_pgno_t last_pgno;
+ u_int32_t i, numpages;
+
+ numpages = hp->hdr->ovfl_point + 1;
+
+ last_pgno = hp->hdr->last_freed;
+ if (DB_LOGGING(hp->dbp)) {
+ (void)__ham_ovfl_log(hp->dbp->dbenv->lg_info,
+ (DB_TXN *)hp->dbp->txn, &new_lsn, 0,
+ hp->dbp->log_fileid, PGNO_OF(hp, hp->hdr->ovfl_point, 1),
+ numpages, last_pgno, &hp->hdr->lsn);
+ hp->hdr->lsn = new_lsn;
+ } else
+ ZERO_LSN(new_lsn);
+
+ hp->hdr->spares[hp->hdr->ovfl_point] += numpages;
+ for (i = numpages; i > 0; i--) {
+ if (__ham_new_page(hp,
+ PGNO_OF(hp, hp->hdr->ovfl_point, i), P_INVALID, &p) != 0)
+ break;
+ LSN(p) = new_lsn;
+ NEXT_PGNO(p) = last_pgno;
+ last_pgno = PGNO(p);
+ (void)__ham_put_page(hp->dbp, p, 1);
+ }
+ hp->hdr->last_freed = last_pgno;
+}
+
+/*
+ * PUBLIC: int __ham_get_cpage __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
+ */
+int
+__ham_get_cpage(hashp, hcp, mode)
+ HTAB *hashp;
+ HASH_CURSOR *hcp;
+ db_lockmode_t mode;
+{
+ int ret;
+
+ if (hcp->lock == 0 && F_ISSET(hashp->dbp, DB_AM_LOCKING) &&
+ (ret = __ham_lock_bucket(hashp->dbp, hcp, mode)) != 0)
+ return (ret);
+
+ if (hcp->pagep == NULL) {
+ if (hcp->pgno == PGNO_INVALID) {
+ hcp->pgno = BUCKET_TO_PAGE(hashp, hcp->bucket);
+ hcp->bndx = 0;
+ }
+
+ if ((ret =
+ __ham_get_page(hashp->dbp, hcp->pgno, &hcp->pagep)) != 0)
+ return (ret);
+ }
+
+ if (hcp->dpgno != PGNO_INVALID && hcp->dpagep == NULL)
+ if ((ret =
+ __ham_get_page(hashp->dbp, hcp->dpgno, &hcp->dpagep)) != 0)
+ return (ret);
+ return (0);
+}
+
+/*
+ * Get a new page at the cursor, putting the last page if necessary.
+ * If the flag is set to H_ISDUP, then we are talking about the
+ * duplicate page, not the main page.
+ * PUBLIC: int __ham_next_cpage __P((HTAB *, HASH_CURSOR *, db_pgno_t,
+ * PUBLIC: int, int));
+ */
+int
+__ham_next_cpage(hashp, hcp, pgno, dirty, flags)
+ HTAB *hashp;
+ HASH_CURSOR *hcp;
+ db_pgno_t pgno;
+ int dirty;
+ int flags;
+{
+ PAGE *p;
+ int ret;
+
+ if (flags & H_ISDUP && hcp->dpagep != NULL &&
+ (ret = __ham_put_page(hashp->dbp, hcp->dpagep, dirty)) != 0)
+ return (ret);
+ else if (!(flags & H_ISDUP) && hcp->pagep != NULL &&
+ (ret = __ham_put_page(hashp->dbp, hcp->pagep, dirty)) != 0)
+ return (ret);
+
+ if ((ret = __ham_get_page(hashp->dbp, pgno, &p)) != 0)
+ return (ret);
+
+ if (flags & H_ISDUP) {
+ hcp->dpagep = p;
+ hcp->dpgno = pgno;
+ hcp->dndx = 0;
+ } else {
+ hcp->pagep = p;
+ hcp->pgno = pgno;
+ hcp->bndx = 0;
+ }
+
+ return (0);
+}
+
+/*
+ * __ham_lock_bucket --
+ * Get the lock on a particular bucket.
+ */
+static int
+__ham_lock_bucket(dbp, hcp, mode)
+ DB *dbp;
+ HASH_CURSOR *hcp;
+ db_lockmode_t mode;
+{
+ int ret;
+
+ /*
+ * What a way to trounce on the memory system. It might be
+ * worth copying the lk_info into the hashp.
+ */
+ ret = 0;
+ dbp->lock.pgno = (db_pgno_t)(hcp->bucket);
+ ret = lock_get(dbp->dbenv->lk_info,
+ dbp->txn == NULL ? dbp->locker : dbp->txn->txnid, 0,
+ &dbp->lock_dbt, mode, &hcp->lock);
+
+ return (ret < 0 ? EAGAIN : ret);
+}
+
+/*
+ * __ham_dpair --
+ * Delete a pair on a page, paying no attention to what the pair
+ * represents. The caller is responsible for freeing up duplicates
+ * or offpage entries that might be referenced by this pair.
+ *
+ * PUBLIC: void __ham_dpair __P((DB *, PAGE *, u_int32_t));
+ */
+void
+__ham_dpair(dbp, p, pndx)
+ DB *dbp;
+ PAGE *p;
+ u_int32_t pndx;
+{
+ db_indx_t delta, n;
+ u_int8_t *dest, *src;
+
+ /*
+ * Compute "delta", the amount we have to shift all of the
+ * offsets. To find the delta, we just need to calculate
+ * the size of the pair of elements we are removing.
+ */
+ delta = H_PAIRSIZE(p, dbp->pgsize, pndx);
+
+ /*
+ * The hard case: we want to remove something other than
+ * the last item on the page. We need to shift data and
+ * offsets down.
+ */
+ if ((db_indx_t)pndx != H_NUMPAIRS(p) - 1) {
+ /*
+ * Move the data: src is the first occupied byte on
+ * the page. (Length is delta.)
+ */
+ src = (u_int8_t *)p + HOFFSET(p);
+
+ /*
+ * Destination is delta bytes beyond src. This might
+ * be an overlapping copy, so we have to use memmove.
+ */
+ dest = src + delta;
+ memmove(dest, src, p->inp[H_DATAINDEX(pndx)] - HOFFSET(p));
+ }
+
+ /* Adjust the offsets. */
+ for (n = (db_indx_t)pndx; n < (db_indx_t)(H_NUMPAIRS(p) - 1); n++) {
+ p->inp[H_KEYINDEX(n)] = p->inp[H_KEYINDEX(n+1)] + delta;
+ p->inp[H_DATAINDEX(n)] = p->inp[H_DATAINDEX(n+1)] + delta;
+ }
+
+ /* Adjust page metadata. */
+ HOFFSET(p) = HOFFSET(p) + delta;
+ NUM_ENT(p) = NUM_ENT(p) - 2;
+}
+
+#ifdef DEBUG_SLOW
+static void
+account_page(hashp, pgno, inout)
+ HTAB *hashp;
+ db_pgno_t pgno;
+ int inout;
+{
+ static struct {
+ db_pgno_t pgno;
+ int times;
+ } list[100];
+ static int last;
+ int i, j;
+
+ if (inout == -1) /* XXX: Kluge */
+ inout = 0;
+
+ /* Find page in list. */
+ for (i = 0; i < last; i++)
+ if (list[i].pgno == pgno)
+ break;
+ /* Not found. */
+ if (i == last) {
+ list[last].times = inout;
+ list[last].pgno = pgno;
+ last++;
+ }
+ list[i].times = inout;
+ if (list[i].times == 0) {
+ for (j = i; j < last; j++)
+ list[j] = list[j + 1];
+ last--;
+ }
+ for (i = 0; i < last; i++, list[i].times++)
+ if (list[i].times > 20 && !is_bitmap_pgno(hashp, list[i].pgno))
+ (void)fprintf(stderr,
+ "Warning: pg %lu has been out for %d times\n",
+ (u_long)list[i].pgno, list[i].times);
+}
+#endif /* DEBUG_SLOW */
diff --git a/db2/hash/hash_rec.c b/db2/hash/hash_rec.c
new file mode 100644
index 0000000000..81d9bb5ea8
--- /dev/null
+++ b/db2/hash/hash_rec.c
@@ -0,0 +1,810 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ * Sleepycat Software. All rights reserved.
+ */
+/*
+ * Copyright (c) 1995, 1996
+ * Margo Seltzer. All rights reserved.
+ */
+/*
+ * Copyright (c) 1995, 1996
+ * The President and Fellows of Harvard University. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)hash_rec.c 10.12 (Sleepycat) 8/22/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_page.h"
+#include "hash.h"
+#include "btree.h"
+#include "log.h"
+#include "db_dispatch.h"
+#include "common_ext.h"
+
+/*
+ * __ham_insdel_recover --
+ *
+ * PUBLIC: int __ham_insdel_recover
+ * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+int
+__ham_insdel_recover(logp, dbtp, lsnp, redo, info)
+ DB_LOG *logp;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ int redo;
+ void *info;
+{
+ __ham_insdel_args *argp;
+ DB *mdbp, *file_dbp;
+ DB_MPOOLFILE *mpf;
+ HTAB *hashp;
+ PAGE *pagep;
+ u_int32_t op;
+ int cmp_n, cmp_p, getmeta, ret;
+
+ getmeta = 0;
+ hashp = NULL; /* XXX: shut the compiler up. */
+ REC_PRINT(__ham_insdel_print);
+ REC_INTRO(__ham_insdel_read);
+
+ ret = memp_fget(mpf, &argp->pgno, 0, &pagep);
+ if (ret != 0)
+ if (!redo) {
+ /*
+ * We are undoing and the page doesn't exist. That
+ * is equivalent to having a pagelsn of 0, so we
+ * would not have to undo anything. In this case,
+ * don't bother creating a page.
+ */
+ *lsnp = argp->prev_lsn;
+ ret = 0;
+ goto out;
+ } else if ((ret = memp_fget(mpf, &argp->pgno,
+ DB_MPOOL_CREATE, &pagep)) != 0)
+ goto out;
+
+
+ hashp = (HTAB *)file_dbp->internal;
+ GET_META(file_dbp, hashp);
+ getmeta = 1;
+
+ cmp_n = log_compare(lsnp, &LSN(pagep));
+ cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
+ /*
+ * Two possible things going on:
+ * redo a delete/undo a put: delete the item from the page.
+ * redo a put/undo a delete: add the item to the page.
+ * If we are undoing a delete, then the information logged is the
+ * entire entry off the page, not just the data of a dbt. In
+ * this case, we want to copy it back onto the page verbatim.
+ * We do this by calling __putitem with the type H_OFFPAGE instead
+ * of H_KEYDATA.
+ */
+ op = OPCODE_OF(argp->opcode);
+
+ if ((op == DELPAIR && cmp_n == 0 && !redo) ||
+ (op == PUTPAIR && cmp_p == 0 && redo)) {
+ /* Need to redo a PUT or undo a delete. */
+ __ham_putitem(pagep, &argp->key,
+ !redo || PAIR_ISKEYBIG(argp->opcode) ?
+ H_OFFPAGE : H_KEYDATA);
+ __ham_putitem(pagep, &argp->data,
+ !redo || PAIR_ISDATABIG(argp->opcode) ?
+ H_OFFPAGE : H_KEYDATA);
+
+ LSN(pagep) = redo ? *lsnp : argp->pagelsn;
+ if ((ret = __ham_put_page(file_dbp, pagep, 1)) != 0)
+ goto out;
+
+ } else if ((op == DELPAIR && cmp_p == 0 && redo)
+ || (op == PUTPAIR && cmp_n == 0 && !redo)) {
+ /* Need to undo a put or redo a delete. */
+ __ham_dpair(file_dbp, pagep, argp->ndx);
+ LSN(pagep) = redo ? *lsnp : argp->pagelsn;
+ if ((ret = __ham_put_page(file_dbp, (PAGE *)pagep, 1)) != 0)
+ goto out;
+ } else
+ if ((ret = __ham_put_page(file_dbp, (PAGE *)pagep, 0)) != 0)
+ goto out;
+
+ /* Return the previous LSN. */
+ *lsnp = argp->prev_lsn;
+
+out: if (getmeta)
+ RELEASE_META(file_dbp, hashp);
+ REC_CLOSE;
+}
+
+/*
+ * __ham_newpage_recover --
+ * This log message is used when we add/remove overflow pages. This
+ * message takes care of the pointer chains, not the data on the pages.
+ *
+ * PUBLIC: int __ham_newpage_recover
+ * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+int
+__ham_newpage_recover(logp, dbtp, lsnp, redo, info)
+ DB_LOG *logp;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ int redo;
+ void *info;
+{
+ __ham_newpage_args *argp;
+ DB *mdbp, *file_dbp;
+ DB_MPOOLFILE *mpf;
+ HTAB *hashp;
+ PAGE *pagep;
+ int cmp_n, cmp_p, change, getmeta, ret;
+
+ getmeta = 0;
+ hashp = NULL; /* XXX: shut the compiler up. */
+ REC_PRINT(__ham_newpage_print);
+ REC_INTRO(__ham_newpage_read);
+
+ ret = memp_fget(mpf, &argp->new_pgno, 0, &pagep);
+ if (ret != 0)
+ if (!redo) {
+ /*
+ * We are undoing and the page doesn't exist. That
+ * is equivalent to having a pagelsn of 0, so we
+ * would not have to undo anything. In this case,
+ * don't bother creating a page.
+ */
+ ret = 0;
+ goto ppage;
+ } else if ((ret = memp_fget(mpf, &argp->new_pgno,
+ DB_MPOOL_CREATE, &pagep)) != 0)
+ goto out;
+
+ hashp = (HTAB *)file_dbp->internal;
+ GET_META(file_dbp, hashp);
+ getmeta = 1;
+
+ /*
+ * There are potentially three pages we need to check: the one
+ * that we created/deleted, the one before it and the one after
+ * it.
+ */
+
+ cmp_n = log_compare(lsnp, &LSN(pagep));
+ cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
+ change = 0;
+
+ if ((cmp_p == 0 && redo && argp->opcode == PUTOVFL) ||
+ (cmp_n == 0 && !redo && argp->opcode == DELOVFL)) {
+ /* Redo a create new page or undo a delete new page. */
+ P_INIT(pagep, file_dbp->pgsize, argp->new_pgno,
+ argp->prev_pgno, argp->next_pgno, 0, P_HASH);
+ change = 1;
+ } else if ((cmp_p == 0 && redo && argp->opcode == DELOVFL) ||
+ (cmp_n == 0 && !redo && argp->opcode == PUTOVFL)) {
+ /*
+ * Redo a delete or undo a create new page. All we
+ * really need to do is change the LSN.
+ */
+ change = 1;
+ }
+
+ if (!change) {
+ if ((ret = __ham_put_page(file_dbp, (PAGE *)pagep, 0)) != 0)
+ goto out;
+ } else {
+ LSN(pagep) = redo ? *lsnp : argp->pagelsn;
+ if ((ret = __ham_put_page(file_dbp, (PAGE *)pagep, 1)) != 0)
+ goto out;
+ }
+
+ /* Now do the prev page. */
+ppage: if (argp->prev_pgno != PGNO_INVALID) {
+ ret = memp_fget(mpf, &argp->prev_pgno, 0, &pagep);
+
+ if (ret != 0)
+ if (!redo) {
+ /*
+ * We are undoing and the page doesn't exist.
+ * That is equivalent to having a pagelsn of 0,
+ * so we would not have to undo anything. In
+ * this case, don't bother creating a page.
+ */
+ ret = 0;
+ goto npage;
+ } else if ((ret =
+ memp_fget(mpf, &argp->prev_pgno,
+ DB_MPOOL_CREATE, &pagep)) != 0)
+ goto out;
+
+ cmp_n = log_compare(lsnp, &LSN(pagep));
+ cmp_p = log_compare(&LSN(pagep), &argp->prevlsn);
+ change = 0;
+
+ if ((cmp_p == 0 && redo && argp->opcode == PUTOVFL) ||
+ (cmp_n == 0 && !redo && argp->opcode == DELOVFL)) {
+ /* Redo a create new page or undo a delete new page. */
+ pagep->next_pgno = argp->new_pgno;
+ change = 1;
+ } else if ((cmp_p == 0 && redo && argp->opcode == DELOVFL) ||
+ (cmp_n == 0 && !redo && argp->opcode == PUTOVFL)) {
+ /* Redo a delete or undo a create new page. */
+ pagep->next_pgno = argp->next_pgno;
+ change = 1;
+ }
+
+ if (!change) {
+ if ((ret = __ham_put_page(file_dbp, (PAGE *)pagep, 0)) != 0)
+ goto out;
+ } else {
+ LSN(pagep) = redo ? *lsnp : argp->prevlsn;
+ if ((ret = __ham_put_page(file_dbp, (PAGE *)pagep, 1)) != 0)
+ goto out;
+ }
+ }
+
+ /* Now time to do the next page */
+npage: if (argp->next_pgno != PGNO_INVALID) {
+ ret = memp_fget(mpf, &argp->next_pgno, 0, &pagep);
+
+ if (ret != 0)
+ if (!redo) {
+ /*
+ * We are undoing and the page doesn't exist.
+ * That is equivalent to having a pagelsn of 0,
+ * so we would not have to undo anything. In
+ * this case, don't bother creating a page.
+ */
+ *lsnp = argp->prev_lsn;
+ ret = 0;
+ goto out;
+ } else if ((ret =
+ memp_fget(mpf, &argp->next_pgno,
+ DB_MPOOL_CREATE, &pagep)) != 0)
+ goto out;
+
+ cmp_n = log_compare(lsnp, &LSN(pagep));
+ cmp_p = log_compare(&LSN(pagep), &argp->nextlsn);
+ change = 0;
+
+ if ((cmp_p == 0 && redo && argp->opcode == PUTOVFL) ||
+ (cmp_n == 0 && !redo && argp->opcode == DELOVFL)) {
+ /* Redo a create new page or undo a delete new page. */
+ pagep->prev_pgno = argp->new_pgno;
+ change = 1;
+ } else if ((cmp_p == 0 && redo && argp->opcode == DELOVFL) ||
+ (cmp_n == 0 && !redo && argp->opcode == PUTOVFL)) {
+ /* Redo a delete or undo a create new page. */
+ pagep->prev_pgno = argp->prev_pgno;
+ change = 1;
+ }
+
+ if (!change) {
+ if ((ret =
+ __ham_put_page(file_dbp, (PAGE *)pagep, 0)) != 0)
+ goto out;
+ } else {
+ LSN(pagep) = redo ? *lsnp : argp->nextlsn;
+ if ((ret =
+ __ham_put_page(file_dbp, (PAGE *)pagep, 1)) != 0)
+ goto out;
+ }
+ }
+ *lsnp = argp->prev_lsn;
+
+out: if (getmeta)
+ RELEASE_META(file_dbp, hashp);
+ REC_CLOSE;
+}
+
+
+/*
+ * __ham_replace_recover --
+ * This log message refers to partial puts that are local to a single
+ * page. You can think of them as special cases of the more general
+ * insdel log message.
+ *
+ * PUBLIC: int __ham_replace_recover
+ * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+int
+__ham_replace_recover(logp, dbtp, lsnp, redo, info)
+ DB_LOG *logp;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ int redo;
+ void *info;
+{
+ __ham_replace_args *argp;
+ DB *mdbp, *file_dbp;
+ DB_MPOOLFILE *mpf;
+ DBT dbt;
+ HKEYDATA *hk;
+ HTAB *hashp;
+ PAGE *pagep;
+ int32_t grow;
+ int change, cmp_n, cmp_p, getmeta, ret;
+
+ getmeta = 0;
+ hashp = NULL; /* XXX: shut the compiler up. */
+ REC_PRINT(__ham_replace_print);
+ REC_INTRO(__ham_replace_read);
+
+ ret = memp_fget(mpf, &argp->pgno, 0, &pagep);
+ if (ret != 0)
+ if (!redo) {
+ /*
+ * We are undoing and the page doesn't exist. That
+ * is equivalent to having a pagelsn of 0, so we
+ * would not have to undo anything. In this case,
+ * don't bother creating a page.
+ */
+ *lsnp = argp->prev_lsn;
+ ret = 0;
+ goto out;
+ } else if ((ret = memp_fget(mpf, &argp->pgno,
+ DB_MPOOL_CREATE, &pagep)) != 0)
+ goto out;
+
+ hashp = (HTAB *)file_dbp->internal;
+ GET_META(file_dbp, hashp);
+ getmeta = 1;
+
+ cmp_n = log_compare(lsnp, &LSN(pagep));
+ cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
+
+ if (cmp_p == 0 && redo) {
+ change = 1;
+ /* Reapply the change as specified. */
+ dbt.data = argp->newitem.data;
+ dbt.size = argp->newitem.size;
+ grow = argp->newitem.size - argp->olditem.size;
+ LSN(pagep) = *lsnp;
+ } else if (cmp_n == 0 && !redo) {
+ change = 1;
+ /* Undo the already applied change. */
+ dbt.data = argp->olditem.data;
+ dbt.size = argp->olditem.size;
+ grow = argp->olditem.size - argp->newitem.size;
+ LSN(pagep) = argp->pagelsn;
+ } else {
+ change = 0;
+ grow = 0;
+ }
+
+ if (change) {
+ __ham_onpage_replace(pagep,
+ file_dbp->pgsize, argp->ndx, argp->off, grow, &dbt);
+ if (argp->makedup) {
+ hk = GET_HKEYDATA(pagep, argp->ndx);
+ if (redo)
+ hk->type = H_DUPLICATE;
+ else
+ hk->type = H_KEYDATA;
+ }
+ }
+
+ if ((ret = __ham_put_page(file_dbp, pagep, change)) != 0)
+ goto out;
+
+ *lsnp = argp->prev_lsn;
+
+out: if (getmeta)
+ RELEASE_META(file_dbp, hashp);
+ REC_CLOSE;
+}
+
+/*
+ * __ham_newpgno_recover --
+ * This log message is used when allocating or deleting an overflow
+ * page. It takes care of modifying the meta data.
+ *
+ * PUBLIC: int __ham_newpgno_recover
+ * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+int
+__ham_newpgno_recover(logp, dbtp, lsnp, redo, info)
+ DB_LOG *logp;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ int redo;
+ void *info;
+{
+ __ham_newpgno_args *argp;
+ DB *mdbp, *file_dbp;
+ DB_MPOOLFILE *mpf;
+ HTAB *hashp;
+ PAGE *pagep;
+ int change, cmp_n, cmp_p, getmeta, ret;
+
+ getmeta = 0;
+ hashp = NULL; /* XXX: shut the compiler up. */
+ REC_PRINT(__ham_newpgno_print);
+ REC_INTRO(__ham_newpgno_read);
+
+ hashp = (HTAB *)file_dbp->internal;
+ GET_META(file_dbp, hashp);
+ getmeta = 1;
+
+ /*
+ * There are two phases to the recovery here. First we need
+ * to update the meta data; then we need to update the page.
+ * We'll do the meta-data first.
+ */
+ cmp_n = log_compare(lsnp, &hashp->hdr->lsn);
+ cmp_p = log_compare(&hashp->hdr->lsn, &argp->metalsn);
+
+ change = 0;
+ if ((cmp_p == 0 && redo && argp->opcode == ALLOCPGNO) ||
+ (cmp_n == 0 && !redo && argp->opcode == DELPGNO)) {
+ /* Need to redo an allocation or undo a deletion. */
+ hashp->hdr->last_freed = argp->free_pgno;
+ if (redo && argp->old_pgno != 0) /* Must be ALLOCPGNO */
+ hashp->hdr->spares[hashp->hdr->ovfl_point]++;
+ change = 1;
+ } else if (cmp_p == 0 && redo && argp->opcode == DELPGNO) {
+ /* Need to redo a deletion */
+ hashp->hdr->last_freed = argp->pgno;
+ change = 1;
+ } else if (cmp_n == 0 && !redo && argp->opcode == ALLOCPGNO) {
+ /* undo an allocation. */
+ if (argp->old_pgno == 0)
+ hashp->hdr->last_freed = argp->pgno;
+ else {
+ hashp->hdr->spares[hashp->hdr->ovfl_point]--;
+ hashp->hdr->last_freed = 0;
+ }
+ change = 1;
+ }
+ if (change) {
+ hashp->hdr->lsn = redo ? *lsnp : argp->metalsn;
+ F_SET(file_dbp, DB_HS_DIRTYMETA);
+ }
+
+
+ /* Now check the newly allocated/freed page. */
+ ret = memp_fget(mpf, &argp->pgno, 0, &pagep);
+
+ if (ret != 0)
+ if (!redo) {
+ /*
+ * We are undoing and the page doesn't exist. That
+ * is equivalent to having a pagelsn of 0, so we
+ * would not have to undo anything. In this case,
+ * don't bother creating a page.
+ */
+ *lsnp = argp->prev_lsn;
+ ret = 0;
+ goto out;
+ } else if ((ret = memp_fget(mpf, &argp->pgno,
+ DB_MPOOL_CREATE, &pagep)) != 0)
+ goto out;
+
+ cmp_n = log_compare(lsnp, &LSN(pagep));
+ cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
+
+ change = 0;
+ if (cmp_p == 0 && redo && argp->opcode == ALLOCPGNO) {
+ /* Need to redo an allocation. */
+ P_INIT(pagep, file_dbp->pgsize, argp->pgno, PGNO_INVALID,
+ PGNO_INVALID, 0, argp->new_type);
+ change = 1;
+ } else if (cmp_n == 0 && !redo && argp->opcode == DELPGNO) {
+ /* Undoing a delete. */
+ P_INIT(pagep, file_dbp->pgsize, argp->pgno, PGNO_INVALID,
+ argp->old_pgno, 0, argp->old_type);
+ change = 1;
+ } else if ((cmp_p == 0 && redo && argp->opcode == DELPGNO) ||
+ (cmp_n == 0 && !redo && argp->opcode == ALLOCPGNO)) {
+ /* Need to redo a deletion or undo an allocation. */
+ NEXT_PGNO(pagep) = argp->free_pgno;
+ TYPE(pagep) = P_INVALID;
+ change = 1;
+ }
+ if (change)
+ LSN(pagep) = redo ? *lsnp : argp->pagelsn;
+
+ if ((ret = __ham_put_page(file_dbp, pagep, change)) != 0)
+ goto out;
+
+ *lsnp = argp->prev_lsn;
+
+out: if (getmeta)
+ RELEASE_META(file_dbp, hashp);
+ REC_CLOSE;
+
+}
+
+/*
+ * __ham_splitmeta_recover --
+ * This is the meta-data part of the split. Records the new and old
+ * bucket numbers and the new/old mask information.
+ *
+ * PUBLIC: int __ham_splitmeta_recover
+ * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+int
+__ham_splitmeta_recover(logp, dbtp, lsnp, redo, info)
+ DB_LOG *logp;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ int redo;
+ void *info;
+{
+ __ham_splitmeta_args *argp;
+ DB *mdbp, *file_dbp;
+ DB_MPOOLFILE *mpf;
+ HTAB *hashp;
+ int change, cmp_n, cmp_p, getmeta, ret;
+ u_int32_t pow;
+
+ getmeta = 0;
+ hashp = NULL; /* XXX: shut the compiler up. */
+ REC_PRINT(__ham_splitmeta_print);
+ REC_INTRO(__ham_splitmeta_read);
+
+ hashp = (HTAB *)file_dbp->internal;
+ GET_META(file_dbp, hashp);
+ getmeta = 1;
+
+ /*
+ * There are two phases to the recovery here. First we need
+ * to update the meta data; then we need to update the page.
+ * We'll do the meta-data first.
+ */
+ cmp_n = log_compare(lsnp, &hashp->hdr->lsn);
+ cmp_p = log_compare(&hashp->hdr->lsn, &argp->metalsn);
+
+ change = 0;
+ if (cmp_p == 0 && redo) {
+ /* Need to redo the split information. */
+ hashp->hdr->max_bucket = argp->bucket + 1;
+ pow = __db_log2(hashp->hdr->max_bucket + 1);
+ if (pow > hashp->hdr->ovfl_point) {
+ hashp->hdr->spares[pow] =
+ hashp->hdr->spares[hashp->hdr->ovfl_point];
+ hashp->hdr->ovfl_point = pow;
+ }
+ if (hashp->hdr->max_bucket > hashp->hdr->high_mask) {
+ hashp->hdr->low_mask = hashp->hdr->high_mask;
+ hashp->hdr->high_mask =
+ hashp->hdr->max_bucket | hashp->hdr->low_mask;
+ }
+ change = 1;
+ } else if (cmp_n == 0 && !redo) {
+ /* Need to undo the split information. */
+ hashp->hdr->max_bucket = argp->bucket;
+ hashp->hdr->ovfl_point = argp->ovflpoint;
+ hashp->hdr->spares[hashp->hdr->ovfl_point] = argp->spares;
+ pow = 1 << __db_log2(hashp->hdr->max_bucket + 1);
+ hashp->hdr->high_mask = pow - 1;
+ hashp->hdr->low_mask = (pow >> 1) - 1;
+ change = 1;
+ }
+ if (change) {
+ hashp->hdr->lsn = redo ? *lsnp : argp->metalsn;
+ F_SET(file_dbp, DB_HS_DIRTYMETA);
+ }
+ *lsnp = argp->prev_lsn;
+
+out: if (getmeta)
+ RELEASE_META(file_dbp, hashp);
+ REC_CLOSE;
+}
+
+/*
+ * __ham_splitdata_recover --
+ *
+ * PUBLIC: int __ham_splitdata_recover
+ * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+int
+__ham_splitdata_recover(logp, dbtp, lsnp, redo, info)
+ DB_LOG *logp;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ int redo;
+ void *info;
+{
+ __ham_splitdata_args *argp;
+ DB *mdbp, *file_dbp;
+ DB_MPOOLFILE *mpf;
+ HTAB *hashp;
+ PAGE *pagep;
+ int change, cmp_n, cmp_p, getmeta, ret;
+
+ getmeta = 0;
+ hashp = NULL; /* XXX: shut the compiler up. */
+ REC_PRINT(__ham_splitdata_print);
+ REC_INTRO(__ham_splitdata_read);
+
+ ret = memp_fget(mpf, &argp->pgno, 0, &pagep);
+ if (ret != 0)
+ if (!redo) {
+ /*
+ * We are undoing and the page doesn't exist. That
+ * is equivalent to having a pagelsn of 0, so we
+ * would not have to undo anything. In this case,
+ * don't bother creating a page.
+ */
+ *lsnp = argp->prev_lsn;
+ ret = 0;
+ goto out;
+ } else if ((ret = memp_fget(mpf, &argp->pgno,
+ DB_MPOOL_CREATE, &pagep)) != 0)
+ goto out;
+
+ hashp = (HTAB *)file_dbp->internal;
+ GET_META(file_dbp, hashp);
+ getmeta = 1;
+
+ cmp_n = log_compare(lsnp, &LSN(pagep));
+ cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
+
+ /*
+ * There are two types of log messages here, one for the old page
+ * and one for the new pages created. The original image in the
+ * SPLITOLD record is used for undo. The image in the SPLITNEW
+ * is used for redo. We should never have a case where there is
+ * a redo operation and the SPLITOLD record is on disk, but not
+ * the SPLITNEW record. Therefore, we only have work to do when
+ * redo NEW messages and undo OLD messages, but we have to update
+ * LSNs in both cases.
+ */
+ change = 0;
+ if (cmp_p == 0 && redo) {
+ if (argp->opcode == SPLITNEW)
+ /* Need to redo the split described. */
+ memcpy(pagep, argp->pageimage.data,
+ argp->pageimage.size);
+ LSN(pagep) = *lsnp;
+ change = 1;
+ } else if (cmp_n == 0 && !redo) {
+ if (argp->opcode == SPLITOLD) {
+ /* Put back the old image. */
+ memcpy(pagep, argp->pageimage.data,
+ argp->pageimage.size);
+ } else
+ P_INIT(pagep, file_dbp->pgsize, argp->pgno,
+ PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
+ LSN(pagep) = argp->pagelsn;
+ change = 1;
+ }
+ if ((ret = __ham_put_page(file_dbp, pagep, change)) != 0)
+ goto out;
+
+ *lsnp = argp->prev_lsn;
+
+out: if (getmeta)
+ RELEASE_META(file_dbp, hashp);
+ REC_CLOSE;
+}
+
+/*
+ * __ham_ovfl_recover --
+ * This message is generated when we initialize a set of overflow pages.
+ *
+ * PUBLIC: int __ham_ovfl_recover
+ * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+int
+__ham_ovfl_recover(logp, dbtp, lsnp, redo, info)
+ DB_LOG *logp;
+ DBT *dbtp;
+ DB_LSN *lsnp;
+ int redo;
+ void *info;
+{
+ __ham_ovfl_args *argp;
+ DB *mdbp, *file_dbp;
+ DB_MPOOLFILE *mpf;
+ HTAB *hashp;
+ PAGE *pagep;
+ db_pgno_t max_pgno, pgno;
+ int cmp_n, cmp_p, getmeta, ret;
+
+ getmeta = 0;
+ hashp = NULL; /* XXX: shut the compiler up. */
+ REC_PRINT(__ham_ovfl_print);
+ REC_INTRO(__ham_ovfl_read);
+
+ hashp = (HTAB *)file_dbp->internal;
+ GET_META(file_dbp, hashp);
+ getmeta = 1;
+ file_dbp = NULL;
+
+ cmp_n = log_compare(lsnp, &hashp->hdr->lsn);
+ cmp_p = log_compare(&hashp->hdr->lsn, &argp->metalsn);
+
+ if (cmp_p == 0 && redo) {
+ /* Redo the allocation. */
+ hashp->hdr->last_freed = argp->start_pgno;
+ hashp->hdr->spares[argp->npages - 1] += argp->npages;
+ hashp->hdr->lsn = *lsnp;
+ F_SET(file_dbp, DB_HS_DIRTYMETA);
+ } else if (cmp_n == 0 && !redo) {
+ hashp->hdr->last_freed = argp->free_pgno;
+ hashp->hdr->spares[argp->npages - 1] -= argp->npages;
+ hashp->hdr->lsn = argp->metalsn;
+ F_SET(file_dbp, DB_HS_DIRTYMETA);
+ }
+
+ max_pgno = argp->start_pgno + argp->npages - 1;
+ ret = 0;
+ for (pgno = argp->start_pgno; pgno <= max_pgno; pgno++) {
+ ret = memp_fget(mpf, &pgno, 0, &pagep);
+ if (ret != 0) {
+ if (redo && (ret = memp_fget(mpf, &pgno,
+ DB_MPOOL_CREATE, &pagep)) != 0)
+ goto out;
+ else if (!redo) {
+ (void)__ham_put_page(file_dbp, pagep, 0);
+ continue;
+ }
+ }
+ if (redo && log_compare((const DB_LSN *)lsnp,
+ (const DB_LSN *)&LSN(pagep)) > 0) {
+ P_INIT(pagep, file_dbp->pgsize, pgno, PGNO_INVALID,
+ pgno == max_pgno ? argp->free_pgno : pgno + 1,
+ 0, P_HASH);
+ LSN(pagep) = *lsnp;
+ ret = __ham_put_page(file_dbp, pagep, 1);
+ } else if (!redo) {
+ ZERO_LSN(pagep->lsn);
+ ret = __ham_put_page(file_dbp, pagep, 1);
+ } else
+ ret = __ham_put_page(file_dbp, pagep, 0);
+ if (ret)
+ goto out;
+ }
+
+ *lsnp = argp->prev_lsn;
+out: if (getmeta)
+ RELEASE_META(file_dbp, hashp);
+ REC_CLOSE;
+}
diff --git a/db2/hash/hash_stat.c b/db2/hash/hash_stat.c
new file mode 100644
index 0000000000..99c6078d86
--- /dev/null
+++ b/db2/hash/hash_stat.c
@@ -0,0 +1,58 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)hash_stat.c 10.6 (Sleepycat) 7/2/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "hash.h"
+#include "common_ext.h"
+
+/*
+ * __ham_stat --
+ * Gather/print the hash statistics.
+ *
+ * PUBLIC: int __ham_stat __P((DB *, FILE *));
+ */
+int
+__ham_stat(dbp, fp)
+ DB *dbp;
+ FILE *fp;
+{
+ HTAB *hashp;
+ int i;
+
+ hashp = (HTAB *)dbp->internal;
+
+ fprintf(fp, "hash: accesses %lu collisions %lu\n",
+ hashp->hash_accesses, hashp->hash_collisions);
+ fprintf(fp, "hash: expansions %lu\n", hashp->hash_expansions);
+ fprintf(fp, "hash: overflows %lu\n", hashp->hash_overflows);
+ fprintf(fp, "hash: big key/data pages %lu\n", hashp->hash_bigpages);
+
+ SET_LOCKER(dbp, NULL);
+ GET_META(dbp, hashp);
+ fprintf(fp, "keys %lu maxp %lu\n",
+ (u_long)hashp->hdr->nelem, (u_long)hashp->hdr->max_bucket);
+
+ for (i = 0; i < NCACHED; i++)
+ fprintf(fp,
+ "spares[%d] = %lu\n", i, (u_long)hashp->hdr->spares[i]);
+
+ RELEASE_META(dbp, hashp);
+ return (0);
+}