summary | refs | log | tree | commit | diff
path: root/db2
diff options
context:
space:
mode:
Diffstat (limited to 'db2')
-rw-r--r-- db2/Makefile 4
-rw-r--r-- db2/btree/bt_cursor.c 62
-rw-r--r-- db2/btree/bt_delete.c 15
-rw-r--r-- db2/btree/bt_put.c 131
-rw-r--r-- db2/btree/bt_search.c 14
-rw-r--r-- db2/btree/bt_split.c 9
-rw-r--r-- db2/btree/btree_auto.c 8
-rw-r--r-- db2/common/db_appinit.c 21
-rw-r--r-- db2/common/db_apprec.c 29
-rw-r--r-- db2/common/db_err.c 12
-rw-r--r-- db2/common/db_region.c 23
-rw-r--r-- db2/config.h 3
-rw-r--r-- db2/db.h 54
-rw-r--r-- db2/db/db_auto.c 8
-rw-r--r-- db2/db/db_dispatch.c 31
-rw-r--r-- db2/db/db_ret.c 33
-rw-r--r-- db2/db_int.h 3
-rw-r--r-- db2/dbm/dbm.c 27
-rw-r--r-- db2/hash/hash_auto.c 8
-rw-r--r-- db2/include/btree_ext.h 2
-rw-r--r-- db2/include/common_ext.h 2
-rw-r--r-- db2/include/db.h.src 54
-rw-r--r-- db2/include/db_cxx.h 4
-rw-r--r-- db2/include/db_ext.h 1
-rw-r--r-- db2/include/db_int.h.src 3
-rw-r--r-- db2/include/lock.h 8
-rw-r--r-- db2/include/log.h 23
-rw-r--r-- db2/include/mp.h 23
-rw-r--r-- db2/include/mp_ext.h 6
-rw-r--r-- db2/include/os_ext.h 4
-rw-r--r-- db2/include/os_func.h 6
-rw-r--r-- db2/lock/lock.c 90
-rw-r--r-- db2/lock/lock_deadlock.c 5
-rw-r--r-- db2/log/log.c 4
-rw-r--r-- db2/log/log_archive.c 10
-rw-r--r-- db2/log/log_auto.c 2
-rw-r--r-- db2/log/log_get.c 33
-rw-r--r-- db2/log/log_put.c 106
-rw-r--r-- db2/mp/mp_bh.c 92
-rw-r--r-- db2/mp/mp_fget.c 114
-rw-r--r-- db2/mp/mp_fopen.c 224
-rw-r--r-- db2/mp/mp_fput.c 6
-rw-r--r-- db2/mp/mp_fset.c 29
-rw-r--r-- db2/mp/mp_open.c 13
-rw-r--r-- db2/mp/mp_pr.c 39
-rw-r--r-- db2/mp/mp_region.c 27
-rw-r--r-- db2/mp/mp_sync.c 32
-rw-r--r-- db2/mutex/README 8
-rw-r--r-- db2/mutex/mutex.c 16
-rw-r--r-- db2/os/os_config.c (renamed from db2/os/os_func.c) 131
-rw-r--r-- db2/os/os_open.c 7
-rw-r--r-- db2/os/os_spin.c 56
-rw-r--r-- db2/txn/txn.c 51
-rw-r--r-- db2/txn/txn_auto.c 2
54 files changed, 1093 insertions, 635 deletions
diff --git a/db2/Makefile b/db2/Makefile
index 8083ee3117..0ae06a3089 100644
--- a/db2/Makefile
+++ b/db2/Makefile
@@ -59,9 +59,9 @@ libdb-routines := bt_close bt_compare bt_conv bt_cursor bt_delete \
bt_split bt_stat btree_auto db db_appinit db_apprec \
db_auto \
db_byteorder db_conv db_dispatch db_dup db_err db_log2 \
- os_abs os_dir os_fid os_fsync os_func os_map os_oflags \
+ os_abs os_config os_dir os_fid os_fsync os_map os_oflags \
os_open os_rpath os_rw os_seek os_sleep os_stat os_unlink \
- db_overflow db_pr db_rec db_region db_ret db_salloc \
+ os_spin db_overflow db_pr db_rec db_region db_ret db_salloc \
db_shash db_thread hash hash_auto hash_conv hash_debug \
hash_dup hash_func hash_page hash_rec hash_stat lock \
lock_conflict lock_deadlock lock_util log log_archive \
diff --git a/db2/btree/bt_cursor.c b/db2/btree/bt_cursor.c
index e5f3faeb70..47ecd7c66d 100644
--- a/db2/btree/bt_cursor.c
+++ b/db2/btree/bt_cursor.c
@@ -8,7 +8,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)bt_cursor.c 10.35 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)bt_cursor.c 10.37 (Sleepycat) 11/22/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -33,7 +33,7 @@ static int __bam_c_next __P((DB *, CURSOR *, int));
static int __bam_c_physdel __P((DB *, CURSOR *, PAGE *));
static int __bam_c_prev __P((DB *, CURSOR *));
static int __bam_c_put __P((DBC *, DBT *, DBT *, int));
-static int __bam_c_rget __P((DB *, CURSOR *, DBT *, DBT *, int));
+static int __bam_c_rget __P((DB *, CURSOR *, DBT *, int));
static int __bam_c_search __P((DB *, CURSOR *, const DBT *, u_int, int, int *));
/* Discard the current page/lock held by a cursor. */
@@ -229,7 +229,7 @@ __bam_c_del(dbc, flags)
B_DSET(GET_BKEYDATA(h, indx + O_INDX)->type);
else
B_DSET(GET_BKEYDATA(h, indx)->type);
- (void)__bam_ca_delete(dbp, pgno, indx, NULL);
+ (void)__bam_ca_delete(dbp, pgno, indx, NULL, 0);
ret = memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY);
@@ -313,7 +313,7 @@ __bam_c_get(dbc, key, data, flags)
* been rammed into the interface.
*/
if (LF_ISSET(DB_GET_RECNO)) {
- ret = __bam_c_rget(dbp, cp, key, data, flags);
+ ret = __bam_c_rget(dbp, cp, data, flags);
PUTHANDLE(dbp);
return (ret);
}
@@ -441,10 +441,10 @@ err: if (cp->page != NULL)
* Return the record number for a cursor.
*/
static int
-__bam_c_rget(dbp, cp, key, data, flags)
+__bam_c_rget(dbp, cp, data, flags)
DB *dbp;
CURSOR *cp;
- DBT *key, *data;
+ DBT *data;
int flags;
{
BTREE *t;
@@ -1113,18 +1113,18 @@ __bam_cprint(dbp)
/*
* __bam_ca_delete --
- * Check if any of the cursors refer to the item we are about to delete.
- * We'll return the number of cursors that refer to the item in question.
- * If a cursor does refer to the item, then we set its deleted bit.
+ * Check if any of the cursors refer to the item we are about to delete,
+ * returning the number of cursors that refer to the item in question.
*
- * PUBLIC: int __bam_ca_delete __P((DB *, db_pgno_t, u_int32_t, CURSOR *));
+ * PUBLIC: int __bam_ca_delete __P((DB *, db_pgno_t, u_int32_t, CURSOR *, int));
*/
int
-__bam_ca_delete(dbp, pgno, indx, curs)
+__bam_ca_delete(dbp, pgno, indx, curs, key_delete)
DB *dbp;
db_pgno_t pgno;
u_int32_t indx;
CURSOR *curs;
+ int key_delete;
{
DBC *dbc;
CURSOR *cp;
@@ -1140,22 +1140,40 @@ __bam_ca_delete(dbp, pgno, indx, curs)
* It's possible for multiple cursors within the thread to have write
* locks on the same page, but, cursors within a thread must be single
* threaded, so all we're locking here is the cursor linked list.
- *
- * indx refers to the first of what might be a duplicate set. The
- * cursor passed in is the one initiating the delete, so we don't
- * want to count it.
*/
DB_THREAD_LOCK(dbp);
+
for (count = 0, dbc = TAILQ_FIRST(&dbp->curs_queue);
dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
cp = (CURSOR *)dbc->internal;
- if ((curs != cp &&
- cp->pgno == pgno && cp->indx == indx) ||
- (cp->dpgno == pgno && cp->dindx == indx)) {
- ++count;
- F_SET(cp, C_DELETED);
- }
+
+ /*
+ * Optionally, a cursor passed in is the one initiating the
+ * delete, so we don't want to count it or set its deleted
+ * flag. Otherwise, if a cursor refers to the item, then we
+ * set its deleted flag.
+ */
+ if (curs == cp)
+ continue;
+
+ /*
+ * If we're deleting the key itself and not just one of its
+ * duplicates, repoint the cursor to the main-page key/data
+ * pair, everything else is about to be discarded.
+ */
+ if (key_delete || cp->dpgno == PGNO_INVALID) {
+ if (cp->pgno == pgno && cp->indx == indx) {
+ cp->dpgno = PGNO_INVALID;
+ ++count;
+ F_SET(cp, C_DELETED);
+ }
+ } else
+ if (cp->dpgno == pgno && cp->dindx == indx) {
+ ++count;
+ F_SET(cp, C_DELETED);
+ }
}
+
DB_THREAD_UNLOCK(dbp);
return (count);
}
@@ -1440,7 +1458,7 @@ __bam_c_physdel(dbp, cp, h)
* If the item is referenced by another cursor, leave it up to that
* cursor to do the delete.
*/
- if (__bam_ca_delete(dbp, pgno, indx, cp) != 0)
+ if (__bam_ca_delete(dbp, pgno, indx, cp, 0) != 0)
return (0);
/*
diff --git a/db2/btree/bt_delete.c b/db2/btree/bt_delete.c
index 9593d0109c..dbd1995f89 100644
--- a/db2/btree/bt_delete.c
+++ b/db2/btree/bt_delete.c
@@ -47,7 +47,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)bt_delete.c 10.22 (Sleepycat) 11/2/97";
+static const char sccsid[] = "@(#)bt_delete.c 10.23 (Sleepycat) 11/22/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -101,17 +101,20 @@ __bam_delete(argdbp, txn, key, flags)
h = t->bt_csp->page;
indx = t->bt_csp->indx;
- /* Delete the key/data pair, including any duplicates. */
+ /* Delete the key/data pair, including any on-or-off page duplicates. */
for (cnt = 1, i = indx;; ++cnt)
if ((i += P_INDX) >= NUM_ENT(h) || h->inp[i] != h->inp[indx])
break;
for (; cnt > 0; --cnt, ++t->lstat.bt_deleted)
- if (__bam_ca_delete(dbp, h->pgno, indx, NULL) != 0) {
+ if (__bam_ca_delete(dbp, h->pgno, indx, NULL, 1) == 0) {
+ if ((ret = __bam_ditem(dbp, h, indx)) != 0)
+ goto err;
+ if ((ret = __bam_ditem(dbp, h, indx)) != 0)
+ goto err;
+ } else {
B_DSET(GET_BKEYDATA(h, indx + O_INDX)->type);
indx += P_INDX;
- } else if ((ret = __bam_ditem(dbp, h, indx)) != 0 ||
- (ret = __bam_ditem(dbp, h, indx)) != 0)
- goto err;
+ }
/* If we're using record numbers, update internal page record counts. */
if (F_ISSET(dbp, DB_BT_RECNUM) && (ret = __bam_adjust(dbp, t, -1)) != 0)
diff --git a/db2/btree/bt_put.c b/db2/btree/bt_put.c
index b3d775bb0f..3161b02b55 100644
--- a/db2/btree/bt_put.c
+++ b/db2/btree/bt_put.c
@@ -47,7 +47,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)bt_put.c 10.31 (Sleepycat) 10/26/97";
+static const char sccsid[] = "@(#)bt_put.c 10.35 (Sleepycat) 11/22/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -64,6 +64,7 @@ static const char sccsid[] = "@(#)bt_put.c 10.31 (Sleepycat) 10/26/97";
#include "btree.h"
static int __bam_fixed __P((BTREE *, DBT *));
+static int __bam_isdeleted __P((DB *, PAGE *, u_int32_t, int *));
static int __bam_lookup __P((DB *, DBT *, int *));
static int __bam_ndup __P((DB *, PAGE *, u_int32_t));
static int __bam_ovput __P((DB *, PAGE *, u_int32_t, DBT *));
@@ -89,7 +90,7 @@ __bam_put(argdbp, txn, key, data, flags)
DB *dbp;
PAGE *h;
db_indx_t indx;
- int exact, iflags, newkey, replace, ret, stack;
+ int exact, iflags, isdeleted, newkey, replace, ret, stack;
DEBUG_LWRITE(argdbp, txn, "bam_put", key, data, flags);
@@ -114,21 +115,25 @@ retry: /*
stack = 1;
/*
- * If an identical key is already in the tree, and DB_NOOVERWRITE is
- * set, an error is returned. If an identical key is already in the
- * tree and DB_NOOVERWRITE is not set, the key is either added (when
- * duplicates are permitted) or an error is returned. The exception
- * is when the item located is referenced by a cursor and marked for
- * deletion, in which case we permit the overwrite and flag the cursor.
+ * If DB_NOOVERWRITE is set and there's an identical key in the tree,
+ * return an error unless the data item has already been marked for
+ * deletion, or, all the remaining data items have already been marked
+ * for deletion in the case of duplicates. If all the data items have
+ * been marked for deletion, we do a replace, otherwise, it has to be
+ * a set of duplicates, and we simply append a new one to the set.
*/
- replace = 0;
- if (exact && flags == DB_NOOVERWRITE) {
- if (!B_DISSET(GET_BKEYDATA(h, indx + O_INDX)->type)) {
- ret = DB_KEYEXIST;
+ isdeleted = replace = 0;
+ if (exact) {
+ if ((ret = __bam_isdeleted(dbp, h, indx, &isdeleted)) != 0)
goto err;
- }
- replace = 1;
- __bam_ca_replace(dbp, h->pgno, indx, REPLACE_SETUP);
+ if (isdeleted) {
+ replace = 1;
+ __bam_ca_replace(dbp, h->pgno, indx, REPLACE_SETUP);
+ } else
+ if (flags == DB_NOOVERWRITE) {
+ ret = DB_KEYEXIST;
+ goto err;
+ }
}
/*
@@ -151,7 +156,7 @@ retry: /*
*/
newkey = dbp->type == DB_BTREE && !exact;
if (exact) {
- if (F_ISSET(dbp, DB_AM_DUP)) {
+ if (!isdeleted && F_ISSET(dbp, DB_AM_DUP)) {
/*
* Make sure that we're not looking at a page of
* duplicates -- if so, move to the last entry on
@@ -234,6 +239,88 @@ err: if (stack)
}
/*
+ * __bam_isdeleted --
+ * Return if the only remaining data item for the element has been
+ * deleted.
+ */
+static int
+__bam_isdeleted(dbp, h, indx, isdeletedp)
+ DB *dbp;
+ PAGE *h;
+ u_int32_t indx;
+ int *isdeletedp;
+{
+ BKEYDATA *bk;
+ db_pgno_t pgno;
+ int ret;
+
+ *isdeletedp = 1;
+ for (;;) {
+ bk = GET_BKEYDATA(h, indx + O_INDX);
+ switch (B_TYPE(bk->type)) {
+ case B_KEYDATA:
+ case B_OVERFLOW:
+ if (!B_DISSET(bk->type)) {
+ *isdeletedp = 0;
+ return (0);
+ }
+ break;
+ case B_DUPLICATE:
+ /*
+ * If the data item referencing the off-page duplicates
+ * is flagged as deleted, we're done. Else, we have to
+ * walk the chain of duplicate pages.
+ */
+ if (B_DISSET(bk->type))
+ return (0);
+ goto dupchk;
+ default:
+ return (__db_pgfmt(dbp, h->pgno));
+ }
+
+ /*
+ * If there are no more on-page duplicate items, then every
+ * data item for this key must have been deleted.
+ */
+ if (indx + P_INDX >= (u_int32_t)NUM_ENT(h))
+ return (0);
+ if (h->inp[indx] != h->inp[indx + P_INDX])
+ return (0);
+
+ /* Check the next item. */
+ indx += P_INDX;
+ }
+ /* NOTREACHED */
+
+dupchk: /* Check a chain of duplicate pages. */
+ pgno = ((BOVERFLOW *)bk)->pgno;
+ for (;;) {
+ /* Acquire the next page in the duplicate chain. */
+ if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
+ return (ret);
+
+ /* Check each item for a delete flag. */
+ for (indx = 0; indx < NUM_ENT(h); ++indx)
+ if (!B_DISSET(GET_BKEYDATA(h, indx)->type)) {
+ *isdeletedp = 0;
+ goto done;
+ }
+ /*
+ * If we reach the end of the duplicate pages, then every
+ * item we reviewed must have been deleted.
+ */
+ if ((pgno = NEXT_PGNO(h)) == PGNO_INVALID)
+ goto done;
+
+ (void)memp_fput(dbp->mpf, h, 0);
+ }
+ /* NOTREACHED */
+
+done: (void)memp_fput(dbp->mpf, h, 0);
+ return (0);
+}
+
+/*
* __bam_lookup --
* Find the right location in the tree for the key.
*/
@@ -425,10 +512,10 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
if (op == DB_CURRENT) {
bk = GET_BKEYDATA(h,
indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
- if (B_TYPE(bk->type) == B_OVERFLOW)
- have_bytes = BOVERFLOW_PSIZE;
- else
+ if (B_TYPE(bk->type) == B_KEYDATA)
have_bytes = BKEYDATA_PSIZE(bk->len);
+ else
+ have_bytes = BOVERFLOW_PSIZE;
need_bytes = 0;
} else {
have_bytes = 0;
@@ -542,7 +629,7 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
* If we're dealing with offpage items, we have to
* delete and then re-add the item.
*/
- if (bigdata || B_TYPE(bk->type) == B_OVERFLOW) {
+ if (bigdata || B_TYPE(bk->type) != B_KEYDATA) {
if ((ret = __bam_ditem(dbp, h, indx)) != 0)
return (ret);
break;
@@ -704,9 +791,9 @@ __bam_ritem(dbp, h, indx, data)
{
BKEYDATA *bk;
DBT orig, repl;
- db_indx_t lo, ln, min, off, prefix, suffix;
+ db_indx_t cnt, lo, ln, min, off, prefix, suffix;
int32_t nbytes;
- int cnt, ret;
+ int ret;
u_int8_t *p, *t;
/*
diff --git a/db2/btree/bt_search.c b/db2/btree/bt_search.c
index a21a8208bc..c39c9af322 100644
--- a/db2/btree/bt_search.c
+++ b/db2/btree/bt_search.c
@@ -47,7 +47,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)bt_search.c 10.8 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)bt_search.c 10.9 (Sleepycat) 11/18/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -119,12 +119,20 @@ __bam_search(dbp, key, flags, stop, recnop, exactp)
return (ret);
}
- /* Decide if we need to save this page; if we do, write lock it. */
+ /*
+ * Decide if we need to save this page; if we do, write lock it.
+ * We deliberately don't lock-couple on this call. If the tree
+ * is tiny, i.e., one page, and two threads are busily updating
+ * the root page, we're almost guaranteed deadlocks galore, as
+ * each one gets a read lock and then blocks the other's attempt
+ * for a write lock.
+ */
if (!stack &&
((LF_ISSET(S_PARENT) && (u_int8_t)(stop + 1) >= h->level) ||
(LF_ISSET(S_WRITE) && h->level == LEAFLEVEL))) {
(void)memp_fput(dbp->mpf, h, 0);
- if ((ret = __bam_lget(dbp, 1, pg, DB_LOCK_WRITE, &lock)) != 0)
+ (void)__BT_LPUT(dbp, lock);
+ if ((ret = __bam_lget(dbp, 0, pg, DB_LOCK_WRITE, &lock)) != 0)
return (ret);
if ((ret = __bam_pget(dbp, &h, &pg, 0)) != 0) {
(void)__BT_LPUT(dbp, lock);
diff --git a/db2/btree/bt_split.c b/db2/btree/bt_split.c
index bc09131b00..219d486dc5 100644
--- a/db2/btree/bt_split.c
+++ b/db2/btree/bt_split.c
@@ -44,7 +44,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)bt_split.c 10.17 (Sleepycat) 11/2/97";
+static const char sccsid[] = "@(#)bt_split.c 10.18 (Sleepycat) 11/23/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -396,14 +396,14 @@ __bam_broot(dbp, rootp, lp, rp)
* The btree comparison code guarantees that the left-most key on any
* level of the tree is never used, so it doesn't need to be filled in.
*/
+ memset(&bi, 0, sizeof(bi));
bi.len = 0;
B_TSET(bi.type, B_KEYDATA, 0);
bi.pgno = lp->pgno;
if (F_ISSET(dbp, DB_BT_RECNUM)) {
bi.nrecs = __bam_total(lp);
RE_NREC_SET(rootp, bi.nrecs);
- } else
- bi.nrecs = 0;
+ }
hdr.data = &bi;
hdr.size = SSZA(BINTERNAL, data);
if ((ret =
@@ -591,6 +591,7 @@ __bam_pinsert(dbp, parent, lchild, rchild)
return (DB_NEEDSPLIT);
/* Add a new record for the right page. */
+ memset(&bi, 0, sizeof(bi));
bi.len = child_bi->len;
B_TSET(bi.type, child_bi->type, 0);
bi.pgno = rchild->pgno;
@@ -640,6 +641,7 @@ noprefix: nksize = child_bk->len;
if (P_FREESPACE(ppage) < nbytes)
return (DB_NEEDSPLIT);
+ memset(&bi, 0, sizeof(bi));
bi.len = nksize;
B_TSET(bi.type, child_bk->type, 0);
bi.pgno = rchild->pgno;
@@ -661,6 +663,7 @@ noprefix: nksize = child_bk->len;
if (P_FREESPACE(ppage) < nbytes)
return (DB_NEEDSPLIT);
+ memset(&bi, 0, sizeof(bi));
bi.len = BOVERFLOW_SIZE;
B_TSET(bi.type, child_bk->type, 0);
bi.pgno = rchild->pgno;
diff --git a/db2/btree/btree_auto.c b/db2/btree/btree_auto.c
index 45232bbc41..18b9b34975 100644
--- a/db2/btree/btree_auto.c
+++ b/db2/btree/btree_auto.c
@@ -100,7 +100,6 @@ int __bam_pg_alloc_log(logp, txnid, ret_lsnp, flags,
* PUBLIC: int __bam_pg_alloc_print
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
-
int
__bam_pg_alloc_print(notused1, dbtp, lsnp, notused3, notused4)
DB_LOG *notused1;
@@ -265,7 +264,6 @@ int __bam_pg_free_log(logp, txnid, ret_lsnp, flags,
* PUBLIC: int __bam_pg_free_print
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
-
int
__bam_pg_free_print(notused1, dbtp, lsnp, notused3, notused4)
DB_LOG *notused1;
@@ -460,7 +458,6 @@ int __bam_split_log(logp, txnid, ret_lsnp, flags,
* PUBLIC: int __bam_split_print
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
-
int
__bam_split_print(notused1, dbtp, lsnp, notused3, notused4)
DB_LOG *notused1;
@@ -657,7 +654,6 @@ int __bam_rsplit_log(logp, txnid, ret_lsnp, flags,
* PUBLIC: int __bam_rsplit_print
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
-
int
__bam_rsplit_print(notused1, dbtp, lsnp, notused3, notused4)
DB_LOG *notused1;
@@ -836,7 +832,6 @@ int __bam_adj_log(logp, txnid, ret_lsnp, flags,
* PUBLIC: int __bam_adj_print
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
-
int
__bam_adj_print(notused1, dbtp, lsnp, notused3, notused4)
DB_LOG *notused1;
@@ -995,7 +990,6 @@ int __bam_cadjust_log(logp, txnid, ret_lsnp, flags,
* PUBLIC: int __bam_cadjust_print
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
-
int
__bam_cadjust_print(notused1, dbtp, lsnp, notused3, notused4)
DB_LOG *notused1;
@@ -1145,7 +1139,6 @@ int __bam_cdel_log(logp, txnid, ret_lsnp, flags,
* PUBLIC: int __bam_cdel_print
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
-
int
__bam_cdel_print(notused1, dbtp, lsnp, notused3, notused4)
DB_LOG *notused1;
@@ -1329,7 +1322,6 @@ int __bam_repl_log(logp, txnid, ret_lsnp, flags,
* PUBLIC: int __bam_repl_print
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
-
int
__bam_repl_print(notused1, dbtp, lsnp, notused3, notused4)
DB_LOG *notused1;
diff --git a/db2/common/db_appinit.c b/db2/common/db_appinit.c
index 74ba9ff426..05fc7cc084 100644
--- a/db2/common/db_appinit.c
+++ b/db2/common/db_appinit.c
@@ -8,7 +8,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)db_appinit.c 10.36 (Sleepycat) 10/28/97";
+static const char sccsid[] = "@(#)db_appinit.c 10.37 (Sleepycat) 11/25/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -108,10 +108,23 @@ db_appinit(db_home, db_config, dbenv, flags)
if ((ret = __db_parse(dbenv, *p)) != 0)
goto err;
- /* Parse the config file. */
+ /*
+ * Parse the config file.
+ *
+ * XXX
+ * Don't use sprintf(3)/snprintf(3) -- the former is dangerous, and
+ * the latter isn't standard, and we're manipulating strings handed
+ * us by the application.
+ */
if (dbenv->db_home != NULL) {
- (void)snprintf(buf,
- sizeof(buf), "%s/DB_CONFIG", dbenv->db_home);
+#define CONFIG_NAME "/DB_CONFIG"
+ if (strlen(dbenv->db_home) +
+ strlen(CONFIG_NAME) + 1 > sizeof(buf)) {
+ ret = ENAMETOOLONG;
+ goto err;
+ }
+ (void)strcpy(buf, dbenv->db_home);
+ (void)strcat(buf, CONFIG_NAME);
if ((fp = fopen(buf, "r")) != NULL) {
while (fgets(buf, sizeof(buf), fp) != NULL) {
if ((lp = strchr(buf, '\n')) != NULL)
diff --git a/db2/common/db_apprec.c b/db2/common/db_apprec.c
index ac0176d70f..188c6b9f95 100644
--- a/db2/common/db_apprec.c
+++ b/db2/common/db_apprec.c
@@ -11,7 +11,7 @@
static const char copyright[] =
"@(#) Copyright (c) 1997\n\
Sleepycat Software Inc. All rights reserved.\n";
-static const char sccsid[] = "@(#)db_apprec.c 10.18 (Sleepycat) 9/30/97";
+static const char sccsid[] = "@(#)db_apprec.c 10.19 (Sleepycat) 11/23/97";
#endif
#ifndef NO_SYSTEM_INCLUDES
@@ -72,10 +72,8 @@ __db_apprec(dbenv, flags)
if (LF_ISSET(DB_RECOVER_FATAL))
first_flag = DB_FIRST;
else {
- if ((ret = __log_findckp(lp, &lsn)) == DB_NOTFOUND) {
- F_SET(lp, is_thread);
- return (0);
- }
+ if ((ret = __log_findckp(lp, &lsn)) == DB_NOTFOUND)
+ goto out;
first_flag = DB_SET;
}
@@ -88,7 +86,7 @@ __db_apprec(dbenv, flags)
(u_long)lsn.file, (u_long)lsn.offset);
else
__db_err(dbenv, "Retrieving first LSN");
- goto err;
+ goto out;
}
first_lsn = lsn;
@@ -99,7 +97,7 @@ __db_apprec(dbenv, flags)
if ((ret =
log_get(dbenv->lg_info, &lsn, &data, DB_NEXT)) != 0) {
if (ret != DB_NOTFOUND)
- goto err;
+ goto out;
break;
}
}
@@ -123,7 +121,7 @@ __db_apprec(dbenv, flags)
goto msgerr;
}
if (ret != 0 && ret != DB_NOTFOUND)
- goto err;
+ goto out;
for (ret = log_get(lp, &lsn, &data, DB_NEXT);
ret == 0; ret = log_get(lp, &lsn, &data, DB_NEXT)) {
@@ -134,7 +132,7 @@ __db_apprec(dbenv, flags)
goto msgerr;
}
if (ret != DB_NOTFOUND)
- goto err;
+ goto out;
/* Now close all the db files that are open. */
__log_close_files(lp);
@@ -148,7 +146,7 @@ __db_apprec(dbenv, flags)
dbenv->tx_info->region->last_ckp = ckp_lsn;
dbenv->tx_info->region->time_ckp = (u_int32_t)now;
if ((ret = txn_checkpoint(dbenv->tx_info, 0, 0)) != 0)
- goto err;
+ goto out;
if (dbenv->db_verbose) {
__db_err(lp->dbenv, "Recovery complete at %s", ctime(&now));
@@ -160,12 +158,13 @@ __db_apprec(dbenv, flags)
(u_long)dbenv->tx_info->region->last_ckp.offset);
}
- F_SET(lp, is_thread);
- return (0);
+ if (0) {
+msgerr: __db_err(dbenv, "Recovery function for LSN %lu %lu failed",
+ (u_long)lsn.file, (u_long)lsn.offset);
+ }
-msgerr: __db_err(dbenv, "Recovery function for LSN %lu %lu failed",
- (u_long)lsn.file, (u_long)lsn.offset);
+out: F_SET(lp, is_thread);
+ __db_txnlist_end(txninfo);
-err: F_SET(lp, is_thread);
return (ret);
}
diff --git a/db2/common/db_err.c b/db2/common/db_err.c
index f3e7b40448..13f2cb5dc3 100644
--- a/db2/common/db_err.c
+++ b/db2/common/db_err.c
@@ -8,7 +8,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)db_err.c 10.18 (Sleepycat) 8/27/97";
+static const char sccsid[] = "@(#)db_err.c 10.19 (Sleepycat) 11/9/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -302,10 +302,13 @@ __db_cgetchk(dbp, key, data, flags, isvalid)
break;
case DB_SET:
break;
- case DB_SET_RECNO:
case DB_GET_RECNO:
if (!F_ISSET(dbp, DB_BT_RECNUM))
goto err;
+ break;
+ case DB_SET_RECNO:
+ if (!F_ISSET(dbp, DB_BT_RECNUM))
+ goto err;
check_key = 1;
break;
default:
@@ -313,8 +316,9 @@ err: return (__db_ferr(dbp->dbenv, "c_get", 0));
}
/* Check for invalid key/data flags. */
- DB_CHECK_FLAGS(dbp->dbenv, "key", key->flags,
- DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL);
+ if (check_key)
+ DB_CHECK_FLAGS(dbp->dbenv, "key", key->flags,
+ DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL);
DB_CHECK_FLAGS(dbp->dbenv, "data", data->flags,
DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL);
diff --git a/db2/common/db_region.c b/db2/common/db_region.c
index 3e8cd2dc66..4fc603a2b0 100644
--- a/db2/common/db_region.c
+++ b/db2/common/db_region.c
@@ -43,7 +43,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)db_region.c 10.15 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)db_region.c 10.18 (Sleepycat) 11/28/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -80,14 +80,14 @@ static int __db_rmap __P((DB_ENV *, int, size_t, void *));
* into memory, NULL on error.
*
* PUBLIC: int __db_rcreate __P((DB_ENV *, APPNAME,
- * PUBLIC: const char *, const char *, int, size_t, int *, void *));
+ * PUBLIC: const char *, const char *, int, size_t, int, int *, void *));
*/
int
-__db_rcreate(dbenv, appname, path, file, mode, size, fdp, retp)
+__db_rcreate(dbenv, appname, path, file, mode, size, oflags, fdp, retp)
DB_ENV *dbenv;
APPNAME appname;
const char *path, *file;
- int mode, *fdp;
+ int mode, oflags, *fdp;
size_t size;
void *retp;
{
@@ -110,12 +110,13 @@ __db_rcreate(dbenv, appname, path, file, mode, size, fdp, retp)
/*
* Now open the file. We need to make sure that multiple processes
* that attempt to create the region at the same time are properly
- * ordered, so we open it O_EXCL and O_CREAT so two simultaneous
+ * ordered, so we open it DB_EXCL and DB_CREATE so two simultaneous
* attempts to create the region will return failure in one of the
* attempts.
*/
- if (fd == -1 && (ret = __db_open(name,
- DB_CREATE | DB_EXCL, DB_CREATE | DB_EXCL, mode, &fd)) != 0) {
+ oflags |= DB_CREATE | DB_EXCL;
+ if (fd == -1 &&
+ (ret = __db_open(name, oflags, oflags, mode, &fd)) != 0) {
if (ret != EEXIST)
__db_err(dbenv,
"region create: %s: %s", name, strerror(ret));
@@ -398,7 +399,7 @@ __db_runlink(dbenv, appname, path, file, force)
/* If the file doesn't exist, we're done. */
if (__db_exists(name, NULL))
- return (0); /* XXX: ENOENT? */
+ goto done;
/*
* If we're called with a force flag, try and unlink the file. This
@@ -412,8 +413,7 @@ __db_runlink(dbenv, appname, path, file, force)
if (force) {
if ((ret = __db_unlink(name)) != 0 && ret != ENOENT)
goto err1;
- FREES(name);
- return (0);
+ goto done;
}
/* Open and lock the region. */
@@ -453,7 +453,7 @@ __db_runlink(dbenv, appname, path, file, force)
(void)__db_sleep(0, 250000);
}
if (ret == 0) {
- FREES(name);
+done: FREES(name);
return (0);
}
@@ -467,6 +467,7 @@ __db_runlink(dbenv, appname, path, file, force)
err2: (void)__db_mutex_unlock(&rp->lock, fd);
(void)__db_rclose(dbenv, fd, rp);
err1: __db_err(dbenv, "region unlink: %s: %s", name, strerror(ret));
+
FREES(name);
return (ret);
}
diff --git a/db2/config.h b/db2/config.h
index 27dbdaa439..7f784a0d9b 100644
--- a/db2/config.h
+++ b/db2/config.h
@@ -114,6 +114,9 @@
/* Define if you have the strsep function. */
#define HAVE_STRSEP 1
+/* Define if you have the sysconf function. */
+#define HAVE_SYSCONF 1
+
/* Define if you have the vsnprintf function. */
#define HAVE_VSNPRINTF 1
diff --git a/db2/db.h b/db2/db.h
index fb2d6bb3da..f976acafb7 100644
--- a/db2/db.h
+++ b/db2/db.h
@@ -4,7 +4,7 @@
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*
- * @(#)db.h.src 10.91 (Sleepycat) 11/3/97
+ * @(#)db.h.src 10.97 (Sleepycat) 11/28/97
*/
#ifndef _DB_H_
@@ -73,8 +73,8 @@
#define DB_VERSION_MAJOR 2
#define DB_VERSION_MINOR 3
-#define DB_VERSION_PATCH 12
-#define DB_VERSION_STRING "Sleepycat Software: DB 2.3.12: (11/3/97)"
+#define DB_VERSION_PATCH 14
+#define DB_VERSION_STRING "Sleepycat Software: DB 2.3.14: (11/28/97)"
typedef u_int32_t db_pgno_t; /* Page number type. */
typedef u_int16_t db_indx_t; /* Page offset type. */
@@ -129,8 +129,10 @@ struct __db_dbt {
};
/*
- * DB configuration. There are a set of functions which the application
- * can replace with its own versions.
+ * DB internal configuration.
+ *
+ * There are a set of functions that the application can replace with its
+ * own versions, and some other knobs which can be turned at run-time.
*/
#define DB_FUNC_CALLOC 1 /* ANSI C calloc. */
#define DB_FUNC_CLOSE 2 /* POSIX 1003.1 close. */
@@ -147,11 +149,12 @@ struct __db_dbt {
#define DB_FUNC_REALLOC 13 /* ANSI C realloc. */
#define DB_FUNC_SEEK 14 /* POSIX 1003.1 lseek. */
#define DB_FUNC_SLEEP 15 /* DB: sleep secs/usecs. */
-#define DB_FUNC_STRDUP 16 /* ANSI C strdup. */
+#define DB_FUNC_STRDUP 16 /* DB: strdup(3). */
#define DB_FUNC_UNLINK 17 /* POSIX 1003.1 unlink. */
#define DB_FUNC_UNMAP 18 /* DB: unmap shared memory file. */
#define DB_FUNC_WRITE 19 /* POSIX 1003.1 write. */
#define DB_FUNC_YIELD 20 /* DB: yield thread to scheduler. */
+#define DB_TSL_SPINS 21 /* DB: initialize spin count. */
/*
* Database configuration and initialization.
@@ -211,10 +214,10 @@ struct __db_dbt {
* locking subsystem.
*/
#define DB_LOCK_NORUN 0x0
-#define DB_LOCK_DEFAULT 0x1
-#define DB_LOCK_OLDEST 0x2
-#define DB_LOCK_RANDOM 0x3
-#define DB_LOCK_YOUNGEST 0x4
+#define DB_LOCK_DEFAULT 0x1 /* Default policy. */
+#define DB_LOCK_OLDEST 0x2 /* Abort oldest transaction. */
+#define DB_LOCK_RANDOM 0x3 /* Abort random transaction. */
+#define DB_LOCK_YOUNGEST 0x4 /* Abort youngest transaction. */
struct __db_env {
int db_lorder; /* Byte order. */
@@ -265,6 +268,10 @@ struct __db_env {
/*******************************************************
* Access methods.
*******************************************************/
+/*
+ * XXX
+ * Changes here must be reflected in java/src/com/sleepycat/db/Db.java.
+ */
typedef enum {
DB_BTREE=1, /* B+tree. */
DB_HASH, /* Extended Linear Hashing. */
@@ -347,7 +354,13 @@ struct __db_info {
#define DB_SET_RANGE 0x020000 /* c_get() */
#define DB_SET_RECNO 0x040000 /* c_get() */
-/* DB (user visible) error return codes. */
+/*
+ * DB (user visible) error return codes.
+ *
+ * XXX
+ * Changes to any of the user visible error return codes must be reflected
+ * in java/src/com/sleepycat/db/Db.java.
+ */
#define DB_INCOMPLETE ( -1) /* Sync didn't finish. */
#define DB_KEYEMPTY ( -2) /* The key/data pair was deleted or
was never created by the user. */
@@ -516,6 +529,7 @@ int db_appinit __P((const char *, char * const *, DB_ENV *, int));
int db_appexit __P((DB_ENV *));
int db_jump_set __P((void *, int));
int db_open __P((const char *, DBTYPE, int, int, DB_ENV *, DB_INFO *, DB **));
+int db_value_set __P((int, int));
char *db_version __P((int *, int *, int *));
#if defined(__cplusplus)
};
@@ -533,16 +547,26 @@ char *db_version __P((int *, int *, int *));
/* Flag values for lock_detect(). */
#define DB_LOCK_CONFLICT 0x01 /* Run on any conflict. */
-/* Request types. */
+/*
+ * Request types.
+ *
+ * XXX
+ * Changes here must be reflected in java/src/com/sleepycat/db/Db.java.
+ */
typedef enum {
- DB_LOCK_DUMP, /* Display held locks. */
+ DB_LOCK_DUMP=0, /* Display held locks. */
DB_LOCK_GET, /* Get the lock. */
DB_LOCK_PUT, /* Release the lock. */
DB_LOCK_PUT_ALL, /* Release locker's locks. */
DB_LOCK_PUT_OBJ /* Release locker's locks on obj. */
} db_lockop_t;
-/* Simple R/W lock modes and for multi-granularity intention locking. */
+/*
+ * Simple R/W lock modes and for multi-granularity intention locking.
+ *
+ * XXX
+ * Changes here must be reflected in java/src/com/sleepycat/db/Db.java.
+ */
typedef enum {
DB_LOCK_NG=0, /* Not granted. */
DB_LOCK_READ, /* Shared/read. */
@@ -577,7 +601,7 @@ extern const u_int8_t db_riw_conflicts[];
extern "C" {
#endif
int lock_close __P((DB_LOCKTAB *));
-int lock_detect __P((DB_LOCKTAB *, int, u_int32_t));
+int lock_detect __P((DB_LOCKTAB *, int, int));
int lock_get __P((DB_LOCKTAB *,
u_int32_t, int, const DBT *, db_lockmode_t, DB_LOCK *));
int lock_id __P((DB_LOCKTAB *, u_int32_t *));
diff --git a/db2/db/db_auto.c b/db2/db/db_auto.c
index d40d964542..88bca7b583 100644
--- a/db2/db/db_auto.c
+++ b/db2/db/db_auto.c
@@ -122,7 +122,6 @@ int __db_addrem_log(logp, txnid, ret_lsnp, flags,
* PUBLIC: int __db_addrem_print
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
-
int
__db_addrem_print(notused1, dbtp, lsnp, notused3, notused4)
DB_LOG *notused1;
@@ -312,7 +311,6 @@ int __db_split_log(logp, txnid, ret_lsnp, flags,
* PUBLIC: int __db_split_print
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
-
int
__db_split_print(notused1, dbtp, lsnp, notused3, notused4)
DB_LOG *notused1;
@@ -507,7 +505,6 @@ int __db_big_log(logp, txnid, ret_lsnp, flags,
* PUBLIC: int __db_big_print
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
-
int
__db_big_print(notused1, dbtp, lsnp, notused3, notused4)
DB_LOG *notused1;
@@ -678,7 +675,6 @@ int __db_ovref_log(logp, txnid, ret_lsnp, flags,
* PUBLIC: int __db_ovref_print
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
-
int
__db_ovref_print(notused1, dbtp, lsnp, notused3, notused4)
DB_LOG *notused1;
@@ -842,7 +838,6 @@ int __db_relink_log(logp, txnid, ret_lsnp, flags,
* PUBLIC: int __db_relink_print
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
-
int
__db_relink_print(notused1, dbtp, lsnp, notused3, notused4)
DB_LOG *notused1;
@@ -1005,7 +1000,6 @@ int __db_addpage_log(logp, txnid, ret_lsnp, flags,
* PUBLIC: int __db_addpage_print
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
-
int
__db_addpage_print(notused1, dbtp, lsnp, notused3, notused4)
DB_LOG *notused1;
@@ -1180,7 +1174,6 @@ int __db_debug_log(logp, txnid, ret_lsnp, flags,
* PUBLIC: int __db_debug_print
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
-
int
__db_debug_print(notused1, dbtp, lsnp, notused3, notused4)
DB_LOG *notused1;
@@ -1335,7 +1328,6 @@ int __db_noop_log(logp, txnid, ret_lsnp, flags)
* PUBLIC: int __db_noop_print
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
-
int
__db_noop_print(notused1, dbtp, lsnp, notused3, notused4)
DB_LOG *notused1;
diff --git a/db2/db/db_dispatch.c b/db2/db/db_dispatch.c
index a4bcdb7628..4f89d2b917 100644
--- a/db2/db/db_dispatch.c
+++ b/db2/db/db_dispatch.c
@@ -43,7 +43,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)db_dispatch.c 10.6 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)db_dispatch.c 10.7 (Sleepycat) 11/23/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -236,8 +236,8 @@ __db_txnlist_find(listp, txnid)
void *listp;
u_int32_t txnid;
{
- __db_txnlist *p;
__db_txnhead *hp;
+ __db_txnlist *p;
if ((hp = (struct __db_txnhead *)listp) == NULL)
return (DB_NOTFOUND);
@@ -255,12 +255,16 @@ __db_txnlist_find(listp, txnid)
}
#ifdef DEBUG
+/*
+ * __db_txnlist_print --
+ * Print out the transaction list.
+ */
void
__db_txnlist_print(listp)
void *listp;
{
- __db_txnlist *p;
__db_txnhead *hp;
+ __db_txnlist *p;
hp = (struct __db_txnhead *)listp;
printf("Maxid: %lu\n", (u_long)hp->maxid);
@@ -268,3 +272,24 @@ __db_txnlist_print(listp)
printf("TXNID: %lu\n", (u_long)p->txnid);
}
#endif
+
+/*
+ * __db_txnlist_end --
+ * Discard transaction linked list.
+ *
+ * PUBLIC: void __db_txnlist_end __P((void *));
+ */
+void
+__db_txnlist_end(listp)
+ void *listp;
+{
+ __db_txnhead *hp;
+ __db_txnlist *p;
+
+ hp = (struct __db_txnhead *)listp;
+ while ((p = LIST_FIRST(&hp->head)) != LIST_END(&hp->head)) {
+ LIST_REMOVE(p, links);
+ __db_free(p);
+ }
+ __db_free(listp);
+}
diff --git a/db2/db/db_ret.c b/db2/db/db_ret.c
index bcec308b95..65441aa45a 100644
--- a/db2/db/db_ret.c
+++ b/db2/db/db_ret.c
@@ -8,7 +8,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)db_ret.c 10.8 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)db_ret.c 10.10 (Sleepycat) 11/28/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -116,35 +116,48 @@ __db_retcopy(dbt, data, len, memp, memsize, db_malloc)
dbt->size = len;
/*
- * Allocate any necessary memory.
+ * Allocate memory to be owned by the application: DB_DBT_MALLOC.
*
- * XXX: Never allocate 0 bytes.
+ * !!!
+ * We always allocate memory, even if we're copying out 0 bytes. This
+ * guarantees consistency, i.e., the application can always free memory
+ * without concern as to how many bytes of the record were requested.
+ *
+ * XXX
+ * Never allocate 0 bytes, it's known to make malloc/realloc unhappy.
+ *
+ * Use the memory specified by the application: DB_DBT_USERMEM.
+ *
+ * !!!
+ * If the length we're going to copy is 0, the application-supplied
+ * memory pointer is allowed to be NULL.
*/
if (F_ISSET(dbt, DB_DBT_MALLOC)) {
dbt->data = db_malloc == NULL ?
- (void *)__db_malloc(len + 1) :
+ (void *)__db_malloc(len) :
(void *)db_malloc(len + 1);
if (dbt->data == NULL)
return (ENOMEM);
} else if (F_ISSET(dbt, DB_DBT_USERMEM)) {
- if (dbt->ulen < len)
+ if (len != 0 && (dbt->data == NULL || dbt->ulen < len))
return (ENOMEM);
} else if (memp == NULL || memsize == NULL) {
return (EINVAL);
} else {
- if (*memsize == 0 || *memsize < len) {
+ if (len != 0 && (*memsize == 0 || *memsize < len)) {
*memp = *memp == NULL ?
- (void *)__db_malloc(len + 1) :
- (void *)__db_realloc(*memp, len + 1);
+ (void *)__db_malloc(len) :
+ (void *)__db_realloc(*memp, len);
if (*memp == NULL) {
*memsize = 0;
return (ENOMEM);
}
- *memsize = len + 1;
+ *memsize = len;
}
dbt->data = *memp;
}
- memcpy(dbt->data, data, len);
+ if (len != 0)
+ memcpy(dbt->data, data, len);
return (0);
}
diff --git a/db2/db_int.h b/db2/db_int.h
index 1f6c790345..21460722a3 100644
--- a/db2/db_int.h
+++ b/db2/db_int.h
@@ -4,7 +4,7 @@
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*
- * @(#)db_int.h.src 10.36 (Sleepycat) 10/31/97
+ * @(#)db_int.h.src 10.37 (Sleepycat) 11/25/97
*/
#ifndef _DB_INTERNAL_H_
@@ -168,6 +168,7 @@ typedef struct _db_mutex_t {
off_t off; /* Backing file offset. */
u_long pid; /* Lock holder: 0 or process pid. */
#endif
+ u_int32_t spins; /* Spins before block. */
u_int32_t mutex_set_wait; /* Granted after wait. */
u_int32_t mutex_set_nowait; /* Granted without waiting. */
} db_mutex_t;
diff --git a/db2/dbm/dbm.c b/db2/dbm/dbm.c
index 1fa92ce1fa..bd7c7a6636 100644
--- a/db2/dbm/dbm.c
+++ b/db2/dbm/dbm.c
@@ -47,7 +47,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)dbm.c 10.6 (Sleepycat) 8/27/97";
+static const char sccsid[] = "@(#)dbm.c 10.7 (Sleepycat) 11/25/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -198,9 +198,20 @@ dbm_open(file, oflags, mode)
dbinfo.h_ffactor = 40;
dbinfo.h_nelem = 1;
- (void)snprintf(path, sizeof(path), "%s%s", file, DBM_SUFFIX);
- if ((__set_errno(db_open(path,
- DB_HASH, __db_oflags(oflags), mode, NULL, &dbinfo, &dbp))) != 0)
+ /*
+ * XXX
+ * Don't use sprintf(3)/snprintf(3) -- the former is dangerous, and
+ * the latter isn't standard, and we're manipulating strings handed
+ * us by the application.
+ */
+ if (strlen(file) + strlen(DBM_SUFFIX) + 1 > sizeof(path)) {
+ errno = ENAMETOOLONG;
+ return (NULL);
+ }
+ (void)strcpy(path, file);
+ (void)strcat(path, DBM_SUFFIX);
+ if ((errno = db_open(path,
+ DB_HASH, __db_oflags(oflags), mode, NULL, &dbinfo, &dbp)) != 0)
return (NULL);
return ((DBM *)dbp);
}
@@ -261,7 +272,7 @@ dbm_firstkey(db)
DBC *cp;
if ((cp = TAILQ_FIRST(&db->curs_queue)) == NULL)
- if ((__set_errno(db->cursor(db, NULL, &cp))) != 0) {
+ if ((errno = db->cursor(db, NULL, &cp)) != 0) {
memset(&key, 0, sizeof(key));
return (key);
}
@@ -294,7 +305,7 @@ dbm_nextkey(db)
int status;
if ((cp = TAILQ_FIRST(&db->curs_queue)) == NULL)
- if ((__set_errno(db->cursor(db, NULL, &cp))) != 0) {
+ if ((errno = db->cursor(db, NULL, &cp)) != 0) {
memset(&key, 0, sizeof(key));
return (key);
}
@@ -330,9 +341,9 @@ dbm_delete(db, key)
_key.size = key.dsize;
ret = (((DB *)db)->del)((DB *)db, NULL, &_key, 0);
if (ret < 0)
- __set_errno(ENOENT);
+ errno = ENOENT;
else if (ret > 0) {
- __set_errno(ret);
+ errno = ret;
ret = -1;
}
return (ret);
diff --git a/db2/hash/hash_auto.c b/db2/hash/hash_auto.c
index 4820eb8611..787ee04ddb 100644
--- a/db2/hash/hash_auto.c
+++ b/db2/hash/hash_auto.c
@@ -119,7 +119,6 @@ int __ham_insdel_log(logp, txnid, ret_lsnp, flags,
* PUBLIC: int __ham_insdel_print
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
-
int
__ham_insdel_print(notused1, dbtp, lsnp, notused3, notused4)
DB_LOG *notused1;
@@ -316,7 +315,6 @@ int __ham_newpage_log(logp, txnid, ret_lsnp, flags,
* PUBLIC: int __ham_newpage_print
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
-
int
__ham_newpage_print(notused1, dbtp, lsnp, notused3, notused4)
DB_LOG *notused1;
@@ -479,7 +477,6 @@ int __ham_splitmeta_log(logp, txnid, ret_lsnp, flags,
* PUBLIC: int __ham_splitmeta_print
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
-
int
__ham_splitmeta_print(notused1, dbtp, lsnp, notused3, notused4)
DB_LOG *notused1;
@@ -640,7 +637,6 @@ int __ham_splitdata_log(logp, txnid, ret_lsnp, flags,
* PUBLIC: int __ham_splitdata_print
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
-
int
__ham_splitdata_print(notused1, dbtp, lsnp, notused3, notused4)
DB_LOG *notused1;
@@ -832,7 +828,6 @@ int __ham_replace_log(logp, txnid, ret_lsnp, flags,
* PUBLIC: int __ham_replace_print
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
-
int
__ham_replace_print(notused1, dbtp, lsnp, notused3, notused4)
DB_LOG *notused1;
@@ -1034,7 +1029,6 @@ int __ham_newpgno_log(logp, txnid, ret_lsnp, flags,
* PUBLIC: int __ham_newpgno_print
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
-
int
__ham_newpgno_print(notused1, dbtp, lsnp, notused3, notused4)
DB_LOG *notused1;
@@ -1203,7 +1197,6 @@ int __ham_ovfl_log(logp, txnid, ret_lsnp, flags,
* PUBLIC: int __ham_ovfl_print
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
-
int
__ham_ovfl_print(notused1, dbtp, lsnp, notused3, notused4)
DB_LOG *notused1;
@@ -1386,7 +1379,6 @@ int __ham_copypage_log(logp, txnid, ret_lsnp, flags,
* PUBLIC: int __ham_copypage_print
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
-
int
__ham_copypage_print(notused1, dbtp, lsnp, notused3, notused4)
DB_LOG *notused1;
diff --git a/db2/include/btree_ext.h b/db2/include/btree_ext.h
index bbe0d971b2..46f2227bdd 100644
--- a/db2/include/btree_ext.h
+++ b/db2/include/btree_ext.h
@@ -11,7 +11,7 @@ int __bam_cursor __P((DB *, DB_TXN *, DBC **));
int __bam_c_iclose __P((DB *, DBC *));
int __bam_get __P((DB *, DB_TXN *, DBT *, DBT *, int));
int __bam_ovfl_chk __P((DB *, CURSOR *, u_int32_t, int));
-int __bam_ca_delete __P((DB *, db_pgno_t, u_int32_t, CURSOR *));
+int __bam_ca_delete __P((DB *, db_pgno_t, u_int32_t, CURSOR *, int));
void __bam_ca_di __P((DB *, db_pgno_t, u_int32_t, int));
void __bam_ca_dup __P((DB *,
db_pgno_t, u_int32_t, u_int32_t, db_pgno_t, u_int32_t));
diff --git a/db2/include/common_ext.h b/db2/include/common_ext.h
index b814582abd..29bc9aa4e2 100644
--- a/db2/include/common_ext.h
+++ b/db2/include/common_ext.h
@@ -23,7 +23,7 @@ int __db_syncchk __P((const DB *, int));
int __db_ferr __P((const DB_ENV *, const char *, int));
u_int32_t __db_log2 __P((u_int32_t));
int __db_rcreate __P((DB_ENV *, APPNAME,
- const char *, const char *, int, size_t, int *, void *));
+ const char *, const char *, int, size_t, int, int *, void *));
int __db_rinit __P((DB_ENV *, RLAYOUT *, int, size_t, int));
int __db_ropen __P((DB_ENV *,
APPNAME, const char *, const char *, int, int *, void *));
diff --git a/db2/include/db.h.src b/db2/include/db.h.src
index 3cc2bfd4fc..654eb16425 100644
--- a/db2/include/db.h.src
+++ b/db2/include/db.h.src
@@ -4,7 +4,7 @@
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*
- * @(#)db.h.src 10.91 (Sleepycat) 11/3/97
+ * @(#)db.h.src 10.97 (Sleepycat) 11/28/97
*/
#ifndef _DB_H_
@@ -73,8 +73,8 @@
#define DB_VERSION_MAJOR 2
#define DB_VERSION_MINOR 3
-#define DB_VERSION_PATCH 12
-#define DB_VERSION_STRING "Sleepycat Software: DB 2.3.12: (11/3/97)"
+#define DB_VERSION_PATCH 14
+#define DB_VERSION_STRING "Sleepycat Software: DB 2.3.14: (11/28/97)"
typedef u_int32_t db_pgno_t; /* Page number type. */
typedef u_int16_t db_indx_t; /* Page offset type. */
@@ -129,8 +129,10 @@ struct __db_dbt {
};
/*
- * DB configuration. There are a set of functions which the application
- * can replace with its own versions.
+ * DB internal configuration.
+ *
+ * There are a set of functions that the application can replace with its
+ * own versions, and some other knobs which can be turned at run-time.
*/
#define DB_FUNC_CALLOC 1 /* ANSI C calloc. */
#define DB_FUNC_CLOSE 2 /* POSIX 1003.1 close. */
@@ -147,11 +149,12 @@ struct __db_dbt {
#define DB_FUNC_REALLOC 13 /* ANSI C realloc. */
#define DB_FUNC_SEEK 14 /* POSIX 1003.1 lseek. */
#define DB_FUNC_SLEEP 15 /* DB: sleep secs/usecs. */
-#define DB_FUNC_STRDUP 16 /* ANSI C strdup. */
+#define DB_FUNC_STRDUP 16 /* DB: strdup(3). */
#define DB_FUNC_UNLINK 17 /* POSIX 1003.1 unlink. */
#define DB_FUNC_UNMAP 18 /* DB: unmap shared memory file. */
#define DB_FUNC_WRITE 19 /* POSIX 1003.1 write. */
#define DB_FUNC_YIELD 20 /* DB: yield thread to scheduler. */
+#define DB_TSL_SPINS 21 /* DB: initialize spin count. */
/*
* Database configuration and initialization.
@@ -211,10 +214,10 @@ struct __db_dbt {
* locking subsystem.
*/
#define DB_LOCK_NORUN 0x0
-#define DB_LOCK_DEFAULT 0x1
-#define DB_LOCK_OLDEST 0x2
-#define DB_LOCK_RANDOM 0x3
-#define DB_LOCK_YOUNGEST 0x4
+#define DB_LOCK_DEFAULT 0x1 /* Default policy. */
+#define DB_LOCK_OLDEST 0x2 /* Abort oldest transaction. */
+#define DB_LOCK_RANDOM 0x3 /* Abort random transaction. */
+#define DB_LOCK_YOUNGEST 0x4 /* Abort youngest transaction. */
struct __db_env {
int db_lorder; /* Byte order. */
@@ -265,6 +268,10 @@ struct __db_env {
/*******************************************************
* Access methods.
*******************************************************/
+/*
+ * XXX
+ * Changes here must be reflected in java/src/com/sleepycat/db/Db.java.
+ */
typedef enum {
DB_BTREE=1, /* B+tree. */
DB_HASH, /* Extended Linear Hashing. */
@@ -347,7 +354,13 @@ struct __db_info {
#define DB_SET_RANGE 0x020000 /* c_get() */
#define DB_SET_RECNO 0x040000 /* c_get() */
-/* DB (user visible) error return codes. */
+/*
+ * DB (user visible) error return codes.
+ *
+ * XXX
+ * Changes to any of the user visible error return codes must be reflected
+ * in java/src/com/sleepycat/db/Db.java.
+ */
#define DB_INCOMPLETE ( -1) /* Sync didn't finish. */
#define DB_KEYEMPTY ( -2) /* The key/data pair was deleted or
was never created by the user. */
@@ -516,6 +529,7 @@ int db_appinit __P((const char *, char * const *, DB_ENV *, int));
int db_appexit __P((DB_ENV *));
int db_jump_set __P((void *, int));
int db_open __P((const char *, DBTYPE, int, int, DB_ENV *, DB_INFO *, DB **));
+int db_value_set __P((int, int));
char *db_version __P((int *, int *, int *));
#if defined(__cplusplus)
};
@@ -533,16 +547,26 @@ char *db_version __P((int *, int *, int *));
/* Flag values for lock_detect(). */
#define DB_LOCK_CONFLICT 0x01 /* Run on any conflict. */
-/* Request types. */
+/*
+ * Request types.
+ *
+ * XXX
+ * Changes here must be reflected in java/src/com/sleepycat/db/Db.java.
+ */
typedef enum {
- DB_LOCK_DUMP, /* Display held locks. */
+ DB_LOCK_DUMP=0, /* Display held locks. */
DB_LOCK_GET, /* Get the lock. */
DB_LOCK_PUT, /* Release the lock. */
DB_LOCK_PUT_ALL, /* Release locker's locks. */
DB_LOCK_PUT_OBJ /* Release locker's locks on obj. */
} db_lockop_t;
-/* Simple R/W lock modes and for multi-granularity intention locking. */
+/*
+ * Simple R/W lock modes and for multi-granularity intention locking.
+ *
+ * XXX
+ * Changes here must be reflected in java/src/com/sleepycat/db/Db.java.
+ */
typedef enum {
DB_LOCK_NG=0, /* Not granted. */
DB_LOCK_READ, /* Shared/read. */
@@ -577,7 +601,7 @@ extern const u_int8_t db_riw_conflicts[];
extern "C" {
#endif
int lock_close __P((DB_LOCKTAB *));
-int lock_detect __P((DB_LOCKTAB *, int, u_int32_t));
+int lock_detect __P((DB_LOCKTAB *, int, int));
int lock_get __P((DB_LOCKTAB *,
u_int32_t, int, const DBT *, db_lockmode_t, DB_LOCK *));
int lock_id __P((DB_LOCKTAB *, u_int32_t *));
diff --git a/db2/include/db_cxx.h b/db2/include/db_cxx.h
index 01d1231092..83523c5559 100644
--- a/db2/include/db_cxx.h
+++ b/db2/include/db_cxx.h
@@ -4,7 +4,7 @@
* Copyright (c) 1997
* Sleepycat Software. All rights reserved.
*
- * @(#)db_cxx.h 10.12 (Sleepycat) 10/25/97
+ * @(#)db_cxx.h 10.13 (Sleepycat) 11/25/97
*/
#ifndef _DB_CXX_H_
@@ -202,7 +202,7 @@ class _exported DbLockTab
friend DbEnv;
public:
int close();
- int detect(int atype, u_int32_t flags);
+ int detect(int flags, int atype);
int get(u_int32_t locker, int flags, const Dbt *obj,
db_lockmode_t lock_mode, DbLock *lock);
int id(u_int32_t *idp);
diff --git a/db2/include/db_ext.h b/db2/include/db_ext.h
index f9b3b3a214..15eeaf50a3 100644
--- a/db2/include/db_ext.h
+++ b/db2/include/db_ext.h
@@ -65,6 +65,7 @@ int __db_add_recovery __P((DB_ENV *,
int __db_txnlist_init __P((void *));
int __db_txnlist_add __P((void *, u_int32_t));
int __db_txnlist_find __P((void *, u_int32_t));
+void __db_txnlist_end __P((void *));
int __db_dput __P((DB *,
DBT *, PAGE **, db_indx_t *, int (*)(DB *, u_int32_t, PAGE **)));
int __db_drem __P((DB *,
diff --git a/db2/include/db_int.h.src b/db2/include/db_int.h.src
index abd93a6e8e..03a882fded 100644
--- a/db2/include/db_int.h.src
+++ b/db2/include/db_int.h.src
@@ -4,7 +4,7 @@
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*
- * @(#)db_int.h.src 10.36 (Sleepycat) 10/31/97
+ * @(#)db_int.h.src 10.37 (Sleepycat) 11/25/97
*/
#ifndef _DB_INTERNAL_H_
@@ -168,6 +168,7 @@ typedef struct _db_mutex_t {
off_t off; /* Backing file offset. */
u_long pid; /* Lock holder: 0 or process pid. */
#endif
+ u_int32_t spins; /* Spins before block. */
u_int32_t mutex_set_wait; /* Granted after wait. */
u_int32_t mutex_set_nowait; /* Granted without waiting. */
} db_mutex_t;
diff --git a/db2/include/lock.h b/db2/include/lock.h
index 8a927f076e..5031b65d06 100644
--- a/db2/include/lock.h
+++ b/db2/include/lock.h
@@ -4,7 +4,7 @@
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*
- * @(#)lock.h 10.9 (Sleepycat) 10/25/97
+ * @(#)lock.h 10.10 (Sleepycat) 11/13/97
*/
typedef struct __db_lockobj DB_LOCKOBJ;
@@ -85,10 +85,14 @@ struct __db_lockobj {
} dlinks;
#define DB_LOCK_OBJTYPE 1
#define DB_LOCK_LOCKER 2
+ /* Allocate room in the object to
+ * hold typical DB lock structures
+ * so that we do not have to
+ * allocate them from shalloc. */
+ u_int8_t objdata[sizeof(struct __db_ilock)];
u_int8_t type; /* Real object or locker id. */
};
-
#define dd_id wlinks._dd_id
#define waiters wlinks._waiters
#define holders dlinks._holders
diff --git a/db2/include/log.h b/db2/include/log.h
index a192a38136..405daf4148 100644
--- a/db2/include/log.h
+++ b/db2/include/log.h
@@ -4,7 +4,7 @@
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*
- * @(#)log.h 10.15 (Sleepycat) 11/2/97
+ * @(#)log.h 10.16 (Sleepycat) 11/9/97
*/
#ifndef _LOG_H_
@@ -117,20 +117,35 @@ struct __log {
SH_TAILQ_HEAD(__fq) fq; /* List of file names. */
+ /*
+ * The lsn LSN is the file offset that we're about to write and which
+ * we will return to the user.
+ */
DB_LSN lsn; /* LSN at current file offset. */
- DB_LSN c_lsn; /* LSN of the last checkpoint. */
+
+ /*
+ * The s_lsn LSN is the last LSN that we know is on disk, not just
+ * written, but synced.
+ */
DB_LSN s_lsn; /* LSN of the last sync. */
- DB_LSN uw_lsn; /* LSN of 1st rec not fully on disk. */
u_int32_t len; /* Length of the last record. */
- size_t b_off; /* Current offset in the buffer. */
u_int32_t w_off; /* Current write offset in the file. */
+ DB_LSN c_lsn; /* LSN of the last checkpoint. */
time_t chkpt; /* Time of the last checkpoint. */
DB_LOG_STAT stat; /* Log statistics. */
+ /*
+ * The f_lsn LSN is the LSN (returned to the user) that "owns" the
+ * first byte of the buffer. If the record associated with the LSN
+ * spans buffers, it may not reflect the physical file location of
+ * the first byte of the buffer.
+ */
+ DB_LSN f_lsn; /* LSN of first byte in the buffer. */
+ size_t b_off; /* Current offset in the buffer. */
u_int8_t buf[4 * 1024]; /* Log buffer. */
};
diff --git a/db2/include/mp.h b/db2/include/mp.h
index f68f42b144..f108246f2c 100644
--- a/db2/include/mp.h
+++ b/db2/include/mp.h
@@ -4,7 +4,7 @@
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*
- * @(#)mp.h 10.19 (Sleepycat) 10/25/97
+ * @(#)mp.h 10.22 (Sleepycat) 11/28/97
*/
struct __bh; typedef struct __bh BH;
@@ -23,7 +23,6 @@ struct __mpoolfile; typedef struct __mpoolfile MPOOLFILE;
#define DB_CACHESIZE_MIN ( 20 * 1024)
#define INVALID 0 /* Invalid shared memory offset. */
-#define TEMPORARY "<tmp>" /* Temporary file name. */
/*
* There are three ways we do locking in the mpool code:
@@ -122,10 +121,10 @@ struct __db_mpool {
int fd; /* Underlying mmap'd fd. */
-
#define MP_ISPRIVATE 0x01 /* Private, so local memory. */
#define MP_LOCKHANDLE 0x02 /* Threaded, lock handles and region. */
#define MP_LOCKREGION 0x04 /* Concurrent access, lock region. */
+#define MP_MALLOC 0x08 /* If region in allocated memory. */
u_int32_t flags;
};
@@ -157,7 +156,6 @@ struct __db_mpoolfile {
/* These fields are not protected. */
TAILQ_ENTRY(__db_mpoolfile) q; /* Linked list of DB_MPOOLFILE's. */
- char *path; /* Initial file path. */
DB_MPOOL *dbmp; /* Overlying DB_MPOOL. */
MPOOLFILE *mfp; /* Underlying MPOOLFILE. */
@@ -165,11 +163,9 @@ struct __db_mpoolfile {
size_t len; /* Length of mmap'd region. */
/* These fields need to be protected for multi-threaded support. */
-#define MP_PATH_ALLOC 0x001 /* Path is allocated memory. */
-#define MP_PATH_TEMP 0x002 /* Backing file is a temporary. */
-#define MP_READONLY 0x004 /* File is readonly. */
-#define MP_UPGRADE 0x008 /* File descriptor is readwrite. */
-#define MP_UPGRADE_FAIL 0x010 /* Upgrade wasn't possible. */
+#define MP_READONLY 0x01 /* File is readonly. */
+#define MP_UPGRADE 0x02 /* File descriptor is readwrite. */
+#define MP_UPGRADE_FAIL 0x04 /* Upgrade wasn't possible. */
u_int32_t flags;
};
@@ -220,12 +216,9 @@ struct __mpoolfile {
u_int32_t ref; /* Reference count. */
int ftype; /* File type. */
- int can_mmap; /* If the file can be mmap'd. */
-
int lsn_off; /* Page's LSN offset. */
size_t path_off; /* File name location. */
-
size_t fileid_off; /* File identification location. */
size_t pgcookie_len; /* Pgin/pgout cookie length. */
@@ -233,6 +226,12 @@ struct __mpoolfile {
int lsn_cnt; /* Checkpoint buffers left to write. */
+ db_pgno_t last_pgno; /* Last page in the file. */
+
+#define MP_CAN_MMAP 0x01 /* If the file can be mmap'd. */
+#define MP_TEMP 0x02 /* Backing file is a temporary. */
+ u_int32_t flags;
+
DB_MPOOL_FSTAT stat; /* Per-file mpool statistics. */
};
diff --git a/db2/include/mp_ext.h b/db2/include/mp_ext.h
index 49d86ba2e5..b78b3423cd 100644
--- a/db2/include/mp_ext.h
+++ b/db2/include/mp_ext.h
@@ -5,8 +5,10 @@ int __memp_pgread __P((DB_MPOOLFILE *, BH *, int));
int __memp_pgwrite __P((DB_MPOOLFILE *, BH *, int *, int *));
int __memp_pg __P((DB_MPOOLFILE *, BH *, int));
void __memp_bhfree __P((DB_MPOOL *, MPOOLFILE *, BH *, int));
-int __memp_fopen __P((DB_MPOOL *, const char *, int, int,
- int, size_t, int, DBT *, u_int8_t *, int, DB_MPOOLFILE **));
+int __memp_fopen __P((DB_MPOOL *, MPOOLFILE *, const char *, int,
+ int, int, size_t, int, DBT *, u_int8_t *, int, DB_MPOOLFILE **));
+char * __memp_fn __P((DB_MPOOLFILE *));
+char * __memp_fns __P((DB_MPOOL *, MPOOLFILE *));
void __memp_debug __P((DB_MPOOL *, FILE *, int));
int __memp_ralloc __P((DB_MPOOL *, size_t, size_t *, void *));
int __memp_ropen
diff --git a/db2/include/os_ext.h b/db2/include/os_ext.h
index e48a1e9407..2edf2e257d 100644
--- a/db2/include/os_ext.h
+++ b/db2/include/os_ext.h
@@ -1,5 +1,8 @@
/* DO NOT EDIT: automatically built by dist/distrib. */
int __db_abspath __P((const char *));
+void *__db_calloc __P((size_t, size_t));
+void *__db_malloc __P((size_t));
+void *__db_realloc __P((void *, size_t));
int __os_dirlist __P((const char *, char ***, int *));
void __os_dirfree __P((char **, int));
int __db_fileid __P((DB_ENV *, const char *, int, u_int8_t *));
@@ -14,6 +17,7 @@ int __db_read __P((int, void *, size_t, ssize_t *));
int __db_write __P((int, void *, size_t, ssize_t *));
int __os_seek __P((int, size_t, db_pgno_t, u_long, int));
int __os_sleep __P((u_long, u_long));
+int __os_spin __P((void));
int __os_exists __P((const char *, int *));
int __os_ioinfo __P((const char *, int, off_t *, off_t *));
int __db_unlink __P((const char *));
diff --git a/db2/include/os_func.h b/db2/include/os_func.h
index 0a72942903..54b64ffaa2 100644
--- a/db2/include/os_func.h
+++ b/db2/include/os_func.h
@@ -4,12 +4,11 @@
* Copyright (c) 1997
* Sleepycat Software. All rights reserved.
*
- * @(#)os_func.h 10.2 (Sleepycat) 10/28/97
+ * @(#)os_func.h 10.4 (Sleepycat) 11/28/97
*/
/* Calls which can be replaced by the application. */
struct __db_jumptab {
- void *(*db_calloc) __P((size_t, size_t)); /* DB_FUNC_CALLOC */
int (*db_close) __P((int)); /* DB_FUNC_CLOSE */
void (*db_dirfree) __P((char **, int)); /* DB_FUNC_DIRFREE */
int (*db_dirlist) /* DB_FUNC_DIRLIST */
@@ -54,7 +53,6 @@ extern struct __db_jumptab __db_jump;
* part of DB is the only code that should use the __os_XXX names, all other
* parts of DB should be calling __db_XXX functions.
*/
-#define __db_calloc __db_jump.db_calloc
#define __os_close __db_jump.db_close /* __db_close is a wrapper. */
#define __db_dirfree __db_jump.db_dirfree
#define __db_dirlist __db_jump.db_dirlist
@@ -62,11 +60,9 @@ extern struct __db_jumptab __db_jump;
#define __db_free __db_jump.db_free
#define __os_fsync __db_jump.db_fsync /* __db_fsync is a wrapper. */
#define __db_ioinfo __db_jump.db_ioinfo
-#define __db_malloc __db_jump.db_malloc
#define __db_map __db_jump.db_map
#define __os_open __db_jump.db_open /* __db_open is a wrapper. */
#define __os_read __db_jump.db_read /* __db_read is a wrapper. */
-#define __db_realloc __db_jump.db_realloc
#define __db_seek __db_jump.db_seek
#define __db_sleep __db_jump.db_sleep
#define __db_strdup __db_jump.db_strdup
diff --git a/db2/lock/lock.c b/db2/lock/lock.c
index f1223a9fa6..9b1cbc8a08 100644
--- a/db2/lock/lock.c
+++ b/db2/lock/lock.c
@@ -8,7 +8,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)lock.c 10.38 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)lock.c 10.41 (Sleepycat) 11/28/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -91,7 +91,7 @@ __lock_create(path, mode, dbenv)
if ((ret =
__db_rcreate(dbenv, DB_APP_NONE, path, DB_DEFAULT_LOCK_FILE, mode,
LOCK_REGION_SIZE(lock_modes, maxlocks, __db_tablesize(maxlocks)),
- &fd, &lrp)) != 0)
+ 0, &fd, &lrp)) != 0)
return (ret);
/* Region exists; now initialize it. */
@@ -600,7 +600,9 @@ __lock_put_internal(lt, lockp, do_all)
if (SH_TAILQ_FIRST(&sh_obj->holders, __db_lock) == NULL) {
HASHREMOVE_EL(lt->hashtab, __db_lockobj,
links, sh_obj, lt->region->table_size, __lock_lhash);
- __db_shalloc_free(lt->mem, SH_DBT_PTR(&sh_obj->lockobj));
+ if (sh_obj->lockobj.size > sizeof(sh_obj->objdata))
+ __db_shalloc_free(lt->mem,
+ SH_DBT_PTR(&sh_obj->lockobj));
SH_TAILQ_INSERT_HEAD(&lt->region->free_objs, sh_obj, links,
__db_lockobj);
state_changed = 1;
@@ -633,7 +635,7 @@ __lock_get_internal(lt, locker, flags, obj, lock_mode, lockp)
DB_LOCKOBJ *sh_obj, *sh_locker;
DB_LOCKREGION *lrp;
size_t newl_off;
- int ret;
+ int ihold, ret;
ret = 0;
/*
@@ -680,29 +682,40 @@ __lock_get_internal(lt, locker, flags, obj, lock_mode, lockp)
* new lock if it does not conflict with anyone on the holders list
* OR anyone on the waiters list. The reason that we don't grant if
* there's a conflict is that this can lead to starvation (a writer
- * waiting on a popularly read item will never ben granted). The
+ * waiting on a popularly read item will never be granted). The
* downside of this is that a waiting reader can prevent an upgrade
- * from reader to writer, which is not uncommon. In case of conflict,
- * we put the new lock on the end of the waiters list.
+ * from reader to writer, which is not uncommon.
+ *
+ * There is one exception to the no-conflict rule. If a lock is held
+ * by the requesting locker AND the new lock does not conflict with
+ * any other holders, then we grant the lock. The most common place
+ * this happens is when the holder has a WRITE lock and a READ lock
+ * request comes in for the same locker. If we do not grant the read
+ * lock, then we guarantee deadlock.
+ *
+ * In case of conflict, we put the new lock on the end of the waiters
+ * list.
*/
+ ihold = 0;
for (lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock);
lp != NULL;
lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
- if (CONFLICTS(lt, lp->mode, lock_mode) &&
- locker != lp->holder)
+ if (locker == lp->holder) {
+ if (lp->mode == lock_mode &&
+ lp->status == DB_LSTAT_HELD) {
+ /* Lock is held, just inc the ref count. */
+ lp->refcount++;
+ SH_TAILQ_INSERT_HEAD(&lrp->free_locks,
+ newl, links, __db_lock);
+ *lockp = lp;
+ return (0);
+ } else
+ ihold = 1;
+ } else if (CONFLICTS(lt, lp->mode, lock_mode))
break;
- else if (lp->holder == locker && lp->mode == lock_mode &&
- lp->status == DB_LSTAT_HELD) {
- /* Lock is already held, just inc the ref count. */
- lp->refcount++;
- SH_TAILQ_INSERT_HEAD(&lrp->free_locks, newl, links,
- __db_lock);
- *lockp = lp;
- return (0);
- }
}
- if (lp == NULL)
+ if (lp == NULL && !ihold)
for (lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock);
lp != NULL;
lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
@@ -1261,25 +1274,37 @@ __lock_getobj(lt, locker, dbt, type, objp)
*/
if (sh_obj == NULL) {
/* Create new object and then insert it into hash table. */
- if ((sh_obj = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj))
- == NULL) {
+ if ((sh_obj =
+ SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj)) == NULL) {
if ((ret = __lock_grow_region(lt, DB_LOCK_OBJ, 0)) != 0)
return (ret);
lrp = lt->region;
sh_obj = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj);
}
- if ((ret = __db_shalloc(lt->mem, obj_size, 0, &p)) != 0) {
- if ((ret = __lock_grow_region(lt,
- DB_LOCK_MEM, obj_size)) != 0)
- return (ret);
- lrp = lt->region;
- /* Reacquire the head of the list. */
- sh_obj = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj);
- (void)__db_shalloc(lt->mem, obj_size, 0, &p);
- }
- sh_obj->type = type;
+
+ /*
+ * If we can fit this object in the structure, do so instead
+ * of shalloc-ing space for it.
+ */
+ if (obj_size <= sizeof(sh_obj->objdata))
+ p = sh_obj->objdata;
+ else
+ if ((ret =
+ __db_shalloc(lt->mem, obj_size, 0, &p)) != 0) {
+ if ((ret = __lock_grow_region(lt,
+ DB_LOCK_MEM, obj_size)) != 0)
+ return (ret);
+ lrp = lt->region;
+ /* Reacquire the head of the list. */
+ sh_obj = SH_TAILQ_FIRST(&lrp->free_objs,
+ __db_lockobj);
+ (void)__db_shalloc(lt->mem, obj_size, 0, &p);
+ }
+
src = type == DB_LOCK_OBJTYPE ? dbt->data : (void *)&locker;
memcpy(p, src, obj_size);
+
+ sh_obj->type = type;
SH_TAILQ_REMOVE(&lrp->free_objs, sh_obj, links, __db_lockobj);
SH_TAILQ_INIT(&sh_obj->waiters);
@@ -1329,7 +1354,8 @@ __lock_freeobj(lt, obj)
{
HASHREMOVE_EL(lt->hashtab,
__db_lockobj, links, obj, lt->region->table_size, __lock_lhash);
- __db_shalloc_free(lt->mem, SH_DBT_PTR(&obj->lockobj));
+ if (obj->lockobj.size > sizeof(obj->objdata))
+ __db_shalloc_free(lt->mem, SH_DBT_PTR(&obj->lockobj));
SH_TAILQ_INSERT_HEAD(&lt->region->free_objs, obj, links, __db_lockobj);
}
diff --git a/db2/lock/lock_deadlock.c b/db2/lock/lock_deadlock.c
index 566021fe89..93c438ca36 100644
--- a/db2/lock/lock_deadlock.c
+++ b/db2/lock/lock_deadlock.c
@@ -11,7 +11,7 @@
static const char copyright[] =
"@(#) Copyright (c) 1997\n\
Sleepycat Software Inc. All rights reserved.\n";
-static const char sccsid[] = "@(#)lock_deadlock.c 10.25 (Sleepycat) 11/1/97";
+static const char sccsid[] = "@(#)lock_deadlock.c 10.26 (Sleepycat) 11/25/97";
#endif
#ifndef NO_SYSTEM_INCLUDES
@@ -66,8 +66,7 @@ static void __dd_debug __P((DB_ENV *, locker_info *, u_int32_t *, u_int32_t));
int
lock_detect(lt, flags, atype)
DB_LOCKTAB *lt;
- int flags;
- u_int32_t atype;
+ int flags, atype;
{
DB_ENV *dbenv;
locker_info *idmap;
diff --git a/db2/log/log.c b/db2/log/log.c
index 17681f8e0f..a9bf7a95ab 100644
--- a/db2/log/log.c
+++ b/db2/log/log.c
@@ -7,7 +7,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)log.c 10.33 (Sleepycat) 11/2/97";
+static const char sccsid[] = "@(#)log.c 10.34 (Sleepycat) 11/28/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -91,7 +91,7 @@ log_open(path, flags, mode, dbenv, lpp)
retry_cnt = newregion = 0;
retry: if (LF_ISSET(DB_CREATE)) {
ret = __db_rcreate(dbenv, DB_APP_LOG, path,
- DB_DEFAULT_LOG_FILE, mode, len, &fd, &dblp->maddr);
+ DB_DEFAULT_LOG_FILE, mode, len, 0, &fd, &dblp->maddr);
if (ret == 0) {
/* Put the LOG structure first in the region. */
lp = dblp->maddr;
diff --git a/db2/log/log_archive.c b/db2/log/log_archive.c
index 140ea31fd1..0248e2815c 100644
--- a/db2/log/log_archive.c
+++ b/db2/log/log_archive.c
@@ -8,7 +8,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)log_archive.c 10.28 (Sleepycat) 10/28/97";
+static const char sccsid[] = "@(#)log_archive.c 10.29 (Sleepycat) 11/12/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -360,9 +360,9 @@ __absname(pref, name, newnamep)
* If the user has their own malloc routine, use it.
*/
static int
-__usermem(listp, func)
+__usermem(listp, cmpfunc)
char ***listp;
- void *(*func) __P((size_t));
+ void *(*cmpfunc) __P((size_t));
{
size_t len;
char **array, **arrayp, **orig, *strp;
@@ -378,10 +378,10 @@ __usermem(listp, func)
* XXX
* Don't simplify this expression, SunOS compilers don't like it.
*/
- if (func == NULL)
+ if (cmpfunc == NULL)
array = (char **)__db_malloc(len);
else
- array = (char **)func(len);
+ array = (char **)cmpfunc(len);
if (array == NULL)
return (ENOMEM);
strp = (char *)(array + (orig - *listp) + 1);
diff --git a/db2/log/log_auto.c b/db2/log/log_auto.c
index d5dbfe1f5f..61626b090e 100644
--- a/db2/log/log_auto.c
+++ b/db2/log/log_auto.c
@@ -102,7 +102,6 @@ int __log_register_log(logp, txnid, ret_lsnp, flags,
* PUBLIC: int __log_register_print
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
-
int
__log_register_print(notused1, dbtp, lsnp, notused3, notused4)
DB_LOG *notused1;
@@ -250,7 +249,6 @@ int __log_unregister_log(logp, txnid, ret_lsnp, flags,
* PUBLIC: int __log_unregister_print
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
-
int
__log_unregister_print(notused1, dbtp, lsnp, notused3, notused4)
DB_LOG *notused1;
diff --git a/db2/log/log_get.c b/db2/log/log_get.c
index ed35d57f82..2d1512c6b9 100644
--- a/db2/log/log_get.c
+++ b/db2/log/log_get.c
@@ -7,7 +7,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)log_get.c 10.21 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)log_get.c 10.22 (Sleepycat) 11/22/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -44,24 +44,21 @@ log_get(dblp, alsn, dbt, flags)
/* Validate arguments. */
#define OKFLAGS (DB_CHECKPOINT | \
DB_CURRENT | DB_FIRST | DB_LAST | DB_NEXT | DB_PREV | DB_SET)
- if (flags != 0) {
- if ((ret =
- __db_fchk(dblp->dbenv, "log_get", flags, OKFLAGS)) != 0)
- return (ret);
- switch (flags) {
- case DB_CHECKPOINT:
- case DB_CURRENT:
- case DB_FIRST:
- case DB_LAST:
- case DB_NEXT:
- case DB_PREV:
- case DB_SET:
- case 0:
- break;
- default:
- return (__db_ferr(dblp->dbenv, "log_get", 1));
- }
+ if ((ret = __db_fchk(dblp->dbenv, "log_get", flags, OKFLAGS)) != 0)
+ return (ret);
+ switch (flags) {
+ case DB_CHECKPOINT:
+ case DB_CURRENT:
+ case DB_FIRST:
+ case DB_LAST:
+ case DB_NEXT:
+ case DB_PREV:
+ case DB_SET:
+ break;
+ default:
+ return (__db_ferr(dblp->dbenv, "log_get", 1));
}
+
if (F_ISSET(dblp, DB_AM_THREAD)) {
if (LF_ISSET(DB_NEXT | DB_PREV | DB_CURRENT))
return (__db_ferr(dblp->dbenv, "log_get", 1));
diff --git a/db2/log/log_put.c b/db2/log/log_put.c
index 92d9563301..42fec88a7d 100644
--- a/db2/log/log_put.c
+++ b/db2/log/log_put.c
@@ -7,7 +7,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)log_put.c 10.20 (Sleepycat) 11/2/97";
+static const char sccsid[] = "@(#)log_put.c 10.22 (Sleepycat) 11/12/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -28,10 +28,10 @@ static const char sccsid[] = "@(#)log_put.c 10.20 (Sleepycat) 11/2/97";
#include "hash.h"
#include "common_ext.h"
-static int __log_fill __P((DB_LOG *, void *, u_int32_t));
+static int __log_fill __P((DB_LOG *, DB_LSN *, void *, u_int32_t));
static int __log_flush __P((DB_LOG *, const DB_LSN *));
static int __log_newfd __P((DB_LOG *));
-static int __log_putr __P((DB_LOG *, const DBT *, u_int32_t));
+static int __log_putr __P((DB_LOG *, DB_LSN *, const DBT *, u_int32_t));
static int __log_write __P((DB_LOG *, void *, u_int32_t));
/*
@@ -117,12 +117,13 @@ __log_put(dblp, lsn, dbt, flags)
/* Reset the file write offset. */
lp->w_off = 0;
-
- /* Reset the first-unwritten LSN for the buffer. */
- lp->uw_lsn = lp->lsn;
} else
lastoff = 0;
+ /* Initialize the LSN information returned to the user. */
+ lsn->file = lp->lsn.file;
+ lsn->offset = lp->lsn.offset;
+
/*
* Insert persistent information as the first record in every file.
* Note that the previous length is wrong for the very first record
@@ -131,17 +132,17 @@ __log_put(dblp, lsn, dbt, flags)
if (lp->lsn.offset == 0) {
t.data = &lp->persist;
t.size = sizeof(LOGP);
- if ((ret = __log_putr(dblp,
+ if ((ret = __log_putr(dblp, lsn,
&t, lastoff == 0 ? 0 : lastoff - lp->len)) != 0)
return (ret);
- }
- /* Initialize the LSN information returned to the user. */
- lsn->file = lp->lsn.file;
- lsn->offset = lp->lsn.offset;
+ /* Update the LSN information returned to the user. */
+ lsn->file = lp->lsn.file;
+ lsn->offset = lp->lsn.offset;
+ }
- /* Put out the user's record. */
- if ((ret = __log_putr(dblp, dbt, lp->lsn.offset - lp->len)) != 0)
+ /* Write the application's log record. */
+ if ((ret = __log_putr(dblp, lsn, dbt, lp->lsn.offset - lp->len)) != 0)
return (ret);
/*
@@ -184,19 +185,6 @@ __log_put(dblp, lsn, dbt, flags)
(void)time(&lp->chkpt);
lp->stat.st_wc_bytes = lp->stat.st_wc_mbytes = 0;
}
-
- /*
- * When an application calls the log_flush routine, we need to figure
- * out if the current buffer needs to be flushed. The problem is that
- * if a record spans buffers, it's possible for the record continued
- * in the current buffer to have begun in a previous buffer. Each time
- * we write a buffer, we update the first-unwritten LSN to point to the
- * first LSN after that written buffer. If we have a spanning record,
- * correct that value to be the LSN that started it all, here.
- */
- if (lsn->offset < lp->w_off && lsn->offset + lp->len > lp->w_off)
- lp->uw_lsn = *lsn;
-
return (0);
}
@@ -205,8 +193,9 @@ __log_put(dblp, lsn, dbt, flags)
* Actually put a record into the log.
*/
static int
-__log_putr(dblp, dbt, prev)
+__log_putr(dblp, lsn, dbt, prev)
DB_LOG *dblp;
+ DB_LSN *lsn;
const DBT *dbt;
u_int32_t prev;
{
@@ -225,15 +214,15 @@ __log_putr(dblp, dbt, prev)
hdr.len = sizeof(HDR) + dbt->size;
hdr.cksum = __ham_func4(dbt->data, dbt->size);
- if ((ret = __log_fill(dblp, &hdr, sizeof(HDR))) != 0)
+ if ((ret = __log_fill(dblp, lsn, &hdr, sizeof(HDR))) != 0)
return (ret);
+ lp->len = sizeof(HDR);
lp->lsn.offset += sizeof(HDR);
- if ((ret = __log_fill(dblp, dbt->data, dbt->size)) != 0)
+ if ((ret = __log_fill(dblp, lsn, dbt->data, dbt->size)) != 0)
return (ret);
+ lp->len += dbt->size;
lp->lsn.offset += dbt->size;
-
- lp->len = sizeof(HDR) + dbt->size;
return (0);
}
@@ -266,7 +255,7 @@ __log_flush(dblp, lsn)
{
DB_LSN t_lsn;
LOG *lp;
- int ret;
+ int current, ret;
ret = 0;
lp = dblp->lp;
@@ -292,23 +281,27 @@ __log_flush(dblp, lsn)
/*
* If the LSN is less than the last-sync'd LSN, we're done. Note,
* the last-sync LSN saved in s_lsn is the LSN of the first byte
- * that has not yet been written to disk, so the test is <, not <=.
+ * we absolutely know has been written to disk, so the test is <=.
*/
if (lsn->file < lp->s_lsn.file ||
- (lsn->file == lp->s_lsn.file && lsn->offset < lp->s_lsn.offset))
+ (lsn->file == lp->s_lsn.file && lsn->offset <= lp->s_lsn.offset))
return (0);
/*
* We may need to write the current buffer. We have to write the
* current buffer if the flush LSN is greater than or equal to the
- * first-unwritten LSN (uw_lsn). If we write the buffer, then we
- * update the first-unwritten LSN.
+ * buffer's starting LSN.
*/
+ current = 0;
if (lp->b_off != 0 &&
- lsn->file >= lp->uw_lsn.file && lsn->offset >= lp->uw_lsn.offset)
+ lsn->file >= lp->f_lsn.file && lsn->offset >= lp->f_lsn.offset) {
if ((ret = __log_write(dblp, lp->buf, lp->b_off)) != 0)
return (ret);
+ lp->b_off = 0;
+ current = 1;
+ }
+
/*
* It's possible that this thread may never have written to this log
* file. Acquire a file descriptor if we don't already have one.
@@ -323,10 +316,14 @@ __log_flush(dblp, lsn)
++lp->stat.st_scount;
/*
- * Set the last-synced LSN, the first LSN after the last record
- * that we know is on disk.
+ * Set the last-synced LSN, using the LSN of the current buffer. If
+ * the current buffer was flushed, we know the LSN of the first byte
+ * of the buffer is on disk, otherwise, we only know that the LSN of
+ * the record before the one beginning the current buffer is on disk.
*/
- lp->s_lsn = lp->uw_lsn;
+ lp->s_lsn = lp->f_lsn;
+ if (!current)
+ --lp->s_lsn.offset;
return (0);
}
@@ -336,8 +333,9 @@ __log_flush(dblp, lsn)
* Write information into the log.
*/
static int
-__log_fill(dblp, addr, len)
+__log_fill(dblp, lsn, addr, len)
DB_LOG *dblp;
+ DB_LSN *lsn;
void *addr;
u_int32_t len;
{
@@ -349,6 +347,15 @@ __log_fill(dblp, addr, len)
/* Copy out the data. */
for (lp = dblp->lp; len > 0;) {
/*
+ * If we're beginning a new buffer, note the user LSN to which
+ * the first byte of the buffer belongs. We have to know this
+ * when flushing the buffer so that we know if the in-memory
+ * buffer needs to be flushed.
+ */
+ if (lp->b_off == 0)
+ lp->f_lsn = *lsn;
+
+ /*
* If we're on a buffer boundary and the data is big enough,
* copy as many records as we can directly from the data.
*/
@@ -371,9 +378,12 @@ __log_fill(dblp, addr, len)
lp->b_off += nw;
/* If we fill the buffer, flush it. */
- if (lp->b_off == sizeof(lp->buf) &&
- (ret = __log_write(dblp, lp->buf, sizeof(lp->buf))) != 0)
- return (ret);
+ if (lp->b_off == sizeof(lp->buf)) {
+ if ((ret =
+ __log_write(dblp, lp->buf, sizeof(lp->buf))) != 0)
+ return (ret);
+ lp->b_off = 0;
+ }
}
return (0);
}
@@ -412,14 +422,8 @@ __log_write(dblp, addr, len)
if (nw != (int32_t)len)
return (EIO);
- /*
- * Reset the buffer offset, update the seek offset, and update the
- * first-unwritten LSN.
- */
- lp->b_off = 0;
+ /* Update the seek offset. */
lp->w_off += len;
- lp->uw_lsn.file = lp->lsn.file;
- lp->uw_lsn.offset = lp->w_off;
/* Update written statistics. */
if ((lp->stat.st_w_bytes += len) >= MEGABYTE) {
diff --git a/db2/mp/mp_bh.c b/db2/mp/mp_bh.c
index a707603eec..578abedcb6 100644
--- a/db2/mp/mp_bh.c
+++ b/db2/mp/mp_bh.c
@@ -7,7 +7,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)mp_bh.c 10.21 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)mp_bh.c 10.23 (Sleepycat) 11/26/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -40,7 +40,6 @@ __memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep)
BH *bhp;
int *restartp, *wrotep;
{
- DBT dbt;
DB_MPOOLFILE *dbmfp;
DB_MPREG *mpreg;
@@ -53,7 +52,7 @@ __memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep)
* Walk the process' DB_MPOOLFILE list and find a file descriptor for
* the file. We also check that the descriptor is open for writing.
* If we find a descriptor on the file that's not open for writing, we
- * try and upgrade it to make it writeable.
+ * try and upgrade it to make it writeable. If that fails, we're done.
*/
LOCKHANDLE(dbmp, dbmp->mutexp);
for (dbmfp = TAILQ_FIRST(&dbmp->dbmfq);
@@ -86,18 +85,34 @@ __memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep)
}
/*
- * Try and open the file; ignore any error, assume it's a permissions
- * problem.
+ * Try and open the file, attaching to the underlying shared area.
*
* XXX
- * There's no negative cache here, so we may repeatedly try and open
- * files that we have previously tried (and failed) to open.
+ * Don't try to attach to temporary files. There are two problems in
+ * trying to do that. First, if we have different privileges than the
+ * process that "owns" the temporary file, we might create the backing
+ * disk file such that the owning process couldn't read/write its own
+ * buffers, e.g., memp_trickle() running as root creating a file owned
+ * as root, mode 600. Second, if the temporary file has already been
+ * created, we don't have any way of finding out what its real name is,
+ * and, even if we did, it was already unlinked (so that it won't be
+ * left if the process dies horribly). This decision causes a problem,
+ * however: if the temporary file consumes the entire buffer cache,
+ * and the owner doesn't flush the buffers to disk, we could end up
+ * with resource starvation, and the memp_trickle() thread couldn't do
+ * anything about it. That's a pretty unlikely scenario, though.
+ *
+ * XXX
+ * There's no negative cache, so we may repeatedly try and open files
+ * that we have previously tried (and failed) to open.
+ *
+ * Ignore any error, assume it's a permissions problem.
*/
- dbt.size = mfp->pgcookie_len;
- dbt.data = R_ADDR(dbmp, mfp->pgcookie_off);
- if (__memp_fopen(dbmp, R_ADDR(dbmp, mfp->path_off),
- mfp->ftype, 0, 0, mfp->stat.st_pagesize,
- mfp->lsn_off, &dbt, R_ADDR(dbmp, mfp->fileid_off), 0, &dbmfp) != 0)
+ if (F_ISSET(mfp, MP_TEMP))
+ return (0);
+
+ if (__memp_fopen(dbmp, mfp, R_ADDR(dbmp, mfp->path_off), mfp->ftype,
+ 0, 0, mfp->stat.st_pagesize, 0, NULL, NULL, 0, &dbmfp) != 0)
return (0);
found: return (__memp_pgwrite(dbmfp, bhp, restartp, wrotep));
@@ -144,7 +159,7 @@ __memp_pgread(dbmfp, bhp, can_create)
UNLOCKHANDLE(dbmp, dbmfp->mutexp);
__db_err(dbmp->dbenv,
"%s: page %lu doesn't exist, create flag not set",
- dbmfp->path, (u_long)bhp->pgno);
+ __memp_fn(dbmfp), (u_long)bhp->pgno);
goto err;
}
UNLOCKHANDLE(dbmp, dbmfp->mutexp);
@@ -270,12 +285,14 @@ __memp_pgwrite(dbmfp, bhp, restartp, wrotep)
/* Temporary files may not yet have been created. */
LOCKHANDLE(dbmp, dbmfp->mutexp);
- if (dbmfp->fd == -1 && ((ret = __db_appname(dbenv, DB_APP_TMP,
- NULL, NULL, &dbmfp->fd, NULL)) != 0 || dbmfp->fd == -1)) {
- UNLOCKHANDLE(dbmp, dbmfp->mutexp);
- __db_err(dbenv, "unable to create temporary backing file");
- goto err;
- }
+ if (dbmfp->fd == -1)
+ if ((ret = __db_appname(dbenv, DB_APP_TMP,
+ NULL, NULL, &dbmfp->fd, NULL)) != 0 || dbmfp->fd == -1) {
+ UNLOCKHANDLE(dbmp, dbmfp->mutexp);
+ __db_err(dbenv,
+ "unable to create temporary backing file");
+ goto err;
+ }
/* Write the page out. */
if ((ret = __db_seek(dbmfp->fd, pagesize, bhp->pgno, 0, SEEK_SET)) != 0)
@@ -350,8 +367,8 @@ __memp_pgwrite(dbmfp, bhp, restartp, wrotep)
return (0);
-syserr: __db_err(dbenv,
- "%s: %s failed for page %lu", dbmfp->path, fail, (u_long)bhp->pgno);
+syserr: __db_err(dbenv, "%s: %s failed for page %lu",
+ __memp_fn(dbmfp), fail, (u_long)bhp->pgno);
err: UNLOCKBUFFER(dbmp, bhp);
LOCKREGION(dbmp);
@@ -416,7 +433,7 @@ __memp_pg(dbmfp, bhp, is_pgin)
err: UNLOCKHANDLE(dbmp, dbmp->mutexp);
__db_err(dbmp->dbenv, "%s: %s failed for page %lu",
- dbmfp->path, is_pgin ? "pgin" : "pgout", (u_long)bhp->pgno);
+ __memp_fn(dbmfp), is_pgin ? "pgin" : "pgout", (u_long)bhp->pgno);
return (ret);
}
@@ -462,7 +479,8 @@ __memp_upgrade(dbmp, dbmfp, mfp)
DB_MPOOLFILE *dbmfp;
MPOOLFILE *mfp;
{
- int fd;
+ int fd, ret;
+ char *rpath;
/*
* !!!
@@ -477,16 +495,24 @@ __memp_upgrade(dbmp, dbmfp, mfp)
if (F_ISSET(dbmfp, MP_UPGRADE_FAIL))
return (1);
- /* Try the open. */
- if (__db_open(R_ADDR(dbmp, mfp->path_off), 0, 0, 0, &fd) != 0) {
+ /*
+ * Calculate the real name for this file and try to open it read/write.
+ * We know we have a valid pathname for the file because it's the only
+ * way we could have gotten a file descriptor of any kind.
+ */
+ if ((ret = __db_appname(dbmp->dbenv, DB_APP_DATA,
+ NULL, R_ADDR(dbmp, mfp->path_off), NULL, &rpath)) != 0)
+ return (ret);
+ if (__db_open(rpath, 0, 0, 0, &fd) != 0) {
F_SET(dbmfp, MP_UPGRADE_FAIL);
- return (1);
+ ret = 1;
+ } else {
+ /* Swap the descriptors and set the upgrade flag. */
+ (void)__db_close(dbmfp->fd);
+ dbmfp->fd = fd;
+ F_SET(dbmfp, MP_UPGRADE);
+ ret = 0;
}
-
- /* Swap the descriptors and set the upgrade flag. */
- (void)__db_close(dbmfp->fd);
- dbmfp->fd = fd;
- F_SET(dbmfp, MP_UPGRADE);
-
- return (0);
+ FREES(rpath);
+ return (ret);
}
diff --git a/db2/mp/mp_fget.c b/db2/mp/mp_fget.c
index 3f99e60505..1010751c92 100644
--- a/db2/mp/mp_fget.c
+++ b/db2/mp/mp_fget.c
@@ -7,7 +7,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)mp_fget.c 10.30 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)mp_fget.c 10.32 (Sleepycat) 11/26/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -38,13 +38,11 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp)
int flags;
void *addrp;
{
- BH *bhp, *tbhp;
+ BH *bhp;
DB_MPOOL *dbmp;
MPOOL *mp;
MPOOLFILE *mfp;
- db_pgno_t lastpgno;
size_t bucket, mf_offset;
- off_t size;
u_long cnt;
int b_incr, b_inserted, readonly_alloc, ret;
void *addr;
@@ -97,7 +95,7 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp)
mf_offset = R_OFFSET(dbmp, mfp);
addr = NULL;
bhp = NULL;
- b_incr = b_inserted = readonly_alloc = ret = 0;
+ b_incr = b_inserted = ret = 0;
LOCKREGION(dbmp);
@@ -114,11 +112,10 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp)
* It would be possible to do so by reference counting the open
* pages from the mmap, but it's unclear to me that it's worth it.
*/
- if (dbmfp->addr != NULL && dbmfp->mfp->can_mmap) {
- lastpgno = dbmfp->len == 0 ?
- 0 : (dbmfp->len - 1) / mfp->stat.st_pagesize;
+ if (dbmfp->addr != NULL && F_ISSET(dbmfp->mfp, MP_CAN_MMAP)) {
+ readonly_alloc = 0;
if (LF_ISSET(DB_MPOOL_LAST))
- *pgnoaddr = lastpgno;
+ *pgnoaddr = mfp->last_pgno;
else {
/*
* !!!
@@ -128,10 +125,10 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp)
*/
if (LF_ISSET(DB_MPOOL_CREATE | DB_MPOOL_NEW))
readonly_alloc = 1;
- else if (*pgnoaddr > lastpgno) {
+ else if (*pgnoaddr > mfp->last_pgno) {
__db_err(dbmp->dbenv,
"%s: page %lu doesn't exist",
- dbmfp->path, (u_long)*pgnoaddr);
+ __memp_fn(dbmfp), (u_long)*pgnoaddr);
ret = EINVAL;
goto err;
}
@@ -146,79 +143,38 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp)
}
}
- /*
- * If requesting the last page or a new page, find the last page. The
- * tricky thing is that the user may have created a page already that's
- * after any page that exists in the file.
- */
- if (LF_ISSET(DB_MPOOL_LAST | DB_MPOOL_NEW)) {
- /*
- * Temporary files may not yet have been created.
- *
- * Don't lock -- there are no atomicity issues for stat(2).
- */
- if (dbmfp->fd == -1)
- size = 0;
- else if ((ret =
- __db_ioinfo(dbmfp->path, dbmfp->fd, &size, NULL)) != 0) {
- __db_err(dbmp->dbenv,
- "%s: %s", dbmfp->path, strerror(ret));
- goto err;
- }
-
- *pgnoaddr = size == 0 ? 0 : (size - 1) / mfp->stat.st_pagesize;
+ /* Check if requesting the last page or a new page. */
+ if (LF_ISSET(DB_MPOOL_LAST))
+ *pgnoaddr = mfp->last_pgno;
- /*
- * Walk the list of BH's, looking for later pages. Save the
- * pointer if a later page is found so that we don't have to
- * search the list twice.
- *
- * If requesting a new page, return the page one after the last
- * page -- which we'll have to create.
- */
- for (tbhp = SH_TAILQ_FIRST(&mp->bhq, __bh);
- tbhp != NULL; tbhp = SH_TAILQ_NEXT(tbhp, q, __bh))
- if (tbhp->pgno >= *pgnoaddr &&
- tbhp->mf_offset == mf_offset) {
- bhp = tbhp;
- *pgnoaddr = bhp->pgno;
- }
- if (LF_ISSET(DB_MPOOL_NEW))
- ++*pgnoaddr;
- }
-
- /* If we already found the right buffer, return it. */
- if (LF_ISSET(DB_MPOOL_LAST) && bhp != NULL) {
- addr = bhp->buf;
- goto found;
+ if (LF_ISSET(DB_MPOOL_NEW)) {
+ *pgnoaddr = mfp->last_pgno + 1;
+ goto alloc;
}
- /* If we haven't checked the BH hash bucket queue, do the search. */
- if (!LF_ISSET(DB_MPOOL_LAST | DB_MPOOL_NEW)) {
- bucket = BUCKET(mp, mf_offset, *pgnoaddr);
- for (cnt = 0,
- bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh);
- bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) {
- ++cnt;
- if (bhp->pgno == *pgnoaddr &&
- bhp->mf_offset == mf_offset) {
- addr = bhp->buf;
- ++mp->stat.st_hash_searches;
- if (cnt > mp->stat.st_hash_longest)
- mp->stat.st_hash_longest = cnt;
- mp->stat.st_hash_examined += cnt;
- goto found;
- }
- }
- if (cnt != 0) {
+ /* Check the BH hash bucket queue. */
+ bucket = BUCKET(mp, mf_offset, *pgnoaddr);
+ for (cnt = 0,
+ bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh);
+ bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) {
+ ++cnt;
+ if (bhp->pgno == *pgnoaddr && bhp->mf_offset == mf_offset) {
+ addr = bhp->buf;
++mp->stat.st_hash_searches;
if (cnt > mp->stat.st_hash_longest)
mp->stat.st_hash_longest = cnt;
mp->stat.st_hash_examined += cnt;
+ goto found;
}
}
+ if (cnt != 0) {
+ ++mp->stat.st_hash_searches;
+ if (cnt > mp->stat.st_hash_longest)
+ mp->stat.st_hash_longest = cnt;
+ mp->stat.st_hash_examined += cnt;
+ }
- /*
+alloc: /*
* Allocate a new buffer header and data space, and mark the contents
* as useless.
*/
@@ -300,7 +256,7 @@ found: /* Increment the reference count. */
if (bhp->ref == UINT16_T_MAX) {
__db_err(dbmp->dbenv,
"%s: too many references to page %lu",
- dbmfp->path, bhp->pgno);
+ __memp_fn(dbmfp), bhp->pgno);
ret = EINVAL;
goto err;
}
@@ -346,6 +302,14 @@ found: /* Increment the reference count. */
++mfp->stat.st_cache_hit;
}
+ /*
+ * If we're returning a page after our current notion of the last-page,
+ * update our information. Note, there's no way to un-instantiate this
+ * page, it's going to exist whether it's returned to us dirty or not.
+ */
+ if (bhp->pgno > mfp->last_pgno)
+ mfp->last_pgno = bhp->pgno;
+
mapret: LOCKHANDLE(dbmp, dbmfp->mutexp);
++dbmfp->pinref;
UNLOCKHANDLE(dbmp, dbmfp->mutexp);
diff --git a/db2/mp/mp_fopen.c b/db2/mp/mp_fopen.c
index de59c9ea9b..bdc4713863 100644
--- a/db2/mp/mp_fopen.c
+++ b/db2/mp/mp_fopen.c
@@ -7,7 +7,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)mp_fopen.c 10.30 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)mp_fopen.c 10.32 (Sleepycat) 11/26/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -28,8 +28,8 @@ static const char sccsid[] = "@(#)mp_fopen.c 10.30 (Sleepycat) 10/25/97";
#include "common_ext.h"
static int __memp_mf_close __P((DB_MPOOL *, DB_MPOOLFILE *));
-static int __memp_mf_open __P((DB_MPOOL *,
- DB_MPOOLFILE *, int, size_t, int, DBT *, u_int8_t *, int, MPOOLFILE **));
+static int __memp_mf_open __P((DB_MPOOL *, DB_MPOOLFILE *,
+ const char *, int, size_t, off_t, int, DBT *, u_int8_t *, MPOOLFILE **));
/*
* memp_fopen --
@@ -53,7 +53,13 @@ memp_fopen(dbmp, path, ftype,
"memp_fopen", flags, DB_CREATE | DB_NOMMAP | DB_RDONLY)) != 0)
return (ret);
- return (__memp_fopen(dbmp, path, ftype,
+ /* Require a non-zero pagesize. */
+ if (pagesize == 0) {
+ __db_err(dbmp->dbenv, "memp_fopen: pagesize not specified");
+ return (EINVAL);
+ }
+
+ return (__memp_fopen(dbmp, NULL, path, ftype,
flags, mode, pagesize, lsn_offset, pgcookie, fileid, 1, retp));
}
@@ -61,13 +67,14 @@ memp_fopen(dbmp, path, ftype,
* __memp_fopen --
* Open a backing file for the memory pool; internal version.
*
- * PUBLIC: int __memp_fopen __P((DB_MPOOL *, const char *, int, int,
- * PUBLIC: int, size_t, int, DBT *, u_int8_t *, int, DB_MPOOLFILE **));
+ * PUBLIC: int __memp_fopen __P((DB_MPOOL *, MPOOLFILE *, const char *, int,
+ * PUBLIC: int, int, size_t, int, DBT *, u_int8_t *, int, DB_MPOOLFILE **));
*/
int
-__memp_fopen(dbmp, path,
+__memp_fopen(dbmp, mfp, path,
ftype, flags, mode, pagesize, lsn_offset, pgcookie, fileid, needlock, retp)
DB_MPOOL *dbmp;
+ MPOOLFILE *mfp;
const char *path;
int ftype, flags, mode, lsn_offset, needlock;
size_t pagesize;
@@ -77,24 +84,27 @@ __memp_fopen(dbmp, path,
{
DB_ENV *dbenv;
DB_MPOOLFILE *dbmfp;
- MPOOLFILE *mfp;
off_t size;
int ret;
+ u_int8_t idbuf[DB_FILE_ID_LEN];
+ char *rpath;
+ /*
+ * XXX
+ * If mfp is provided, the following arguments do NOT need to be
+ * specified:
+ * lsn_offset
+ * pgcookie
+ * fileid
+ */
dbenv = dbmp->dbenv;
ret = 0;
-
- /* Require a non-zero pagesize. */
- if (pagesize == 0) {
- __db_err(dbenv, "memp_fopen: pagesize not specified");
- return (EINVAL);
- }
+ rpath = NULL;
/* Allocate and initialize the per-process structure. */
if ((dbmfp =
(DB_MPOOLFILE *)__db_calloc(1, sizeof(DB_MPOOLFILE))) == NULL) {
- __db_err(dbenv, "%s: %s",
- path == NULL ? TEMPORARY : path, strerror(ENOMEM));
+ __db_err(dbenv, "memp_fopen: %s", strerror(ENOMEM));
return (ENOMEM);
}
dbmfp->dbmp = dbmp;
@@ -109,54 +119,66 @@ __memp_fopen(dbmp, path,
ret = EINVAL;
goto err;
}
- dbmfp->path = (char *)TEMPORARY;
- F_SET(dbmfp, MP_PATH_TEMP);
+ size = 0;
} else {
- /* Calculate the real name for this file. */
+ /* Get the real name for this file and open it. */
if ((ret = __db_appname(dbenv,
- DB_APP_DATA, NULL, path, NULL, &dbmfp->path)) != 0)
+ DB_APP_DATA, NULL, path, NULL, &rpath)) != 0)
goto err;
- F_SET(dbmfp, MP_PATH_ALLOC);
-
-
- /* Open the file. */
- if ((ret = __db_open(dbmfp->path,
+ if ((ret = __db_open(rpath,
LF_ISSET(DB_CREATE | DB_RDONLY), DB_CREATE | DB_RDONLY,
mode, &dbmfp->fd)) != 0) {
- __db_err(dbenv, "%s: %s", dbmfp->path, strerror(ret));
+ __db_err(dbenv, "%s: %s", rpath, strerror(ret));
goto err;
}
/* Don't permit files that aren't a multiple of the pagesize. */
- if ((ret =
- __db_ioinfo(dbmfp->path, dbmfp->fd, &size, NULL)) != 0) {
- __db_err(dbenv, "%s: %s", dbmfp->path, strerror(ret));
+ if ((ret = __db_ioinfo(rpath, dbmfp->fd, &size, NULL)) != 0) {
+ __db_err(dbenv, "%s: %s", rpath, strerror(ret));
goto err;
}
if (size % pagesize) {
__db_err(dbenv,
"%s: file size not a multiple of the pagesize",
- dbmfp->path);
+ rpath);
ret = EINVAL;
goto err;
}
+
+ /*
+ * Get the file id if we weren't given one. Generated file id's
+ * don't use timestamps, otherwise there'd be no chance of any
+ * other process joining the party.
+ */
+ if (mfp == NULL && fileid == NULL) {
+ if ((ret = __db_fileid(dbenv, rpath, 0, idbuf)) != 0)
+ goto err;
+ fileid = idbuf;
+ }
}
/*
- * Find/allocate the shared file objects. This includes allocating
- * space for the per-process thread lock.
+ * If we weren't provided an underlying shared object to join with,
+ * find/allocate the shared file objects. Also allocate space for
+ * the per-process thread lock.
*/
if (needlock)
LOCKREGION(dbmp);
- ret = __memp_mf_open(dbmp, dbmfp, ftype, pagesize,
- lsn_offset, pgcookie, fileid, F_ISSET(dbmfp, MP_PATH_TEMP), &mfp);
+
+ if (mfp == NULL)
+ ret = __memp_mf_open(dbmp, dbmfp, path,
+ ftype, pagesize, size, lsn_offset, pgcookie, fileid, &mfp);
+ else {
+ ++mfp->ref;
+ ret = 0;
+ }
if (ret == 0 &&
F_ISSET(dbmp, MP_LOCKHANDLE) && (ret =
__memp_ralloc(dbmp, sizeof(db_mutex_t), NULL, &dbmfp->mutexp)) == 0)
LOCKINIT(dbmp, dbmfp->mutexp);
+
if (needlock)
UNLOCKREGION(dbmp);
-
if (ret != 0)
goto err;
@@ -184,25 +206,25 @@ __memp_fopen(dbmp, path,
* flatly impossible. Hope that mmap fails if the file is too large.
*/
#define DB_MAXMMAPSIZE (10 * 1024 * 1024) /* 10 Mb. */
- if (mfp->can_mmap) {
+ if (F_ISSET(mfp, MP_CAN_MMAP)) {
if (!F_ISSET(dbmfp, MP_READONLY))
- mfp->can_mmap = 0;
+ F_CLR(mfp, MP_CAN_MMAP);
if (path == NULL)
- mfp->can_mmap = 0;
+ F_CLR(mfp, MP_CAN_MMAP);
if (ftype != 0)
- mfp->can_mmap = 0;
+ F_CLR(mfp, MP_CAN_MMAP);
if (LF_ISSET(DB_NOMMAP))
- mfp->can_mmap = 0;
+ F_CLR(mfp, MP_CAN_MMAP);
if (size > (dbenv == NULL || dbenv->mp_mmapsize == 0 ?
DB_MAXMMAPSIZE : (off_t)dbenv->mp_mmapsize))
- mfp->can_mmap = 0;
+ F_CLR(mfp, MP_CAN_MMAP);
}
dbmfp->addr = NULL;
- if (mfp->can_mmap) {
+ if (F_ISSET(mfp, MP_CAN_MMAP)) {
dbmfp->len = size;
if (__db_map(dbmfp->fd, dbmfp->len, 1, 1, &dbmfp->addr) != 0) {
- mfp->can_mmap = 0;
dbmfp->addr = NULL;
+ F_CLR(mfp, MP_CAN_MMAP);
}
}
@@ -217,8 +239,8 @@ err: /*
* Note that we do not have to free the thread mutex, because we
* never get to here after we have successfully allocated it.
*/
- if (F_ISSET(dbmfp, MP_PATH_ALLOC))
- FREES(dbmfp->path);
+ if (rpath != NULL)
+ FREES(rpath);
if (dbmfp->fd != -1)
(void)__db_close(dbmfp->fd);
if (dbmfp != NULL)
@@ -231,78 +253,80 @@ err: /*
* Open an MPOOLFILE.
*/
static int
-__memp_mf_open(dbmp, dbmfp,
- ftype, pagesize, lsn_offset, pgcookie, fileid, istemp, retp)
+__memp_mf_open(dbmp, dbmfp, path,
+ ftype, pagesize, size, lsn_offset, pgcookie, fileid, retp)
DB_MPOOL *dbmp;
DB_MPOOLFILE *dbmfp;
- int ftype, lsn_offset, istemp;
+ const char *path;
+ int ftype, lsn_offset;
size_t pagesize;
+ off_t size;
DBT *pgcookie;
u_int8_t *fileid;
MPOOLFILE **retp;
{
MPOOLFILE *mfp;
int ret;
- u_int8_t idbuf[DB_FILE_ID_LEN];
void *p;
- /* Temporary files can't match previous files. */
- if (istemp)
- goto alloc;
+#define ISTEMPORARY (path == NULL)
/*
- * Get the file id if we weren't give one. Generated file id's don't
- * use timestamps, otherwise there'd be no chance of anyone joining
- * the party.
+ * Walk the list of MPOOLFILE's, looking for a matching file.
+ * Temporary files can't match previous files.
*/
- if (fileid == NULL) {
- if ((ret =
- __db_fileid(dbmp->dbenv, dbmfp->path, 0, idbuf)) != 0)
- return (ret);
- fileid = idbuf;
- }
-
- /* Walk the list of MPOOLFILE's, looking for a matching file. */
- for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
- mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile))
- if (!memcmp(fileid,
- R_ADDR(dbmp, mfp->fileid_off), DB_FILE_ID_LEN)) {
- if (ftype != mfp->ftype ||
- pagesize != mfp->stat.st_pagesize) {
- __db_err(dbmp->dbenv,
- "%s: ftype or pagesize changed",
- dbmfp->path);
- ret = EINVAL;
- mfp = NULL;
- goto ret1;
+ if (!ISTEMPORARY)
+ for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
+ mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) {
+ if (F_ISSET(mfp, MP_TEMP))
+ continue;
+ if (!memcmp(fileid,
+ R_ADDR(dbmp, mfp->fileid_off), DB_FILE_ID_LEN)) {
+ if (ftype != mfp->ftype ||
+ pagesize != mfp->stat.st_pagesize) {
+ __db_err(dbmp->dbenv,
+ "%s: ftype or pagesize changed",
+ path);
+ return (EINVAL);
+ }
+
+ /* Found it: increment the reference count. */
+ ++mfp->ref;
+ *retp = mfp;
+ return (0);
}
- /* Found it: increment the reference count. */
- ++mfp->ref;
- goto ret1;
}
/* Allocate a new MPOOLFILE. */
-alloc: if ((ret = __memp_ralloc(dbmp, sizeof(MPOOLFILE), NULL, &mfp)) != 0)
- goto ret1;
+ if ((ret = __memp_ralloc(dbmp, sizeof(MPOOLFILE), NULL, &mfp)) != 0)
+ return (ret);
+ *retp = mfp;
/* Initialize the structure. */
memset(mfp, 0, sizeof(MPOOLFILE));
mfp->ref = 1;
mfp->ftype = ftype;
- mfp->can_mmap = 1;
mfp->lsn_off = lsn_offset;
- mfp->stat.st_pagesize = pagesize;
- /* Copy the file path into shared memory. */
- if ((ret = __memp_ralloc(dbmp,
- strlen(dbmfp->path) + 1, &mfp->path_off, &p)) != 0)
- goto err;
- memcpy(p, dbmfp->path, strlen(dbmfp->path) + 1);
+ /*
+ * If the user specifies DB_MPOOL_LAST or DB_MPOOL_NEW on a memp_fget,
+ * we have to know the last page in the file. Figure it out and save
+ * it away.
+ */
+ mfp->stat.st_pagesize = pagesize;
+ mfp->last_pgno = size == 0 ? 0 : (size - 1) / mfp->stat.st_pagesize;
- /* Copy the file identification string into shared memory. */
- if (istemp)
- mfp->fileid_off = 0;
+ F_SET(mfp, MP_CAN_MMAP);
+ if (ISTEMPORARY)
+ F_SET(mfp, MP_TEMP);
else {
+ /* Copy the file path into shared memory. */
+ if ((ret = __memp_ralloc(dbmp,
+ strlen(path) + 1, &mfp->path_off, &p)) != 0)
+ goto err;
+ memcpy(p, path, strlen(path) + 1);
+
+ /* Copy the file identification string into shared memory. */
if ((ret = __memp_ralloc(dbmp,
DB_FILE_ID_LEN, &mfp->fileid_off, &p)) != 0)
goto err;
@@ -328,15 +352,13 @@ alloc: if ((ret = __memp_ralloc(dbmp, sizeof(MPOOLFILE), NULL, &mfp)) != 0)
err: if (mfp->path_off != 0)
__db_shalloc_free(dbmp->addr,
R_ADDR(dbmp, mfp->path_off));
- if (!istemp)
+ if (mfp->fileid_off != 0)
__db_shalloc_free(dbmp->addr,
R_ADDR(dbmp, mfp->fileid_off));
if (mfp != NULL)
__db_shalloc_free(dbmp->addr, mfp);
mfp = NULL;
}
-
-ret1: *retp = mfp;
return (0);
}
@@ -357,7 +379,7 @@ memp_fclose(dbmfp)
/* Complain if pinned blocks never returned. */
if (dbmfp->pinref != 0)
__db_err(dbmp->dbenv, "%s: close: %lu blocks left pinned",
- dbmfp->path, (u_long)dbmfp->pinref);
+ __memp_fn(dbmfp), (u_long)dbmfp->pinref);
/* Remove the DB_MPOOLFILE structure from the list. */
LOCKHANDLE(dbmp, dbmp->mutexp);
@@ -370,18 +392,18 @@ memp_fclose(dbmfp)
/* Discard any mmap information. */
if (dbmfp->addr != NULL &&
(ret = __db_unmap(dbmfp->addr, dbmfp->len)) != 0)
- __db_err(dbmp->dbenv, "%s: %s", dbmfp->path, strerror(ret));
+ __db_err(dbmp->dbenv,
+ "%s: %s", __memp_fn(dbmfp), strerror(ret));
/* Close the file; temporary files may not yet have been created. */
if (dbmfp->fd != -1 && (t_ret = __db_close(dbmfp->fd)) != 0) {
- __db_err(dbmp->dbenv, "%s: %s", dbmfp->path, strerror(t_ret));
+ __db_err(dbmp->dbenv,
+ "%s: %s", __memp_fn(dbmfp), strerror(t_ret));
if (ret != 0)
t_ret = ret;
}
/* Free memory. */
- if (F_ISSET(dbmfp, MP_PATH_ALLOC))
- FREES(dbmfp->path);
if (dbmfp->mutexp != NULL) {
LOCKREGION(dbmp);
__db_shalloc_free(dbmp->addr, dbmfp->mutexp);
@@ -434,7 +456,8 @@ __memp_mf_close(dbmp, dbmfp)
if (F_ISSET(bhp, BH_DIRTY))
__db_err(dbmp->dbenv,
"%s: close: pgno %lu left dirty; ref %lu",
- dbmfp->path, (u_long)bhp->pgno, (u_long)bhp->ref);
+ __memp_fn(dbmfp),
+ (u_long)bhp->pgno, (u_long)bhp->ref);
#endif
if (bhp->mf_offset == mf_offset) {
@@ -452,7 +475,8 @@ __memp_mf_close(dbmp, dbmfp)
/* Free the space. */
__db_shalloc_free(dbmp->addr, mfp);
- __db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->path_off));
+ if (mfp->path_off != 0)
+ __db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->path_off));
if (mfp->fileid_off != 0)
__db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->fileid_off));
if (mfp->pgcookie_off != 0)
diff --git a/db2/mp/mp_fput.c b/db2/mp/mp_fput.c
index 892f179d3a..38e86b8ac5 100644
--- a/db2/mp/mp_fput.c
+++ b/db2/mp/mp_fput.c
@@ -7,7 +7,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)mp_fput.c 10.14 (Sleepycat) 10/5/97";
+static const char sccsid[] = "@(#)mp_fput.c 10.16 (Sleepycat) 11/26/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -54,7 +54,7 @@ memp_fput(dbmfp, pgaddr, flags)
if (LF_ISSET(DB_MPOOL_DIRTY) && F_ISSET(dbmfp, MP_READONLY)) {
__db_err(dbmp->dbenv,
"%s: dirty flag set for readonly file page",
- dbmfp->path);
+ __memp_fn(dbmfp));
return (EACCES);
}
}
@@ -64,7 +64,7 @@ memp_fput(dbmfp, pgaddr, flags)
if (dbmfp->pinref == 0)
__db_err(dbmp->dbenv,
"%s: put: more blocks returned than retrieved",
- dbmfp->path);
+ __memp_fn(dbmfp));
else
--dbmfp->pinref;
UNLOCKHANDLE(dbmp, dbmfp->mutexp);
diff --git a/db2/mp/mp_fset.c b/db2/mp/mp_fset.c
index a7d2706008..2eff7dd74c 100644
--- a/db2/mp/mp_fset.c
+++ b/db2/mp/mp_fset.c
@@ -7,7 +7,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)mp_fset.c 10.10 (Sleepycat) 10/5/97";
+static const char sccsid[] = "@(#)mp_fset.c 10.12 (Sleepycat) 11/26/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -43,20 +43,21 @@ memp_fset(dbmfp, pgaddr, flags)
mp = dbmp->mp;
/* Validate arguments. */
- if (flags != 0) {
- if ((ret = __db_fchk(dbmp->dbenv, "memp_fset", flags,
- DB_MPOOL_DIRTY | DB_MPOOL_CLEAN | DB_MPOOL_DISCARD)) != 0)
- return (ret);
- if ((ret = __db_fcchk(dbmp->dbenv, "memp_fset",
- flags, DB_MPOOL_CLEAN, DB_MPOOL_DIRTY)) != 0)
- return (ret);
+ if (flags == 0)
+ return (__db_ferr(dbmp->dbenv, "memp_fset", 1));
- if (LF_ISSET(DB_MPOOL_DIRTY) && F_ISSET(dbmfp, MP_READONLY)) {
- __db_err(dbmp->dbenv,
- "%s: dirty flag set for readonly file page",
- dbmfp->path);
- return (EACCES);
- }
+ if ((ret = __db_fchk(dbmp->dbenv, "memp_fset", flags,
+ DB_MPOOL_DIRTY | DB_MPOOL_CLEAN | DB_MPOOL_DISCARD)) != 0)
+ return (ret);
+ if ((ret = __db_fcchk(dbmp->dbenv, "memp_fset",
+ flags, DB_MPOOL_CLEAN, DB_MPOOL_DIRTY)) != 0)
+ return (ret);
+
+ if (LF_ISSET(DB_MPOOL_DIRTY) && F_ISSET(dbmfp, MP_READONLY)) {
+ __db_err(dbmp->dbenv,
+ "%s: dirty flag set for readonly file page",
+ __memp_fn(dbmfp));
+ return (EACCES);
}
/* Convert the page address to a buffer header. */
diff --git a/db2/mp/mp_open.c b/db2/mp/mp_open.c
index 4c19739ebd..ca81f8d6d6 100644
--- a/db2/mp/mp_open.c
+++ b/db2/mp/mp_open.c
@@ -7,7 +7,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)mp_open.c 10.15 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)mp_open.c 10.16 (Sleepycat) 11/28/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -66,17 +66,6 @@ memp_open(path, flags, mode, dbenv, retp)
F_SET(dbmp, MP_ISPRIVATE);
/*
- * XXX
- * HP-UX won't permit mutexes to live in anything but shared memory.
- * So, we have to instantiate the shared mpool region file on that
- * architecture, regardless. If this turns out to be a performance
- * problem, we could probably use anonymous memory instead.
- */
-#if defined(__hppa)
- F_CLR(dbmp, MP_ISPRIVATE);
-#endif
-
- /*
* Map in the region. We do locking regardless, as portions of it are
* implemented in common code (if we put the region in a file, that is).
*/
diff --git a/db2/mp/mp_pr.c b/db2/mp/mp_pr.c
index 01f0920df4..6ff1131b6e 100644
--- a/db2/mp/mp_pr.c
+++ b/db2/mp/mp_pr.c
@@ -7,7 +7,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)mp_pr.c 10.18 (Sleepycat) 11/1/97";
+static const char sccsid[] = "@(#)mp_pr.c 10.20 (Sleepycat) 11/26/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -100,7 +100,7 @@ memp_stat(dbmp, gspp, fspp, db_malloc)
mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
mfp != NULL;
++tfsp, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) {
- name = R_ADDR(dbmp, mfp->path_off);
+ name = __memp_fns(dbmp, mfp);
nlen = strlen(name);
len = sizeof(DB_MPOOL_FSTAT) + nlen + 1;
if ((*tfsp = db_malloc == NULL ?
@@ -120,6 +120,37 @@ memp_stat(dbmp, gspp, fspp, db_malloc)
}
/*
+ * __memp_fn --
+ * On errors we print whatever is available as the file name.
+ *
+ * PUBLIC: char * __memp_fn __P((DB_MPOOLFILE *));
+ */
+char *
+__memp_fn(dbmfp)
+ DB_MPOOLFILE *dbmfp;
+{
+ return (__memp_fns(dbmfp->dbmp, dbmfp->mfp));
+}
+
+/*
+ * __memp_fns --
+ * On errors we print whatever is available as the file name.
+ *
+ * PUBLIC: char * __memp_fns __P((DB_MPOOL *, MPOOLFILE *));
+ *
+ */
+char *
+__memp_fns(dbmp, mfp)
+ DB_MPOOL *dbmp;
+ MPOOLFILE *mfp;
+{
+ if (mfp->path_off == 0)
+ return ((char *)"temporary");
+
+ return ((char *)R_ADDR(dbmp, mfp->path_off));
+}
+
+/*
* __memp_debug --
* Display MPOOL structures.
*
@@ -152,7 +183,7 @@ __memp_debug(dbmp, fp, data)
(void)fprintf(fp, "%lu process-local files\n", cnt);
for (dbmfp = TAILQ_FIRST(&dbmp->dbmfq);
dbmfp != NULL; dbmfp = TAILQ_NEXT(dbmfp, q)) {
- (void)fprintf(fp, "%s\n", dbmfp->path);
+ (void)fprintf(fp, "%s\n", __memp_fn(dbmfp));
__memp_pdbmf(fp, dbmfp, data);
}
@@ -285,7 +316,7 @@ __memp_pmf(fp, mfp, data)
return;
(void)fprintf(fp, " %d references; %s; pagesize: %lu\n", mfp->ref,
- mfp->can_mmap ? "mmap" : "read/write",
+ F_ISSET(mfp, MP_CAN_MMAP) ? "mmap" : "read/write",
(u_long)mfp->stat.st_pagesize);
}
diff --git a/db2/mp/mp_region.c b/db2/mp/mp_region.c
index 6b2f93125c..c20e669749 100644
--- a/db2/mp/mp_region.c
+++ b/db2/mp/mp_region.c
@@ -7,7 +7,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)mp_region.c 10.16 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)mp_region.c 10.18 (Sleepycat) 11/29/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -224,17 +224,28 @@ retry: if (LF_ISSET(DB_CREATE)) {
* the file descriptor for locking. However, it should not
* be possible for DB_THREAD to be set if HAVE_SPINLOCKS isn't
* defined.
+ *
+ * XXX
+ * HP-UX won't permit mutexes to live in anything but shared
+ * memory. So, instantiate the shared mpool region file on
+ * that architecture, regardless. If this turns out to be a
+ * performance problem, we could use anonymous memory instead.
*/
- if (F_ISSET(dbmp, MP_ISPRIVATE)) {
+#if !defined(__hppa)
+ if (F_ISSET(dbmp, MP_ISPRIVATE))
if ((dbmp->maddr = __db_malloc(rlen)) == NULL)
ret = ENOMEM;
- else
+ else {
+ F_SET(dbmp, MP_MALLOC);
ret = __db_rinit(dbmp->dbenv,
dbmp->maddr, 0, rlen, 0);
- } else
+ }
+ else
+#endif
ret = __db_rcreate(dbmp->dbenv, DB_APP_NONE, path,
- DB_DEFAULT_MPOOL_FILE, mode, rlen, &fd,
- &dbmp->maddr);
+ DB_DEFAULT_MPOOL_FILE, mode, rlen,
+ F_ISSET(dbmp, MP_ISPRIVATE) ? DB_TEMPORARY : 0,
+ &fd, &dbmp->maddr);
if (ret == 0) {
/* Put the MPOOL structure first in the region. */
mp = dbmp->maddr;
@@ -315,7 +326,7 @@ retry: if (LF_ISSET(DB_CREATE)) {
dbmp->fd = fd;
/* If we locked the region, release it now. */
- if (!F_ISSET(dbmp, MP_ISPRIVATE))
+ if (!F_ISSET(dbmp, MP_MALLOC))
UNLOCKREGION(dbmp);
return (0);
@@ -339,7 +350,7 @@ int
__memp_rclose(dbmp)
DB_MPOOL *dbmp;
{
- if (F_ISSET(dbmp, MP_ISPRIVATE)) {
+ if (F_ISSET(dbmp, MP_MALLOC)) {
__db_free(dbmp->maddr);
return (0);
}
diff --git a/db2/mp/mp_sync.c b/db2/mp/mp_sync.c
index 2f042df9e1..47a7f2ebca 100644
--- a/db2/mp/mp_sync.c
+++ b/db2/mp/mp_sync.c
@@ -7,7 +7,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)mp_sync.c 10.15 (Sleepycat) 11/1/97";
+static const char sccsid[] = "@(#)mp_sync.c 10.17 (Sleepycat) 11/26/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -201,7 +201,7 @@ memp_sync(dbmp, lsnp)
*/
if (!wrote) {
__db_err(dbenv, "%s: unable to flush page: %lu",
- R_ADDR(dbmp, mfp->path_off),
+ __memp_fns(dbmp, mfp),
(u_long)bharray[next]->pgno);
ret = EPERM;
goto err;
@@ -244,16 +244,24 @@ memp_fsync(dbmfp)
size_t mf_offset;
int ar_cnt, cnt, nalloc, next, pincnt, notused, ret, wrote;
+ dbmp = dbmfp->dbmp;
+
/*
* If this handle doesn't have a file descriptor that's open for
* writing, or if the file is a temporary, there's no reason to
* proceed further.
*/
- if (F_ISSET(dbmfp, MP_READONLY | MP_PATH_TEMP))
+ if (F_ISSET(dbmfp, MP_READONLY))
return (0);
ret = 0;
- dbmp = dbmfp->dbmp;
+ LOCKREGION(dbmp);
+ if (F_ISSET(dbmfp->mfp, MP_TEMP))
+ ret = 1;
+ UNLOCKREGION(dbmp);
+ if (ret)
+ return (0);
+
mf_offset = R_OFFSET(dbmp, dbmfp->mfp);
/*
@@ -407,18 +415,26 @@ loop: total = mp->stat.st_page_clean + mp->stat.st_page_dirty;
continue;
mfp = R_ADDR(dbmp, bhp->mf_offset);
+
+ /*
+ * We can't write to temporary files -- see the comment in
+ * mp_bh.c:__memp_bhwrite().
+ */
+ if (F_ISSET(mfp, MP_TEMP))
+ continue;
+
if ((ret =
__memp_bhwrite(dbmp, mfp, bhp, &notused, &wrote)) != 0)
goto err;
/*
- * Any process syncing the shared memory buffer pool
- * had better be able to write to any underlying file.
- * Be understanding, but firm, on this point.
+ * Any process syncing the shared memory buffer pool had better
+ * be able to write to any underlying file. Be understanding,
+ * but firm, on this point.
*/
if (!wrote) {
__db_err(dbmp->dbenv, "%s: unable to flush page: %lu",
- R_ADDR(dbmp, mfp->path_off), (u_long)bhp->pgno);
+ __memp_fns(dbmp, mfp), (u_long)bhp->pgno);
ret = EPERM;
goto err;
}
diff --git a/db2/mutex/README b/db2/mutex/README
index 30d6b6a7d1..fceeef7ed8 100644
--- a/db2/mutex/README
+++ b/db2/mutex/README
@@ -1,4 +1,4 @@
-# @(#)README 10.1 (Sleepycat) 4/12/97
+# @(#)README 10.2 (Sleepycat) 11/25/97
Resource locking routines: lock based on a db_mutex_t. All this gunk
(including trying to make assembly code portable), is necessary because
@@ -11,9 +11,9 @@ information.
If HAVE_SPINLOCKS is defined (i.e. we know how to do test-and-sets for
this compiler/architecture combination), we try and lock the resource tsl
-TSL_DEFAULT_SPINS times. If we can't acquire the lock that way, we use
-a system call to sleep for 10ms, 20ms, 40ms, etc. (The time is bounded
-at 1 second, just in case.) Using the timer backoff means that there are
+__db_tsl_spins times. If we can't acquire the lock that way, we use a
+system call to sleep for 10ms, 20ms, 40ms, etc. (The time is bounded at
+1 second, just in case.) Using the timer backoff means that there are
two assumptions: that locks are held for brief periods (never over system
calls or I/O) and that locks are not hotly contested.
diff --git a/db2/mutex/mutex.c b/db2/mutex/mutex.c
index 7c8ea6ebd1..6e87c5f215 100644
--- a/db2/mutex/mutex.c
+++ b/db2/mutex/mutex.c
@@ -8,7 +8,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)mutex.c 10.28 (Sleepycat) 10/31/97";
+static const char sccsid[] = "@(#)mutex.c 10.29 (Sleepycat) 11/25/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -101,12 +101,6 @@ static const char sccsid[] = "@(#)mutex.c 10.28 (Sleepycat) 10/31/97";
#endif /* HAVE_SPINLOCKS */
-#ifdef MORE_THAN_ONE_PROCESSOR
-#define TSL_DEFAULT_SPINS 5 /* Default spins before block. */
-#else
-#define TSL_DEFAULT_SPINS 1 /* Default spins before block. */
-#endif
-
/*
* __db_mutex_init --
* Initialize a DB mutex structure.
@@ -130,6 +124,7 @@ __db_mutex_init(mp, off)
#ifdef HAVE_SPINLOCKS
TSL_INIT(&mp->tsl_resource);
+ mp->spins = __os_spin();
#else
mp->off = off;
#endif
@@ -155,11 +150,8 @@ __db_mutex_lock(mp, fd)
int nspins;
for (usecs = MS(10);;) {
- /*
- * Try and acquire the uncontested resource lock for
- * TSL_DEFAULT_SPINS.
- */
- for (nspins = TSL_DEFAULT_SPINS; nspins > 0; --nspins)
+ /* Try and acquire the uncontested resource lock for N spins. */
+ for (nspins = mp->spins; nspins > 0; --nspins)
if (TSL_SET(&mp->tsl_resource)) {
#ifdef DEBUG
if (mp->pid != 0) {
diff --git a/db2/os/os_func.c b/db2/os/os_config.c
index afd40f4624..ecb4f1c2e7 100644
--- a/db2/os/os_func.c
+++ b/db2/os/os_config.c
@@ -8,7 +8,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)os_func.c 10.4 (Sleepycat) 10/28/97";
+static const char sccsid[] = "@(#)os_config.c 10.9 (Sleepycat) 11/28/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -31,7 +31,6 @@ static const char sccsid[] = "@(#)os_func.c 10.4 (Sleepycat) 10/28/97";
#define imported
#endif
-imported extern void *calloc __P((size_t, size_t));
imported extern int close __P((int));
imported extern void free __P((void *));
imported extern int fsync __P((int));
@@ -42,16 +41,16 @@ imported extern char *strdup __P((const char *));
imported extern void *realloc __P((void *, size_t));
imported extern int unlink __P((const char *));
imported extern ssize_t write __P((int, const void *, size_t));
+imported extern void *memset __P((void *, int, size_t));
/*
* __db_jump --
* This is the list of interfaces that applications can replace. In some
* cases, the user is permitted to replace the standard ANSI C or
- * POSIX 1003.1 call, e.g., calloc or read. In others, we provide
+ * POSIX 1003.1 call, e.g., malloc or read. In others, we provide
* a local interface to the functionality, e.g., __os_map.
*/
struct __db_jumptab __db_jump = {
- calloc, /* DB_FUNC_CALLOC */
close, /* DB_FUNC_CLOSE */
__os_dirfree, /* DB_FUNC_DIRFREE */
__os_dirlist, /* DB_FUNC_DIRLIST */
@@ -73,9 +72,11 @@ struct __db_jumptab __db_jump = {
NULL /* DB_FUNC_YIELD */
};
+int __db_tsl_spins; /* DB_TSL_SPINS */
+
/*
* db_jump_set --
- * Replace an interface.
+ * Replace functions for the DB package.
*/
int
db_jump_set(func, which)
@@ -84,70 +85,148 @@ db_jump_set(func, which)
{
switch (which) {
case DB_FUNC_CALLOC:
- __db_calloc = (void *(*) __P((size_t, size_t)))func;
- break;
+ /*
+ * XXX
+ * Obsolete, calloc is no longer called by DB.
+ */
+ break;
case DB_FUNC_CLOSE:
- __os_close = (int (*) __P((int)))func;
+ __db_jump.db_close = (int (*) __P((int)))func;
break;
case DB_FUNC_DIRFREE:
- __db_dirfree = (void (*) __P((char **, int)))func;
+ __db_jump.db_dirfree = (void (*) __P((char **, int)))func;
break;
case DB_FUNC_DIRLIST:
- __db_dirlist =
+ __db_jump.db_dirlist =
(int (*) __P((const char *, char ***, int *)))func;
break;
case DB_FUNC_EXISTS:
- __db_exists = (int (*) __P((const char *, int *)))func;
+ __db_jump.db_exists = (int (*) __P((const char *, int *)))func;
break;
case DB_FUNC_FREE:
- __db_free = (void (*) __P((void *)))func;
+ __db_jump.db_free = (void (*) __P((void *)))func;
break;
case DB_FUNC_FSYNC:
- __os_fsync = (int (*) __P((int)))func;
+ __db_jump.db_fsync = (int (*) __P((int)))func;
break;
case DB_FUNC_IOINFO:
- __db_ioinfo =
+ __db_jump.db_ioinfo =
(int (*) __P((const char *, int, off_t *, off_t *)))func;
break;
case DB_FUNC_MALLOC:
- __db_malloc = (void *(*) __P((size_t)))func;
+ __db_jump.db_malloc = (void *(*) __P((size_t)))func;
break;
case DB_FUNC_MAP:
- __db_map = (int (*) __P((int, size_t, int, int, void **)))func;
+ __db_jump.db_map =
+ (int (*) __P((int, size_t, int, int, void **)))func;
break;
case DB_FUNC_OPEN:
- __os_open = (int (*) __P((const char *, int, ...)))func;
+ __db_jump.db_open = (int (*) __P((const char *, int, ...)))func;
break;
case DB_FUNC_READ:
- __os_read = (ssize_t (*) __P((int, void *, size_t)))func;
+ __db_jump.db_read =
+ (ssize_t (*) __P((int, void *, size_t)))func;
break;
case DB_FUNC_REALLOC:
- __db_realloc = (void *(*) __P((void *, size_t)))func;
+ __db_jump.db_realloc = (void *(*) __P((void *, size_t)))func;
break;
case DB_FUNC_SEEK:
- __db_seek =
+ __db_jump.db_seek =
(int (*) __P((int, size_t, db_pgno_t, u_long, int)))func;
break;
case DB_FUNC_SLEEP:
- __db_sleep = (int (*) __P((u_long, u_long)))func;
+ __db_jump.db_sleep = (int (*) __P((u_long, u_long)))func;
break;
case DB_FUNC_STRDUP:
- __db_strdup = (char *(*) __P((const char *)))func;
+ __db_jump.db_strdup = (char *(*) __P((const char *)))func;
break;
case DB_FUNC_UNLINK:
- __os_unlink = (int (*) __P((const char *)))func;
+ __db_jump.db_unlink = (int (*) __P((const char *)))func;
break;
case DB_FUNC_UNMAP:
- __db_unmap = (int (*) __P((void *, size_t)))func;
+ __db_jump.db_unmap = (int (*) __P((void *, size_t)))func;
break;
case DB_FUNC_WRITE:
- __os_write = (ssize_t (*) __P((int, const void *, size_t)))func;
+ __db_jump.db_write =
+ (ssize_t (*) __P((int, const void *, size_t)))func;
break;
case DB_FUNC_YIELD:
- __db_yield = (int (*) __P((void)))func;
+ __db_jump.db_yield = (int (*) __P((void)))func;
+ break;
+ default:
+ return (EINVAL);
+ }
+ return (0);
+}
+
+/*
+ * db_value_set --
+ * Replace values for the DB package.
+ */
+int
+db_value_set(value, which)
+ int value, which;
+{
+ switch (which) {
+ case DB_TSL_SPINS:
+ if (value <= 0)
+ return (EINVAL);
+ __db_tsl_spins = value;
break;
default:
return (EINVAL);
}
return (0);
}
+
+/*
+ * XXX
+ * Correct for systems that return NULL when you allocate 0 bytes of memory.
+ * There are several places in DB where we allocate the number of bytes held
+ * by the key/data item, and it can be 0. Correct here so that malloc never
+ * returns a NULL for that reason.
+ */
+/*
+ * __db_calloc --
+ * The calloc(3) function for DB.
+ *
+ * PUBLIC: void *__db_calloc __P((size_t, size_t));
+ */
+void *
+__db_calloc(num, size)
+ size_t num, size;
+{
+ void *p;
+
+ size *= num;
+ if ((p = __db_jump.db_malloc(size == 0 ? 1 : size)) != NULL)
+ memset(p, 0, size);
+ return (p);
+}
+
+/*
+ * __db_malloc --
+ * The malloc(3) function for DB.
+ *
+ * PUBLIC: void *__db_malloc __P((size_t));
+ */
+void *
+__db_malloc(size)
+ size_t size;
+{
+ return (__db_jump.db_malloc(size == 0 ? 1 : size));
+}
+
+/*
+ * __db_realloc --
+ * The realloc(3) function for DB.
+ *
+ * PUBLIC: void *__db_realloc __P((void *, size_t));
+ */
+void *
+__db_realloc(ptr, size)
+ void *ptr;
+ size_t size;
+{
+ return (__db_jump.db_realloc(ptr, size == 0 ? 1 : size));
+}
diff --git a/db2/os/os_open.c b/db2/os/os_open.c
index 05784e4810..a628765556 100644
--- a/db2/os/os_open.c
+++ b/db2/os/os_open.c
@@ -8,7 +8,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)os_open.c 10.19 (Sleepycat) 10/28/97";
+static const char sccsid[] = "@(#)os_open.c 10.20 (Sleepycat) 11/27/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -38,6 +38,11 @@ __db_open(name, arg_flags, ok_flags, mode, fdp)
return (EINVAL);
flags = 0;
+
+ /*
+ * DB requires the semantic that two files opened at the same time
+ * with O_CREAT and O_EXCL set will return failure in at least one.
+ */
if (arg_flags & DB_CREATE)
flags |= O_CREAT;
diff --git a/db2/os/os_spin.c b/db2/os/os_spin.c
new file mode 100644
index 0000000000..fb693c2848
--- /dev/null
+++ b/db2/os/os_spin.c
@@ -0,0 +1,56 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)os_spin.c 10.3 (Sleepycat) 11/25/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <limits.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+
+/*
+ * __os_spin --
+ * Return the number of default spins before blocking.
+ *
+ * PUBLIC: int __os_spin __P((void));
+ */
+int
+__os_spin()
+{
+ extern int __db_tsl_spins;
+
+ /* If the application specified the spins, use its value. */
+ if (__db_tsl_spins != 0)
+ return (__db_tsl_spins);
+
+ /*
+ * XXX
+ * Sysconf: Solaris uses _SC_NPROCESSORS_ONLN to return the number
+ * of online processors. I don't know if this call is portable or
+ * not.
+ */
+#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
+ {
+ long sys_val;
+
+ sys_val = sysconf(_SC_NPROCESSORS_ONLN);
+ if (sys_val > 0)
+ return (sys_val * 50);
+ }
+#endif
+
+ /* Default to a single processor. */
+ return (1);
+}
diff --git a/db2/txn/txn.c b/db2/txn/txn.c
index 55423f0470..e7a1798350 100644
--- a/db2/txn/txn.c
+++ b/db2/txn/txn.c
@@ -43,7 +43,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)txn.c 10.35 (Sleepycat) 11/2/97";
+static const char sccsid[] = "@(#)txn.c 10.37 (Sleepycat) 11/28/97";
#endif /* not lint */
@@ -101,11 +101,9 @@ __txn_create(dbenv, path, mode)
maxtxns = dbenv->tx_max != 0 ? dbenv->tx_max : 1000;
(void)time(&now);
- ret = __db_rcreate(dbenv, DB_APP_NONE, path,
- DEFAULT_TXN_FILE, mode, TXN_REGION_SIZE(maxtxns), &fd, &txn_region);
-
/* Region may have existed. If it didn't, the open will fail. */
- if (ret != 0)
+ if ((ret = __db_rcreate(dbenv, DB_APP_NONE, path, DEFAULT_TXN_FILE,
+ mode, TXN_REGION_SIZE(maxtxns), 0, &fd, &txn_region)) != 0)
return (ret);
txn_region->magic = DB_TXNMAGIC;
@@ -315,7 +313,10 @@ err:
return (ret);
}
-/* The db_txn(3) man page describes txn_commit. */
+/*
+ * txn_commit --
+ * Commit a transaction.
+ */
int
txn_commit(txnp)
DB_TXN *txnp;
@@ -337,7 +338,10 @@ txn_commit(txnp)
return (__txn_end(txnp, 1));
}
-/* The db_txn(3) man page describes txn_abort. */
+/*
+ * txn_abort --
+ * Abort a transaction.
+ */
int
txn_abort(txnp)
DB_TXN *txnp;
@@ -395,8 +399,8 @@ txn_id(txnp)
}
/*
- * The db_txn(3) man page describes txn_close. Currently the caller should
- * arrange a checkpoint before calling txn_close.
+ * txn_close --
+ * Close the transaction region; this does not imply a checkpoint.
*/
int
txn_close(tmgrp)
@@ -439,8 +443,8 @@ txn_close(tmgrp)
}
/*
- * The db_txn(3) man page describes txn_unlink. Right now it is up to
- * txn_close to write the final checkpoint record.
+ * txn_unlink --
+ * Remove the transaction region.
*/
int
txn_unlink(path, force, dbenv)
@@ -666,12 +670,19 @@ do_ckp:
mgr->region->pending_ckp = ckp_lsn;
UNLOCK_TXNREGION(mgr);
- ret = memp_sync(mgr->dbenv->mp_info, &ckp_lsn);
- if (ret > 0) {
- __db_err(mgr->dbenv,
- "txn_checkpoint: system failure in memp_sync %s\n",
- strerror(ret));
- } else if (ret == 0 && mgr->dbenv->lg_info != NULL) {
+ if (mgr->dbenv->mp_info != NULL &&
+ (ret = memp_sync(mgr->dbenv->mp_info, &ckp_lsn)) != 0) {
+ /*
+ * ret < 0 means that there are still buffers to flush;
+ * the checkpoint is not complete. Back off and try again.
+ */
+ if (ret > 0)
+ __db_err(mgr->dbenv,
+ "txn_checkpoint: system failure in memp_sync %s\n",
+ strerror(ret));
+ return (ret);
+ }
+ if (mgr->dbenv->lg_info != NULL) {
LOCK_TXNREGION(mgr);
last_ckp = mgr->region->last_ckp;
ZERO_LSN(mgr->region->pending_ckp);
@@ -691,11 +702,7 @@ do_ckp:
(void)time(&mgr->region->time_ckp);
UNLOCK_TXNREGION(mgr);
}
- /*
- * ret < 0 means that there are still buffers to flush; the
- * checkpoint is not complete. Back off and try again.
- */
- return (ret);
+ return (0);
}
/*
diff --git a/db2/txn/txn_auto.c b/db2/txn/txn_auto.c
index 9edbc03eab..38627466a8 100644
--- a/db2/txn/txn_auto.c
+++ b/db2/txn/txn_auto.c
@@ -73,7 +73,6 @@ int __txn_regop_log(logp, txnid, ret_lsnp, flags,
* PUBLIC: int __txn_regop_print
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
-
int
__txn_regop_print(notused1, dbtp, lsnp, notused3, notused4)
DB_LOG *notused1;
@@ -202,7 +201,6 @@ int __txn_ckp_log(logp, txnid, ret_lsnp, flags,
* PUBLIC: int __txn_ckp_print
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
-
int
__txn_ckp_print(notused1, dbtp, lsnp, notused3, notused4)
DB_LOG *notused1;