summaryrefslogtreecommitdiff
path: root/db2/btree/bt_put.c
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>1997-11-12 00:06:02 +0000
committerUlrich Drepper <drepper@redhat.com>1997-11-12 00:06:02 +0000
commitcc3fa755128c70f8afc2d604936f33e4d7d652d8 (patch)
tree0c68c1298140e1524078e8bfdf5154fb6129b680 /db2/btree/bt_put.c
parent86187531d302de284b67ac162cf76c60c86bd7da (diff)
1997-11-11 21:30 Ulrich Drepper <drepper@cygnus.com> * include/sys/stat.h: Define stat, fstat, lstat and *64 variants as macros so the the library compiles correctly even without optimization. * io/fstat.c: Undef fstat. * io/fstat64.c: Undef fstat64 * io/lstat.c: Undef lstat. * io/lstat64.c: Undef lstat64 * io/stat.c: Undef stat. * io/stat64.c: Undef stat64 * io/fts.c: Include <include/sys/stat.h> to get macro definitions. * io/ftw.c: Likewise. * io/getdirname.c: Likewise. * Makefile (install): Run test-installation.pl if possible. * db2/Makefile: Update from db-2.3.12. * db2/db.h: Likewise. * db2/db_int.h: Likewise. * db2/btree/bt_cursor.c: Likewise. * db2/btree/bt_delete.c: Likewise. * db2/btree/bt_open.c: Likewise. * db2/btree/bt_put.c: Likewise. * db2/btree/bt_rec.c: Likewise. * db2/btree/bt_recno.c: Likewise. * db2/btree/bt_search.c: Likewise. * db2/btree/bt_split.c: Likewise. * db2/btree/bt_stat.c: Likewise. * db2/btree/btree.src: Likewise. * db2/btree/btree_auto.c: Likewise. * db2/btree/bt_cursor.c: Likewise. * db2/btree/bt_delete.c: Likewise. * db2/btree/bt_open.c: Likewise. * db2/btree/bt_put.c: Likewise. * db2/btree/bt_rec.c: Likewise. * db2/btree/bt_recno.c: Likewise. * db2/btree/bt_search.c: Likewise. * db2/btree/bt_split.c: Likewise. * db2/btree/bt_stat.c: Likewise. * db2/btree/btree.src: Likewise. * db2/btree/btree_auto.c: Likewise. * db2/common/db_appinit.c: Likewise. * db2/common/db_apprec.c: Likewise. * db2/common/db_byteorder.c: Likewise. * db2/common/db_region.c: Likewise. * db2/db/db.c: Likewise * db2/db/db.src: Likewise * db2/db/db_auto.c: Likewise * db2/db/db_dispatch.c: Likewise * db2/db/db_dup.c: Likewise * db2/db/db_overflow.c: Likewise * db2/db/db_pr.c: Likewise * db2/db/db_rec.c: Likewise * db2/db/db_ret.c: Likewise * db2/db/db_thread.c: Likewise * db2/db185/db185.c: Likewise. * db2/hash/hash.c: Likewise. * db2/hash/hash.src: Likewise. * db2/hash/hash_auto.c: Likewise. * db2/hash/hash_dup.c: Likewise. * db2/hash/hash_page.c: Likewise. 
* db2/hash/hash_rec.c: Likewise. * db2/include/btree_auto.h: Likewise. * db2/include/btree_ext.h: Likewise. * db2/include/clib_ext.h: Likewise. * db2/include/common_ext.h: Likewise. * db2/include/db.h.src: Likewise. * db2/include/db_am.h: Likewise. * db2/include/db_auto.h: Likewise. * db2/include/db_cxx.h: Likewise. * db2/include/db_ext.h: Likewise. * db2/include/db_int.h.src: Likewise. * db2/include/hash.h: Likewise. * db2/include/hash_auto.h: Likewise. * db2/include/hash_ext.h: Likewise. * db2/include/lock.h: Likewise. * db2/include/lock_ext.h: Likewise. * db2/include/log.h: Likewise. * db2/include/log_ext.h: Likewise. * db2/include/mp.h: Likewise. * db2/include/mp_ext.h: Likewise. * db2/include/mutex_ext.h: Likewise. * db2/include/os_ext.h: Likewise. * db2/include/os_func.h: Likewise. * db2/include/txn.h: Likewise. * db2/include/txn_ext.h: Likewise. * db2/lock/lock.c: Likewise. * db2/lock/lock_deadlock.c: Likewise. * db2/log/log.c: Likewise. * db2/log/log_archive.c: Likewise. * db2/log/log_auto.c: Likewise. * db2/log/log_findckp.c: Likewise. * db2/log/log_get.c: Likewise. * db2/log/log_put.c: Likewise. * db2/log/log_rec.c: Likewise. * db2/log/log_register.c: Likewise. * db2/mp/mp_bh.c: Likewise. * db2/mp/mp_fget.c: Likewise. * db2/mp/mp_fopen.c: Likewise. * db2/mp/mp_fput.c: Likewise. * db2/mp/mp_fset.c: Likewise. * db2/mp/mp_open.c: Likewise. * db2/mp/mp_pr.c: Likewise. * db2/mp/mp_region.c: Likewise. * db2/mp/mp_sync.c: Likewise. * db2/mutex/mutex.c: Likewise. * db2/os/os_abs.c: Likewise. * db2/os/os_dir.c: Likewise. * db2/os/os_fid.c: Likewise. * db2/os/os_fsync.c: Likewise. * db2/os/os_func.c: Likewise. * db2/os/os_map.c: Likewise. * db2/os/os_oflags.c: Likewise. * db2/os/os_open.c: Likewise. * db2/os/os_rpath.c: Likewise. * db2/os/os_rw.c: Likewise. * db2/os/os_seek.c: Likewise. * db2/os/os_sleep.c: Likewise. * db2/os/os_stat.c: Likewise. * db2/os/os_unlink.c: Likewise. * db2/progs/db_deadlock/db_deadlock.c: Likewise. 
* db2/progs/db_dump/db_dump.c: Likewise. * db2/progs/db_load/db_load.c: Likewise. * db2/progs/db_recover/db_recover.c: Likewise. * db2/progs/db_stat/db_stat.c: Likewise. * db2/txn/txn.c: Likewise. * db2/txn/txn_auto.c: Likewise. * db2/txn/txn_rec.c: Likewise. * db2/os/db_os_abs.c: Removed. * db2/os/db_os_dir.c: Removed. * db2/os/db_os_fid.c: Removed. * db2/os/db_os_lseek.c: Removed. * db2/os/db_os_mmap.c: Removed. * db2/os/db_os_open.c: Removed. * db2/os/db_os_rw.c: Removed. * db2/os/db_os_sleep.c: Removed. * db2/os/db_os_stat.c: Removed. * db2/os/db_os_unlink.c: Removed. * libio/stdio.h (fopen): Add __restrict to parameters. * manual/process.texi (system): Describe behaviour for NULL argument. * stdio-common/printf-parse.h: Parse hh modifier. * stdio-common/vfprintf.c: Handle hh modifier. * stdio-common/vfscanf.c: Likewise. * manual/stdio.texi: Describe hh modifier for scanf/printf. * math/complex.h: Don't define _Imaginary_I, but instead _Complex_I. gcc does no yet know the `imaginary' keyword. * math/test-math.c: Add little test for know gcc bug. * math/tgmath.h: Make complex versions of log10() only available if __USE_GNU. * stdlib/test-canon.c: Fix typo. * sysdeps/generic/setenv.c: Avoid compilation warnings. Reported by Jim Meyering. * sysdeps/generic/bits/errno.h: EILSEQ is an ISO C error number. * sysdeps/mach/hurd/bits/errno.h: Likewise. * sysdeps/standalone/bits/errno.h: Likewise. * sysdeps/unix/sysv/linux/bits/errno.h: Likewise. * sysdeps/i386/i586/memcpy.S: New file. * sysdeps/i386/i586/mempcpy.S: New file. * sysdeps/i386/i586/memset.S: Fix typo. * sysdeps/posix/getcwd.c: Define HAVE_MEMPCPY for _LIBC. Add casts. * sysdeps/posix/system.c: Add comment to explain code. * sysdeps/wordsize-32/inttypes.h: Include <stddef.h> for wchar_t. Define PTRDIFF_{MIN,MAX}, SIG_ATOMIC_{MIN,MAX}, SIZE_MAX, WCHAR_{MIN,MAX}, WINT_{MIN,MAX}. Define wcstoimax, wcstoumax. * sysdeps/wordsize-64/inttypes.h: Likewise. 
* wcsmbs/wchar.h: Define WCHAR_{MIN,MAX} if not already defined. Declare __wcsto{l,ul,ll,ull}_internal only if not already done. * time/Makefile (routines): Add strfxtime. * time/strftime.c: Implement %F and %f format. * time/strfxtime.c: New file. * time/time.h: Define new types and symbols from ISO C 9X. * time/mktime.c: Little comment correction. 1997-11-10 Andreas Jaeger <aj@arthur.rhein-neckar.de> * sysdeps/libm-ieee754/s_sincosl.c: Fix typo. * sysdeps/libm-ieee754/s_tanl.c: Fix typo. * sysdeps/libm-ieee754/s_floorl.c: Correct typos. * sysdeps/libm-ieee754/e_remainderl.c: Replace EXTRACT_LDOUBLE_WORDS by GET_LDOUBLE_WORDS. * sysdeps/libm-ieee754/e_atan2l.c: Replace EXTRACT_LDOUBLE_WORDS by GET_LDOUBLE_WORDS. * sysdeps/libm-ieee754/s_scalbnl.c: Replace ";" by "," for correct variable declaration. * sysdeps/libm-ieee754/s_scalblnl.c: Likewise. * sysdeps/libm-ieee754/s_lrint.c (__lrint): Correct function. * math/libm-test.c (sqrt_test): Add test for sqrt (0.25). (asin_test): Add more test. 1997-11-10 23:34 Ulrich Drepper <drepper@cygnus.com> * sysdeps/libm-ieee754/e_asin.c: Add braces to make code clearer and to not confuse the poor compiler. * sysdeps/libm-ieee754/e_asinf.c: Likewise. Reported by vertex@cagent.com. 1997-11-09 Andreas Schwab <schwab@issan.informatik.uni-dortmund.de> * elf/dl-lookup.c (do_lookup): Don't accept the base version if we require a specific one. * libio/oldfreopen.c: Bind old symbols to version GLIBC_2.0. * libio/oldiofopen.c: Likewise. * libio/oldstdfiles.c: Likewise. * libc.map: Export them. 1997-11-10 07:40 H.J. Lu <hjl@gnu.ai.mit.edu> * stdlib/exit.c (exit): Handle recursive calls to exit (). 1997-11-09 Andreas Schwab <schwab@issan.informatik.uni-dortmund.de> * sysdeps/m68k/fpu/s_llrint.c: Fixed to take double argument instead of long double. * sysdeps/m68k/fpu/s_llrintf.c: New file. * sysdeps/m68k/fpu/s_llrintl.c: New file. * sysdeps/libm-ieee754/s_llrint.c: Make compilable and fix overflow condition. 
* sysdeps/libm-ieee754/s_llrintf.c: Fix overflow condition. * sysdeps/libm-ieee754/s_llrintl.c: Likewise. * sysdeps/libm-ieee754/s_llround.c: Likewise. * sysdeps/libm-ieee754/s_llroundf.c: Likewise. * sysdeps/libm-ieee754/s_llroundl.c: Likewise. * sysdeps/libm-ieee754/s_lrint.c: Likewise. * sysdeps/libm-ieee754/s_lrintf.c: Likewise. * sysdeps/libm-ieee754/s_lrintl.c: Likewise. * sysdeps/libm-ieee754/s_lround.c: Likewise. * sysdeps/libm-ieee754/s_lroundf.c: Likewise. * sysdeps/libm-ieee754/s_lroundl.c: Likewise. * math/libm-test.c: Test all three variants of lrint and llrint. Fix typos in lround and llround tests. Add tests for boundary cases for lrint and llround. 1997-11-08 Andreas Schwab <schwab@issan.informatik.uni-dortmund.de> * manual/arith.texi: Misc doc fixes. * manual/ctype.texi: Likewise. * manual/pattern.texi: Likewise. * manual/terminal.texi: Likewise. 1997-11-08 Andreas Schwab <schwab@issan.informatik.uni-dortmund.de> * sysdeps/unix/sysv/linux/xstatconv.c: Use struct assignment instead of memcpy to let the compiler use whatever it regards as optimal. * sysdeps/unix/sysv/linux/alpha/xstatconv.c: Likewise. 1997-11-08 Andreas Schwab <schwab@issan.informatik.uni-dortmund.de> * sysdeps/unix/sysv/linux/Makefile (sysdep_headers) [$(subdir)=misc]: Add sys/prctl.h. * sysdeps/unix/sysv/linux/Dist: Distribute it. 1997-11-08 Andreas Schwab <schwab@issan.informatik.uni-dortmund.de> * io/ftwtest-sh: Don't use the unknown which command, instead try pwd as /bin/pwd and /usr/bin/pwd. 1997-11-08 Andreas Schwab <schwab@issan.informatik.uni-dortmund.de> * manual/maint.texi (Tools for Installation): Don't recommend broken version 3.76.1 of make. (Porting): Fix wording. 1997-11-06 06:13 H.J. Lu <hjl@gnu.ai.mit.edu> * config.make.in (build-pic-default): New, defined with pic_default. * configure.in (pic_default): New, set to yes if PIC is default. * Makeconfig (CPPFLAGS-.o, CPPFLAGS-.op, CPPFLAGS-.og, CPPFLAGS-.ob): Add -DPIC if $(build-pic-default) is yes. 
1997-11-09 18:15 Ulrich Drepper <drepper@cygnus.com> * Makerules (libc.so): Fix typo. * csu/Makefile (CFLAGS-initfini.s): Correctly fix moving function definition. Patch by Zack Weinberg <zack@rabi.phys.columbia.edu>. * stdlib/strtod.c: Handle numbers like 0.0e10000 correctly which produce ±0.0. Reported by Joe Keane <jgk@jgk.org>. * sysdeps/libm-ieee754/s_ceill.c: Fix typos. * sysdeps/libm-ieee754/s_llrint.c: Correct code, it never worked. 1997-11-06 07:00 H.J. Lu <hjl@gnu.ai.mit.edu> * sysdeps/unix/sysv/i386/i686/time.S: Removed. 1997-11-08 14:07 Thorsten Kukuk <kukuk@vt.uni-paderborn.de> * nis/libnsl.map: Add __do_niscall2 for nis_cachemgr. * nis/nis_call.c: Set UDP resend timeout correct. * nis/nss_compat/compat-grp.c: Rewritten to make it faster. * nis/nss_compat/compat-pwd.c: Likewise. * nis/nss_compat/compat-spwd.c: Likewise. * nis/ypclnt.c: Fix UDP resend timeout, fix yp_bind/do_ypcall interaction. * inet/protocols/routed.h: Include sys/socket.h. * inet/protocols/talkd.h: Likewise. * inet/protocols/timed.h: Include rpc/types.h. * sunrpc/rpc/pmap_clnt.h: Include rpc/clnt.h. 1997-11-06 01:39 Ulrich Drepper <drepper@cygnus.com> * Makerules (libc.so): Add missing closing brace. 1997-11-05 Brendan Kehoe <brendan@lisa.cygnus.com> * libio.h (__P): Name its arg `p' instead of `params'. This was added solely to work around problems with the definition of __P in the Solaris math.h header.
Diffstat (limited to 'db2/btree/bt_put.c')
-rw-r--r--db2/btree/bt_put.c461
1 files changed, 296 insertions, 165 deletions
diff --git a/db2/btree/bt_put.c b/db2/btree/bt_put.c
index af09f76d41..b3d775bb0f 100644
--- a/db2/btree/bt_put.c
+++ b/db2/btree/bt_put.c
@@ -47,7 +47,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)bt_put.c 10.25 (Sleepycat) 9/17/97";
+static const char sccsid[] = "@(#)bt_put.c 10.31 (Sleepycat) 10/26/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -66,7 +66,10 @@ static const char sccsid[] = "@(#)bt_put.c 10.25 (Sleepycat) 9/17/97";
static int __bam_fixed __P((BTREE *, DBT *));
static int __bam_lookup __P((DB *, DBT *, int *));
static int __bam_ndup __P((DB *, PAGE *, u_int32_t));
-static int __bam_partial __P((DB *, DBT *, PAGE *, u_int32_t));
+static int __bam_ovput __P((DB *, PAGE *, u_int32_t, DBT *));
+static int __bam_partial __P((DB *, DBT *, PAGE *, u_int32_t, u_int32_t));
+static u_int32_t
+ __bam_partsize __P((DB *, DBT *, PAGE *, u_int32_t));
/*
* __bam_put --
@@ -334,21 +337,6 @@ slow: return (__bam_search(dbp, key, S_INSERT, 1, NULL, exactp));
}
/*
- * OVPUT --
- * Copy an overflow item onto a page.
- */
-#undef OVPUT
-#define OVPUT(h, indx, bo) do { \
- DBT __hdr; \
- memset(&__hdr, 0, sizeof(__hdr)); \
- __hdr.data = &bo; \
- __hdr.size = BOVERFLOW_SIZE; \
- if ((ret = __db_pitem(dbp, \
- h, indx, BOVERFLOW_SIZE, &__hdr, NULL)) != 0) \
- return (ret); \
-} while (0)
-
-/*
* __bam_iitem --
* Insert an item into the tree.
*
@@ -365,19 +353,18 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
{
BTREE *t;
BKEYDATA *bk;
- BOVERFLOW kbo, dbo;
DBT tdbt;
PAGE *h;
db_indx_t indx;
- u_int32_t have_bytes, need_bytes, needed;
- int bigkey, bigdata, dcopy, dupadjust, ret;
+ u_int32_t data_size, have_bytes, need_bytes, needed;
+ int bigkey, bigdata, dupadjust, replace, ret;
t = dbp->internal;
h = *hp;
indx = *indxp;
- dupadjust = 0;
bk = NULL; /* XXX: Shut the compiler up. */
+ dupadjust = replace = 0;
/*
* If it's a page of duplicates, call the common code to do the work.
@@ -385,7 +372,7 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
* !!!
* Here's where the hp and indxp are important. The duplicate code
* may decide to rework/rearrange the pages and indices we're using,
- * so the caller must understand that the stack has to change.
+ * so the caller must understand that the page stack may change.
*/
if (TYPE(h) == P_DUPLICATE) {
/* Adjust the index for the new item if it's a DB_AFTER op. */
@@ -401,24 +388,7 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
return (__db_dput(dbp, data, hp, indxp, __bam_new));
}
- /*
- * XXX
- * Handle partial puts.
- *
- * This is truly awful from a performance standput. We don't optimize
- * for partial puts at all, we delete the record and add it back in,
- * regardless of size or if we're simply overwriting current data.
- * The hash access method does this a lot better than we do, and we're
- * eventually going to have to fix it.
- */
- if (F_ISSET(data, DB_DBT_PARTIAL)) {
- tdbt = *data;
- if ((ret = __bam_partial(dbp, &tdbt, h, indx)) != 0)
- return (ret);
- data = &tdbt;
- }
-
- /* If it's a short fixed-length record, fix it up. */
+ /* Handle fixed-length records: build the real record. */
if (F_ISSET(dbp, DB_RE_FIXEDLEN) && data->size != t->bt_recno->re_len) {
tdbt = *data;
if ((ret = __bam_fixed(t, &tdbt)) != 0)
@@ -427,30 +397,15 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
}
/*
- * If the key or data item won't fit on a page, store it in the
- * overflow pages.
- *
- * !!!
- * From this point on, we have to recover the allocated overflow
- * pages on error.
+ * Figure out how much space the data will take, including if it's a
+ * partial record. If either of the key or data items won't fit on
+ * a page, we'll have to store them on overflow pages.
*/
- bigkey = bigdata = 0;
- if (LF_ISSET(BI_NEWKEY) && key->size > t->bt_ovflsize) {
- B_TSET(kbo.type, B_OVERFLOW, 0);
- kbo.tlen = key->size;
- if ((ret = __db_poff(dbp, key, &kbo.pgno, __bam_new)) != 0)
- goto err;
- bigkey = 1;
- }
- if (data->size > t->bt_ovflsize) {
- B_TSET(dbo.type, B_OVERFLOW, 0);
- dbo.tlen = data->size;
- if ((ret = __db_poff(dbp, data, &dbo.pgno, __bam_new)) != 0)
- goto err;
- bigdata = 1;
- }
+ bigkey = LF_ISSET(BI_NEWKEY) && key->size > t->bt_ovflsize;
+ data_size = F_ISSET(data, DB_DBT_PARTIAL) ?
+ __bam_partsize(dbp, data, h, indx) : data->size;
+ bigdata = data_size > t->bt_ovflsize;
- dcopy = 0;
needed = 0;
if (LF_ISSET(BI_NEWKEY)) {
/* If BI_NEWKEY is set we're adding a new key and data pair. */
@@ -461,7 +416,7 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
if (bigdata)
needed += BOVERFLOW_PSIZE;
else
- needed += BKEYDATA_PSIZE(data->size);
+ needed += BKEYDATA_PSIZE(data_size);
} else {
/*
* We're either overwriting the data item of a key/data pair
@@ -482,16 +437,8 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
if (bigdata)
need_bytes += BOVERFLOW_PSIZE;
else
- need_bytes += BKEYDATA_PSIZE(data->size);
+ need_bytes += BKEYDATA_PSIZE(data_size);
- /*
- * If we're overwriting a data item, we copy it if it's not a
- * special record type and it's the same size (including any
- * alignment) and do a delete/insert otherwise.
- */
- if (op == DB_CURRENT && !bigdata &&
- B_TYPE(bk->type) == B_KEYDATA && have_bytes == need_bytes)
- dcopy = 1;
if (have_bytes < need_bytes)
needed += need_bytes - have_bytes;
}
@@ -505,9 +452,15 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
* check in the btree split code, so we don't undo it there!?!?
*/
if (P_FREESPACE(h) < needed ||
- (t->bt_maxkey != 0 && NUM_ENT(h) > t->bt_maxkey)) {
- ret = DB_NEEDSPLIT;
- goto err;
+ (t->bt_maxkey != 0 && NUM_ENT(h) > t->bt_maxkey))
+ return (DB_NEEDSPLIT);
+
+ /* Handle partial puts: build the real record. */
+ if (F_ISSET(data, DB_DBT_PARTIAL)) {
+ tdbt = *data;
+ if ((ret = __bam_partial(dbp, &tdbt, h, indx, data_size)) != 0)
+ return (ret);
+ data = &tdbt;
}
/*
@@ -515,10 +468,10 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
*
* 1. Append a new key/data pair.
* 2. Insert a new key/data pair.
- * 3. Copy the data item.
- * 4. Delete/insert the data item.
- * 5. Append a new data item.
- * 6. Insert a new data item.
+ * 3. Append a new data item (a new duplicate).
+ * 4. Insert a new data item (a new duplicate).
+ * 5. Overflow item: delete and re-add the data item.
+ * 6. Replace the data item.
*/
if (LF_ISSET(BI_NEWKEY)) {
switch (op) {
@@ -533,42 +486,17 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
}
/* Add the key. */
- if (bigkey)
- OVPUT(h, indx, kbo);
- else {
- DBT __data;
- memset(&__data, 0, sizeof(__data));
- __data.data = key->data;
- __data.size = key->size;
+ if (bigkey) {
+ if ((ret = __bam_ovput(dbp, h, indx, key)) != 0)
+ return (ret);
+ } else
if ((ret = __db_pitem(dbp, h, indx,
- BKEYDATA_SIZE(key->size), NULL, &__data)) != 0)
- goto err;
- }
+ BKEYDATA_SIZE(key->size), NULL, key)) != 0)
+ return (ret);
++indx;
} else {
switch (op) {
- case DB_CURRENT: /* 3. Copy the data item. */
- /*
- * If we're not logging and it's possible, overwrite
- * the current item.
- *
- * XXX
- * We should add a separate logging message so that
- * we can do this anytime it's possible, including
- * for partial record puts.
- */
- if (dcopy && !DB_LOGGING(dbp)) {
- bk->len = data->size;
- memcpy(bk->data, data->data, data->size);
- goto done;
- }
- /* 4. Delete/insert the data item. */
- if (TYPE(h) == P_LBTREE)
- ++indx;
- if ((ret = __bam_ditem(dbp, h, indx)) != 0)
- goto err;
- break;
- case DB_AFTER: /* 5. Append a new data item. */
+ case DB_AFTER: /* 3. Append a new data item. */
if (TYPE(h) == P_LBTREE) {
/*
* Adjust the cursor and copy in the key for
@@ -576,7 +504,7 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
*/
if ((ret = __bam_adjindx(dbp,
h, indx + P_INDX, indx, 1)) != 0)
- goto err;
+ return (ret);
indx += 3;
dupadjust = 1;
@@ -589,7 +517,7 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
*indxp += 1;
}
break;
- case DB_BEFORE: /* 6. Insert a new data item. */
+ case DB_BEFORE: /* 4. Insert a new data item. */
if (TYPE(h) == P_LBTREE) {
/*
* Adjust the cursor and copy in the key for
@@ -597,43 +525,62 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
*/
if ((ret =
__bam_adjindx(dbp, h, indx, indx, 1)) != 0)
- goto err;
+ return (ret);
++indx;
dupadjust = 1;
} else
__bam_ca_di(dbp, h->pgno, indx, 1);
break;
+ case DB_CURRENT:
+ if (TYPE(h) == P_LBTREE)
+ ++indx;
+
+ /*
+ * 5. Delete/re-add the data item.
+ *
+ * If we're dealing with offpage items, we have to
+ * delete and then re-add the item.
+ */
+ if (bigdata || B_TYPE(bk->type) == B_OVERFLOW) {
+ if ((ret = __bam_ditem(dbp, h, indx)) != 0)
+ return (ret);
+ break;
+ }
+
+ /* 6. Replace the data item. */
+ replace = 1;
+ break;
default:
abort();
}
}
/* Add the data. */
- if (bigdata)
- OVPUT(h, indx, dbo);
- else {
+ if (bigdata) {
+ if ((ret = __bam_ovput(dbp, h, indx, data)) != 0)
+ return (ret);
+ } else {
BKEYDATA __bk;
- DBT __hdr, __data;
- memset(&__data, 0, sizeof(__data));
- __data.data = data->data;
- __data.size = data->size;
+ DBT __hdr;
if (LF_ISSET(BI_DELETED)) {
B_TSET(__bk.type, B_KEYDATA, 1);
- __bk.len = __data.size;
+ __bk.len = data->size;
__hdr.data = &__bk;
__hdr.size = SSZA(BKEYDATA, data);
ret = __db_pitem(dbp, h, indx,
- BKEYDATA_SIZE(__data.size), &__hdr, &__data);
- } else
+ BKEYDATA_SIZE(data->size), &__hdr, data);
+ } else if (replace)
+ ret = __bam_ritem(dbp, h, indx, data);
+ else
ret = __db_pitem(dbp, h, indx,
- BKEYDATA_SIZE(data->size), NULL, &__data);
+ BKEYDATA_SIZE(data->size), NULL, data);
if (ret != 0)
- goto err;
+ return (ret);
}
-done: ++t->lstat.bt_added;
+ ++t->lstat.bt_added;
ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY);
@@ -645,22 +592,206 @@ done: ++t->lstat.bt_added;
if (dupadjust && P_FREESPACE(h) <= dbp->pgsize / 2) {
--indx;
if ((ret = __bam_ndup(dbp, h, indx)) != 0)
- goto err;
+ return (ret);
}
if (t->bt_recno != NULL)
F_SET(t->bt_recno, RECNO_MODIFIED);
- if (0) {
-err: if (bigkey)
- (void)__db_doff(dbp, kbo.pgno, __bam_free);
- if (bigdata)
- (void)__db_doff(dbp, dbo.pgno, __bam_free);
- }
return (ret);
}
/*
+ * __bam_partsize --
+ * Figure out how much space a partial data item is in total, that is,
+ * the length the record will have once the partial put is applied.
+ *
+ * dbp: not referenced by this function.
+ * data: the partial-put DBT; only its doff, dlen and size are read.
+ * h, indx: the page and index of the item being written. On a P_LBTREE
+ * page the existing data item is read from indx + O_INDX.
+ *
+ * Returns the total length of the resulting record, in bytes.
+ */
+static u_int32_t
+__bam_partsize(dbp, data, h, indx)
+ DB *dbp;
+ DBT *data;
+ PAGE *h;
+ u_int32_t indx;
+{
+ BKEYDATA *bk;
+ u_int32_t nbytes;
+
+ /*
+ * Figure out how much total space we'll need. If the record doesn't
+ * already exist (the index is past the last entry on the page), it's
+ * the offset at which the provided data starts (doff) plus the size
+ * of the provided data (size).
+ */
+ if (indx >= NUM_ENT(h))
+ return (data->doff + data->size);
+
+ /*
+ * Otherwise, it's the data provided plus any already existing data
+ * that we're not replacing. The existing length is taken from tlen
+ * if the item is a B_OVERFLOW item, and from len otherwise.
+ */
+ bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
+ nbytes =
+ B_TYPE(bk->type) == B_OVERFLOW ? ((BOVERFLOW *)bk)->tlen : bk->len;
+
+ /*
+ * There are really two cases here:
+ *
+ * Case 1: We are replacing some bytes that do not exist (i.e., they
+ * are past the end of the record). In this case the number of bytes
+ * we are replacing is irrelevant and all we care about is how many
+ * bytes we are going to add from offset. So, the new record length
+ * is going to be the size of the new bytes (size) plus wherever those
+ * new bytes begin (doff).
+ *
+ * Case 2: All the bytes we are replacing exist. Therefore, the new
+ * size is the oldsize (nbytes) minus the bytes we are replacing (dlen)
+ * plus the bytes we are adding (size).
+ */
+ if (nbytes < data->doff + data->dlen) /* Case 1 */
+ return (data->doff + data->size);
+
+ return (nbytes + data->size - data->dlen); /* Case 2 */
+}
+
+/*
+ * OVPUT --
+ * Copy an overflow item onto a page.
+ *
+ * Wraps the BOVERFLOW structure bo in a zeroed header DBT of
+ * BOVERFLOW_SIZE bytes and passes it to __db_pitem for page h at
+ * index indx.
+ *
+ * The expansion uses the names dbp and ret from the enclosing function.
+ * If __db_pitem returns non-zero, the expansion executes return (ret)
+ * in that enclosing function.
+ */
+#undef OVPUT
+#define OVPUT(h, indx, bo) do { \
+ DBT __hdr; \
+ memset(&__hdr, 0, sizeof(__hdr)); \
+ __hdr.data = &bo; \
+ __hdr.size = BOVERFLOW_SIZE; \
+ if ((ret = __db_pitem(dbp, \
+ h, indx, BOVERFLOW_SIZE, &__hdr, NULL)) != 0) \
+ return (ret); \
+} while (0)
+
+/*
+ * __bam_ovput --
+ * Build an overflow item and put it on the page.
+ *
+ * dbp: the database handle, passed to __db_poff and (via OVPUT) __db_pitem.
+ * h, indx: the page and the index at which the BOVERFLOW entry is placed.
+ * item: the key or data DBT to store; item->size becomes the entry's tlen.
+ *
+ * Returns 0 on success, otherwise the non-zero value returned by
+ * __db_poff or __db_pitem.
+ *
+ * NOTE(review): if __db_pitem fails after __db_poff has succeeded, this
+ * function returns without releasing whatever __db_poff stored. Presumably
+ * that is recovered elsewhere -- confirm.
+ */
+static int
+__bam_ovput(dbp, h, indx, item)
+ DB *dbp;
+ PAGE *h;
+ u_int32_t indx;
+ DBT *item;
+{
+ BOVERFLOW bo;
+ int ret;
+
+ B_TSET(bo.type, B_OVERFLOW, 0);
+ bo.tlen = item->size;
+ if ((ret = __db_poff(dbp, item, &bo.pgno, __bam_new)) != 0)
+ return (ret);
+
+ OVPUT(h, indx, bo); /* Returns ret from here on failure. */
+
+ return (0);
+}
+
+/*
+ * __bam_ritem --
+ * Replace an item on a page.
+ *
+ * dbp: the database handle; consulted for logging.
+ * h, indx: the page and the index of the existing item to be replaced.
+ * data: the replacement bytes (data->data, data->size).
+ *
+ * Returns 0 on success, otherwise the non-zero value returned by
+ * __bam_repl_log. If logging fails the page has not been modified.
+ *
+ * PUBLIC: int __bam_ritem __P((DB *, PAGE *, u_int32_t, DBT *));
+ */
+int
+__bam_ritem(dbp, h, indx, data)
+ DB *dbp;
+ PAGE *h;
+ u_int32_t indx;
+ DBT *data;
+{
+ BKEYDATA *bk;
+ DBT orig, repl;
+ db_indx_t lo, ln, min, off, prefix, suffix;
+ int32_t nbytes;
+ int cnt, ret;
+ u_int8_t *p, *t;
+
+ /*
+ * Replace a single item onto a page. The logic figuring out where
+ * to insert and whether it fits is handled in the caller. All we do
+ * here is manage the page shuffling.
+ */
+ bk = GET_BKEYDATA(h, indx);
+
+ /* Log the change before the page is touched. */
+ if (DB_LOGGING(dbp)) {
+ /*
+ * We might as well check to see if the two data items share
+ * a common prefix and suffix -- it can save us a lot of log
+ * message if they're large. The prefix scan is bounded by
+ * the shorter of the two lengths; the suffix scan is bounded
+ * by what remains of that after the prefix, so the two never
+ * overlap.
+ */
+ min = data->size < bk->len ? data->size : bk->len;
+ for (prefix = 0,
+ p = bk->data, t = data->data;
+ prefix < min && *p == *t; ++prefix, ++p, ++t)
+ ;
+
+ min -= prefix;
+ for (suffix = 0,
+ p = (u_int8_t *)bk->data + bk->len - 1,
+ t = (u_int8_t *)data->data + data->size - 1;
+ suffix < min && *p == *t; ++suffix, --p, --t)
+ ;
+
+ /*
+ * We only log the parts of the items that have changed: the
+ * bytes between the common prefix and the common suffix of
+ * the old item (orig) and of the new item (repl).
+ */
+ orig.data = (u_int8_t *)bk->data + prefix;
+ orig.size = bk->len - (prefix + suffix);
+ repl.data = (u_int8_t *)data->data + prefix;
+ repl.size = data->size - (prefix + suffix);
+ if ((ret = __bam_repl_log(dbp->dbenv->lg_info, dbp->txn,
+ &LSN(h), 0, dbp->log_fileid, PGNO(h), &LSN(h),
+ (u_int32_t)indx, (u_int32_t)B_DISSET(bk->type),
+ &orig, &repl, (u_int32_t)prefix, (u_int32_t)suffix)) != 0)
+ return (ret);
+ }
+
+ /*
+ * Set references to the first in-use byte on the page and the
+ * first byte of the item being replaced.
+ */
+ p = (u_int8_t *)h + HOFFSET(h);
+ t = (u_int8_t *)bk;
+
+ /*
+ * If the entry is growing in size, shift the beginning of the data
+ * part of the page down. If the entry is shrinking in size, shift
+ * the beginning of the data part of the page up. Use memmove(3),
+ * the regions overlap.
+ *
+ * nbytes is the old on-page size minus the new on-page size, so it
+ * is negative when the item grows and positive when it shrinks.
+ * Nothing moves if the two on-page sizes are equal.
+ */
+ lo = BKEYDATA_SIZE(bk->len);
+ ln = BKEYDATA_SIZE(data->size);
+ if (lo != ln) {
+ nbytes = lo - ln; /* Signed difference. */
+ if (p == t) /* First index is fast. */
+ h->inp[indx] += nbytes;
+ else { /* Else, shift the page. */
+ memmove(p + nbytes, p, t - p);
+
+ /*
+ * Adjust the indices' offsets. Every entry whose
+ * offset is at or below the replaced item's offset
+ * is either in the region just moved or is the item
+ * itself, so it shifts by nbytes.
+ */
+ off = h->inp[indx];
+ for (cnt = 0; cnt < NUM_ENT(h); ++cnt)
+ if (h->inp[cnt] <= off)
+ h->inp[cnt] += nbytes;
+ }
+
+ /* Clean up the page and adjust the item's reference. */
+ HOFFSET(h) += nbytes;
+ t += nbytes;
+ }
+
+ /*
+ * Copy the new item onto the page. It is always written with type
+ * B_KEYDATA; the third B_TSET argument is 0 -- presumably the
+ * "deleted" flag, confirm against the macro definition.
+ */
+ bk = (BKEYDATA *)t;
+ B_TSET(bk->type, B_KEYDATA, 0);
+ bk->len = data->size;
+ memcpy(bk->data, data->data, data->size);
+
+ return (0);
+}
+
+/*
* __bam_ndup --
* Check to see if the duplicate set at indx should have its own page.
* If it should, create it.
@@ -766,16 +897,21 @@ __bam_fixed(t, dbt)
rp = t->bt_recno;
/*
- * If using fixed-length records, and the record is long, return
- * EINVAL. If it's short, pad it out. Use the record data return
- * memory, it's only short-term.
+ * If database contains fixed-length records, and the record is long,
+ * return EINVAL.
*/
if (dbt->size > rp->re_len)
return (EINVAL);
+
+ /*
+ * The caller checked to see if it was just right, so we know it's
+ * short. Pad it out. We use the record data return memory, it's
+ * only a short-term use.
+ */
if (t->bt_rdata.ulen < rp->re_len) {
t->bt_rdata.data = t->bt_rdata.data == NULL ?
- (void *)malloc(rp->re_len) :
- (void *)realloc(t->bt_rdata.data, rp->re_len);
+ (void *)__db_malloc(rp->re_len) :
+ (void *)__db_realloc(t->bt_rdata.data, rp->re_len);
if (t->bt_rdata.data == NULL) {
t->bt_rdata.ulen = 0;
return (ENOMEM);
@@ -786,12 +922,16 @@ __bam_fixed(t, dbt)
memset((u_int8_t *)t->bt_rdata.data + dbt->size,
rp->re_pad, rp->re_len - dbt->size);
- /* Set the DBT to reference our new record. */
+ /*
+ * Clean up our flags and other information just in case, and
+ * change the caller's DBT to reference our created record.
+ */
t->bt_rdata.size = rp->re_len;
t->bt_rdata.dlen = 0;
t->bt_rdata.doff = 0;
t->bt_rdata.flags = 0;
*dbt = t->bt_rdata;
+
return (0);
}
@@ -800,47 +940,28 @@ __bam_fixed(t, dbt)
* Build the real record for a partial put.
*/
static int
-__bam_partial(dbp, dbt, h, indx)
+__bam_partial(dbp, dbt, h, indx, nbytes)
DB *dbp;
DBT *dbt;
PAGE *h;
- u_int32_t indx;
+ u_int32_t indx, nbytes;
{
BTREE *t;
BKEYDATA *bk, tbk;
BOVERFLOW *bo;
DBT copy;
- u_int32_t len, nbytes, tlen;
+ u_int32_t len, tlen;
int ret;
u_int8_t *p;
bo = NULL; /* XXX: Shut the compiler up. */
t = dbp->internal;
- /*
- * Figure out how much total space we'll need. Worst case is where
- * the record is 0 bytes long, in which case doff causes the record
- * to extend, and the put data is appended to it.
- */
- if (indx < NUM_ENT(h)) {
- bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
- if (B_TYPE(bk->type) == B_OVERFLOW) {
- bo = (BOVERFLOW *)bk;
- nbytes = bo->tlen;
- } else
- nbytes = bk->len;
- } else {
- bk = &tbk;
- B_TSET(bk->type, B_KEYDATA, 0);
- nbytes = bk->len = 0;
- }
- nbytes += dbt->doff + dbt->size + dbt->dlen;
-
- /* Allocate the space. */
+ /* We use the record data return memory, it's only a short-term use. */
if (t->bt_rdata.ulen < nbytes) {
t->bt_rdata.data = t->bt_rdata.data == NULL ?
- (void *)malloc(nbytes) :
- (void *)realloc(t->bt_rdata.data, nbytes);
+ (void *)__db_malloc(nbytes) :
+ (void *)__db_realloc(t->bt_rdata.data, nbytes);
if (t->bt_rdata.data == NULL) {
t->bt_rdata.ulen = 0;
return (ENOMEM);
@@ -848,6 +969,16 @@ __bam_partial(dbp, dbt, h, indx)
t->bt_rdata.ulen = nbytes;
}
+ /* Find the current record. */
+ if (indx < NUM_ENT(h)) {
+ bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
+ bo = (BOVERFLOW *)bk;
+ } else {
+ bk = &tbk;
+ B_TSET(bk->type, B_KEYDATA, 0);
+ bk->len = 0;
+ }
+
/* We use nul bytes for extending the record, get it over with. */
memset(t->bt_rdata.data, 0, nbytes);