summary refs log tree commit diff
path: root/db2/common/db_region.c
diff options
context:
space:
mode:
Diffstat (limited to 'db2/common/db_region.c')
-rw-r--r-- db2/common/db_region.c 565
1 files changed, 565 insertions, 0 deletions
diff --git a/db2/common/db_region.c b/db2/common/db_region.c
new file mode 100644
index 0000000000..51f8f4465c
--- /dev/null
+++ b/db2/common/db_region.c
@@ -0,0 +1,565 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ * Sleepycat Software. All rights reserved.
+ */
+/*
+ * Copyright (c) 1995, 1996
+ * The President and Fellows of Harvard University. All rights reserved.
+ *
+ * This code is derived from software contributed to Harvard by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_region.c 10.12 (Sleepycat) 7/26/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "common_ext.h"
+
+static int __db_rmap __P((DB_ENV *, int, size_t, void *));
+
+/*
+ * __db_rcreate --
+ *
+ * Common interface for creating a shared region. Handles synchronization
+ * across multiple processes.
+ *
+ * The dbenv contains the environment for this process, including naming
+ * information. The path argument represents the parameters passed to
+ * the open routines and may be either a file or a directory. If it is
+ * a directory, it must exist. If it is a file, then the file parameter
+ * must be NULL, otherwise, file is the name to be created inside the
+ * directory path.
+ *
+ * Returns 0 on success and a non-zero error value on failure. On success
+ * the region's file descriptor is stored through fdp and the address of the
+ * mapped region is stored through retp (which must point to a pointer).
+ * The region lock is acquired here and is not released by this function,
+ * so it is still held when the function returns successfully.
+ *
+ * On failure neither *fdp nor *retp is written, and any file descriptor
+ * obtained here has been closed and the file unlinked. An EEXIST result
+ * from the exclusive open is returned without logging a message.
+ *
+ * PUBLIC: int __db_rcreate __P((DB_ENV *, APPNAME,
+ * PUBLIC: const char *, const char *, int, size_t, int *, void *));
+ */
+int
+__db_rcreate(dbenv, appname, path, file, mode, size, fdp, retp)
+	DB_ENV *dbenv;
+	APPNAME appname;
+	const char *path, *file;
+	int mode, *fdp;
+	size_t size;
+	void *retp;
+{
+	RLAYOUT *rp;
+	int fd, ret;
+	char *name;
+
+	fd = -1;
+	rp = NULL;
+
+	/*
+	 * Get the filename -- note, if it's a temporary file, it will
+	 * be created by the underlying temporary file creation code,
+	 * so we have to check the file descriptor to be sure it's an
+	 * error.
+	 */
+	if ((ret = __db_appname(dbenv, appname, path, file, &fd, &name)) != 0)
+		return (ret);
+
+	/*
+	 * Now open the file. We need to make sure that multiple processes
+	 * that attempt to create the region at the same time are properly
+	 * ordered, so we open it O_EXCL and O_CREAT so two simultaneous
+	 * attempts to create the region will return failure in one of the
+	 * attempts.
+	 */
+	if (fd == -1 && (ret = __db_fdopen(name,
+	    DB_CREATE | DB_EXCL, DB_CREATE | DB_EXCL, mode, &fd)) != 0) {
+		if (ret != EEXIST)
+			__db_err(dbenv,
+			    "region create: %s: %s", name, strerror(ret));
+		goto err;
+	}
+
+	/* Grow the region to the correct size. */
+	if ((ret = __db_rgrow(dbenv, fd, size)) != 0)
+		goto err;
+
+	/* Map the region in. */
+	if ((ret = __db_rmap(dbenv, fd, size, &rp)) != 0)
+		goto err;
+
+	/*
+	 * Initialize the common information.
+	 *
+	 * !!!
+	 * We have to order the region creates so that two processes don't try
+	 * to simultaneously create the region and so that processes that are
+	 * joining the region never see inconsistent data. We'd like to play
+	 * file permissions games, but we can't because WNT filesystems won't
+	 * open a file mode 0.
+	 *
+	 * So, the process that's creating the region always acquires the lock
+	 * before setting the version number. Any process joining always
+	 * checks the version number before attempting to acquire the lock.
+	 *
+	 * We have to check the version number first, because if the version
+	 * number has not been written, it's possible that the mutex has not
+	 * been initialized in which case an attempt to get it could lead to
+	 * random behavior. If the version number isn't there (the file size
+	 * is too small) or it's 0, we know that the region is being created.
+	 */
+	(void)__db_mutex_init(&rp->lock, MUTEX_LOCK_OFFSET(rp, &rp->lock));
+	(void)__db_mutex_lock(&rp->lock,
+	    fd, dbenv == NULL ? NULL : dbenv->db_yield);
+
+	rp->refcnt = 1;
+	rp->size = size;
+	rp->flags = 0;
+	db_version(&rp->majver, &rp->minver, &rp->patch);
+
+	if (name != NULL)
+		FREES(name);
+
+	/*
+	 * Only hand the descriptor and the mapping back once everything has
+	 * succeeded, so a failed call never leaves the caller holding a
+	 * descriptor that the error path below has already closed.
+	 */
+	*fdp = fd;
+	*(void **)retp = rp;
+	return (0);
+
+err:	if (fd != -1) {
+		/* Unmap exactly the length that was mapped above. */
+		if (rp != NULL)
+			(void)__db_munmap(rp, size);
+		/* The name is treated as optional everywhere else in here. */
+		if (name != NULL)
+			(void)__db_unlink(name);
+		(void)__db_close(fd);
+	}
+	if (name != NULL)
+		FREES(name);
+	return (ret);
+}
+
+/*
+ * __db_ropen --
+ *	Construct the name of a file, open it and map it in.
+ *
+ * Returns 0 on success, after incrementing the region's reference count,
+ * storing the file descriptor through fdp and the mapped address through
+ * retp (which must point to a pointer).
+ *
+ * Returns EAGAIN if the file is smaller than an RLAYOUT, if the region's
+ * major version is still 0, if the file size changed while the region was
+ * being opened, or if the region is flagged DB_R_DELETED. Other non-zero
+ * values are passed through from the underlying calls. On failure the
+ * descriptor is closed, the mapping is discarded and neither *fdp nor
+ * *retp is written.
+ *
+ * If DB_MUTEXDEBUG is set in flags the region lock is not acquired.
+ *
+ * PUBLIC: int __db_ropen __P((DB_ENV *,
+ * PUBLIC: APPNAME, const char *, const char *, int, int *, void *));
+ */
+int
+__db_ropen(dbenv, appname, path, file, flags, fdp, retp)
+	DB_ENV *dbenv;
+	APPNAME appname;
+	const char *path, *file;
+	int flags, *fdp;
+	void *retp;
+{
+	RLAYOUT *rp;
+	off_t size1, size2;
+	int fd, ret;
+	char *name;
+
+	fd = -1;
+	rp = NULL;
+	size1 = 0;
+
+	/* Get the filename. */
+	if ((ret = __db_appname(dbenv, appname, path, file, NULL, &name)) != 0)
+		return (ret);
+
+	/* Open the file. */
+	if ((ret = __db_fdopen(name, flags, DB_MUTEXDEBUG, 0, &fd)) != 0) {
+		__db_err(dbenv, "region open: %s: %s", name, strerror(ret));
+		goto err2;
+	}
+
+	/*
+	 * Map the file in. We have to do things in a strange order so that
+	 * we don't get into a situation where the file was just created and
+	 * isn't yet initialized. See the comment in __db_rcreate() above.
+	 *
+	 * XXX
+	 * We'd like to test to see if the file is too big to mmap. Since we
+	 * don't know what size or type off_t's or size_t's are, or the largest
+	 * unsigned integral type is, or what random insanity the local C
+	 * compiler will perpetrate, doing the comparison in a portable way is
+	 * flatly impossible. Hope that mmap fails if the file is too large.
+	 */
+	if ((ret = __db_stat(dbenv, name, fd, &size1, NULL)) != 0)
+		goto err2;
+
+	/* Check to make sure the first block has been written. */
+	if ((size_t)size1 < sizeof(RLAYOUT)) {
+		ret = EAGAIN;
+		goto err2;
+	}
+
+	/* Map in whatever is there. */
+	if ((ret = __db_rmap(dbenv, fd, size1, &rp)) != 0)
+		goto err2;
+
+	/*
+	 * Check to make sure the region has been initialized. We can't just
+	 * grab the lock because the lock may not have been initialized yet.
+	 */
+	if (rp->majver == 0) {
+		ret = EAGAIN;
+		goto err2;
+	}
+
+	/* Get the region lock. */
+	if (!LF_ISSET(DB_MUTEXDEBUG))
+		(void)__db_mutex_lock(&rp->lock,
+		    fd, dbenv == NULL ? NULL : dbenv->db_yield);
+
+	/*
+	 * The file may have been half-written if we were descheduled between
+	 * getting the size of the file and checking the major version. Check
+	 * to make sure we got the entire file.
+	 */
+	if ((ret = __db_stat(dbenv, name, fd, &size2, NULL)) != 0)
+		goto err1;
+	if (size1 != size2) {
+		ret = EAGAIN;
+		goto err1;
+	}
+
+	/* The file may have just been deleted. */
+	if (F_ISSET(rp, DB_R_DELETED)) {
+		ret = EAGAIN;
+		goto err1;
+	}
+
+	/* Increment the reference count. */
+	++rp->refcnt;
+
+	/* Release the lock. */
+	if (!LF_ISSET(DB_MUTEXDEBUG))
+		(void)__db_mutex_unlock(&rp->lock, fd);
+
+	FREES(name);
+
+	/*
+	 * Publish the descriptor and the mapping only on success, so that a
+	 * failed open never leaves the caller with a descriptor that has
+	 * already been closed below.
+	 */
+	*fdp = fd;
+	*(void **)retp = rp;
+	return (0);
+
+err1:	if (!LF_ISSET(DB_MUTEXDEBUG))
+		(void)__db_mutex_unlock(&rp->lock, fd);
+err2:	/*
+	 * Unmap the number of bytes this call mapped (size1). The size stored
+	 * in the region header lives in shared memory and may be larger than
+	 * our mapping, e.g. when the file size changed underneath us.
+	 */
+	if (rp != NULL)
+		(void)__db_munmap(rp, (size_t)size1);
+	if (fd != -1)
+		(void)__db_close(fd);
+	FREES(name);
+	return (ret);
+}
+
+/*
+ * __db_rclose --
+ *	Detach from a shared memory region: drop one reference under the
+ *	region lock, unmap the region and close its file descriptor.
+ *
+ * Returns 0 if every step succeeded. If the lock cannot be acquired the
+ * function reports that and returns immediately without unmapping or
+ * closing anything. Otherwise all remaining steps are attempted, and the
+ * first failure among them is the one reported and returned.
+ *
+ * PUBLIC: int __db_rclose __P((DB_ENV *, int, void *));
+ */
+int
+__db_rclose(dbenv, fd, ptr)
+	DB_ENV *dbenv;
+	int fd;
+	void *ptr;
+{
+	RLAYOUT *region;
+	const char *what;
+	int rc, ret;
+
+	region = ptr;
+	what = NULL;
+
+	/* Nothing else is attempted if the region lock can't be taken. */
+	ret = __db_mutex_lock(&region->lock,
+	    fd, dbenv == NULL ? NULL : dbenv->db_yield);
+	if (ret != 0) {
+		what = "lock get";
+		goto report;
+	}
+
+	/* Drop our reference while holding the lock. */
+	region->refcnt--;
+
+	/* Give the lock back; this is the first step that can record a failure. */
+	rc = __db_mutex_unlock(&region->lock, fd);
+	if (rc != 0) {
+		ret = rc;
+		what = "lock release";
+	}
+
+	/* Throw away the mapping. */
+	rc = __db_munmap(ptr, region->size);
+	if (what == NULL && rc != 0) {
+		ret = rc;
+		what = "munmap";
+	}
+
+	/* Finally close the underlying descriptor. */
+	rc = __db_close(fd);
+	if (what == NULL && rc != 0) {
+		ret = rc;
+		what = "close";
+	}
+
+	if (what == NULL)
+		return (0);
+
+report:	__db_err(dbenv, "region detach: %s: %s", what, strerror(ret));
+	return (ret);
+}
+
+/*
+ * __db_runlink --
+ *	Remove a shared memory region.
+ *
+ * Returns 0 if the region file does not exist or was removed. When force
+ * is non-zero the file is simply unlinked (ENOENT counts as success).
+ * Otherwise the region is opened and locked; ENOENT is returned if it is
+ * already flagged DB_R_DELETED and EBUSY if its reference count shows
+ * another user. If the region is free it is flagged DB_R_DELETED, closed,
+ * and the file is unlinked, retrying a few times. Any other failure is
+ * logged and its error value returned.
+ *
+ * PUBLIC: int __db_runlink __P((DB_ENV *,
+ * PUBLIC: APPNAME, const char *, const char *, int));
+ */
+int
+__db_runlink(dbenv, appname, path, file, force)
+	DB_ENV *dbenv;
+	APPNAME appname;
+	const char *path, *file;
+	int force;
+{
+	RLAYOUT *rp;
+	int cnt, fd, ret, t_ret;
+	char *name;
+
+	rp = NULL;
+
+	/* Get the filename. */
+	if ((ret = __db_appname(dbenv, appname, path, file, NULL, &name)) != 0)
+		return (ret);
+
+	/* If the file doesn't exist, we're done; don't leak the name. */
+	if (__db_exists(name, NULL)) {
+		FREES(name);
+		return (0);		/* XXX: ENOENT? */
+	}
+
+	/*
+	 * If we're called with a force flag, try and unlink the file. This
+	 * may not succeed if the file is currently open, but there's nothing
+	 * we can do about that. There is a race condition between the check
+	 * for existence above and the actual unlink. If someone else snuck
+	 * in and removed it before we do the remove, then we might get an
+	 * ENOENT error. If we get the ENOENT, we treat it as success, just
+	 * as we do above.
+	 */
+	if (force) {
+		if ((ret = __db_unlink(name)) != 0 && ret != ENOENT)
+			goto err1;
+		FREES(name);
+		return (0);
+	}
+
+	/* Open and lock the region. */
+	if ((ret = __db_ropen(dbenv, appname, path, file, 0, &fd, &rp)) != 0)
+		goto err1;
+	(void)__db_mutex_lock(&rp->lock,
+	    fd, dbenv == NULL ? NULL : dbenv->db_yield);
+
+	/* If the region is currently being deleted, fail. */
+	if (F_ISSET(rp, DB_R_DELETED)) {
+		ret = ENOENT;		/* XXX: ENOENT? */
+		goto err2;
+	}
+
+	/* If the region is currently in use by someone else, fail. */
+	if (rp->refcnt > 1) {
+		ret = EBUSY;
+		goto err2;
+	}
+
+	/* Set the delete flag. */
+	F_SET(rp, DB_R_DELETED);
+
+	/*
+	 * Release the lock and close the region. The close result goes
+	 * straight into ret so that a failure is both reported and returned.
+	 */
+	(void)__db_mutex_unlock(&rp->lock, fd);
+	if ((ret = __db_rclose(dbenv, fd, rp)) != 0)
+		goto err1;
+
+	/*
+	 * Unlink the region. There's a race here -- other threads or
+	 * processes might be opening the region while we're trying to
+	 * remove it. They'll fail, because we've set the DELETED flag,
+	 * but they could still stop us from succeeding in the unlink.
+	 */
+	for (cnt = 5; cnt > 0; --cnt) {
+		if ((ret = __db_unlink(name)) == 0)
+			break;
+		(void)__db_sleep(0, 250000);
+	}
+	if (ret == 0) {
+		FREES(name);
+		return (0);
+	}
+
+	/*
+	 * Not a clue. Try to clear the DB_R_DELETED flag. The unlink error
+	 * stays in ret so that it is what gets reported and returned; the
+	 * result of the re-open only decides whether the flag can be cleared.
+	 *
+	 * NOTE(review): __db_ropen() returns EAGAIN for a region that has
+	 * DB_R_DELETED set, so this re-open looks unlikely to succeed.
+	 */
+	if ((t_ret = __db_ropen(dbenv, appname, path, file, 0, &fd, &rp)) != 0)
+		goto err1;
+	(void)__db_mutex_lock(&rp->lock,
+	    fd, dbenv == NULL ? NULL : dbenv->db_yield);
+	F_CLR(rp, DB_R_DELETED);
+	/* FALLTHROUGH */
+
+err2:	(void)__db_mutex_unlock(&rp->lock, fd);
+	(void)__db_rclose(dbenv, fd, rp);
+err1:	__db_err(dbenv, "region unlink: %s: %s", name, strerror(ret));
+	FREES(name);
+	return (ret);
+}
+
+/*
+ * DB creates all regions on 4K boundaries so that we don't make the
+ * underlying VM unhappy.
+ */
+#define	__DB_VMPAGESIZE	(4 * 1024)
+
+/*
+ * __db_rgrow --
+ *	Extend a region's backing file by at least incr bytes.
+ *
+ * The file is always extended in whole __DB_VMPAGESIZE pages, so the
+ * amount actually added is incr rounded up to the next page multiple.
+ * An incr of 0 adds nothing. Returns 0 on success; on failure the error
+ * is logged and returned (EIO if a page was only partially written).
+ *
+ * PUBLIC: int __db_rgrow __P((DB_ENV *, int, size_t));
+ */
+int
+__db_rgrow(dbenv, fd, incr)
+	DB_ENV *dbenv;
+	int fd;
+	size_t incr;
+{
+#ifdef MMAP_INIT_NEEDED
+	size_t i;
+#endif
+	ssize_t nw;
+	int ret;
+	char buf[__DB_VMPAGESIZE];
+
+	/* Seek to the end of the region. */
+	if ((ret = __db_lseek(fd, 0, 0, 0, SEEK_END)) != 0)
+		goto err;
+
+	/* Write nuls to the new bytes. */
+	memset(buf, 0, sizeof(buf));
+
+	/*
+	 * Historically, some systems required that all of the bytes of the
+	 * region be written before you could mmap it and access it randomly.
+	 */
+#ifdef MMAP_INIT_NEEDED
+	/* Extend the region by writing each new page. */
+	for (i = 0; i < incr; i += __DB_VMPAGESIZE) {
+		if ((ret = __db_write(fd, buf, sizeof(buf), &nw)) != 0)
+			goto err;
+		if (nw != sizeof(buf))
+			goto eio;
+	}
+#else
+	/*
+	 * A zero increment rounds to zero pages; without this check the
+	 * "last page" offset computed below would wrap around and seek far
+	 * past the end of the file. Nothing is written, matching what the
+	 * page-by-page variant above does for a zero increment.
+	 */
+	if (incr == 0)
+		return (0);
+
+	/*
+	 * Extend the region by writing the last page.
+	 *
+	 * Round off the increment to the next page boundary.
+	 */
+	incr += __DB_VMPAGESIZE - 1;
+	incr -= incr % __DB_VMPAGESIZE;
+
+	/* Write the last page, not the page after the last. */
+	if ((ret = __db_lseek(fd, 0, 0, incr - __DB_VMPAGESIZE, SEEK_CUR)) != 0)
+		goto err;
+	if ((ret = __db_write(fd, buf, sizeof(buf), &nw)) != 0)
+		goto err;
+	if (nw != sizeof(buf))
+		goto eio;
+#endif
+	return (0);
+
+eio:	ret = EIO;
+err:	__db_err(dbenv, "region grow: %s", strerror(ret));
+	return (ret);
+}
+
+/*
+ * __db_rremap --
+ *	Unmap the old region and map in a new region of a new size.
+ *
+ * Returns 0 on success, with the address of the new mapping stored through
+ * retp. If the unmap fails, that error is logged and returned and no new
+ * mapping is attempted; otherwise the result of the new mapping is
+ * returned as is.
+ *
+ * PUBLIC: int __db_rremap __P((DB_ENV *, void *, size_t, size_t, int, void *));
+ */
+int
+__db_rremap(dbenv, ptr, oldsize, newsize, fd, retp)
+	DB_ENV *dbenv;
+	void *ptr, *retp;
+	size_t oldsize, newsize;
+	int fd;
+{
+	int rc;
+
+	/* Drop the old mapping first; only then map at the new size. */
+	rc = __db_munmap(ptr, oldsize);
+	if (rc == 0)
+		return (__db_rmap(dbenv, fd, newsize, retp));
+
+	__db_err(dbenv, "region remap: munmap: %s", strerror(rc));
+	return (rc);
+}
+
+/*
+ * __db_rmap --
+ *	Attach to a shared memory region.
+ *
+ * Maps size bytes of fd and stores the resulting address through retp
+ * (which must point to a pointer). Returns 0 on success; on failure the
+ * error is logged and returned and *retp is left untouched.
+ */
+static int
+__db_rmap(dbenv, fd, size, retp)
+	DB_ENV *dbenv;
+	int fd;
+	size_t size;
+	void *retp;
+{
+	RLAYOUT *region;
+	int rc;
+
+	rc = __db_mmap(fd, size, 0, 0, &region);
+	if (rc != 0) {
+		__db_err(dbenv, "region map: mmap %s", strerror(rc));
+		return (rc);
+	}
+
+	/* Raise the size recorded in the region if it is below what we mapped. */
+	if (size > region->size)
+		region->size = size;
+
+	*(void **)retp = region;
+	return (0);
+}