/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 1997
 *	Sleepycat Software.  All rights reserved.
 */
#include "config.h"

#ifndef lint
static const char sccsid[] = "@(#)mp_fopen.c 10.24 (Sleepycat) 8/20/97";
#endif /* not lint */

#ifndef NO_SYSTEM_INCLUDES
/*
 * NOTE(review): the header names of these seven #include directives were
 * missing from the copy this file was restored from.  The list below is
 * reconstructed from the names the code uses (off_t/size_t, errno values,
 * calloc, strerror/memcmp/memcpy/memset/strlen) -- confirm against the
 * upstream revision.
 */
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>

#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#endif

#include "db_int.h"
#include "shqueue.h"
#include "db_shash.h"
#include "mp.h"
#include "common_ext.h"

static int __memp_mf_close __P((DB_MPOOL *, DB_MPOOLFILE *));
static int __memp_mf_open __P((DB_MPOOL *, DB_MPOOLFILE *,
    int, int, size_t, int, DBT *, u_int8_t *, int, MPOOLFILE **));

/*
 * memp_fopen --
 *	Open a backing file for the memory pool.
 *
 *	Checks that flags contains only DB_CREATE, DB_NOMMAP and DB_RDONLY,
 *	then hands off to __memp_fopen() with needlock set, so the region is
 *	locked while the shared file object is looked up.
 *
 *	Returns 0 on success, otherwise the error value from __db_fchk() or
 *	__memp_fopen().
 */
int
memp_fopen(dbmp, path,
    ftype, flags, mode, pagesize, lsn_offset, pgcookie, fileid, retp)
	DB_MPOOL *dbmp;
	const char *path;
	int ftype, flags, mode, lsn_offset;
	size_t pagesize;
	DBT *pgcookie;
	u_int8_t *fileid;
	DB_MPOOLFILE **retp;
{
	int ret;

	/* Validate arguments. */
	if ((ret = __db_fchk(dbmp->dbenv,
	    "memp_fopen", flags, DB_CREATE | DB_NOMMAP | DB_RDONLY)) != 0)
		return (ret);

	return (__memp_fopen(dbmp, path, ftype,
	    flags, mode, pagesize, lsn_offset, pgcookie, fileid, 1, retp));
}

/*
 * __memp_fopen --
 *	Open a backing file for the memory pool; internal version.
 *
 *	path		File to open; NULL requests a temporary file, for
 *			which no descriptor is opened here.
 *	pagesize	Must be non-zero; an existing file's size must be a
 *			multiple of it.
 *	needlock	Non-zero if the region must be locked around the
 *			shared MPOOLFILE lookup.
 *	retp		Receives the new DB_MPOOLFILE handle on success.
 *
 *	Returns 0 on success.  On failure returns EINVAL, ENOMEM or the
 *	error from a helper, after releasing everything allocated here.
 *
 * PUBLIC: int __memp_fopen __P((DB_MPOOL *, const char *, int, int,
 * PUBLIC:    int, size_t, int, DBT *, u_int8_t *, int, DB_MPOOLFILE **));
 */
int
__memp_fopen(dbmp, path, ftype,
    flags, mode, pagesize, lsn_offset, pgcookie, fileid, needlock, retp)
	DB_MPOOL *dbmp;
	const char *path;
	int ftype, flags, mode, lsn_offset, needlock;
	size_t pagesize;
	DBT *pgcookie;
	u_int8_t *fileid;
	DB_MPOOLFILE **retp;
{
	DB_ENV *dbenv;
	DB_MPOOLFILE *dbmfp;
	MPOOLFILE *mfp;
	off_t size;
	int ret;

	dbenv = dbmp->dbenv;
	ret = 0;

	/*
	 * Only set by __db_stat() for named files; give it a defined value
	 * so it is never indeterminate on the temporary-file path.
	 */
	size = 0;

	/* Require a non-zero pagesize. */
	if (pagesize == 0) {
		__db_err(dbenv, "memp_fopen: pagesize not specified");
		return (EINVAL);
	}

	/* Allocate and initialize the per-process structure. */
	if ((dbmfp =
	    (DB_MPOOLFILE *)calloc(1, sizeof(DB_MPOOLFILE))) == NULL) {
		__db_err(dbenv, "%s: %s",
		    path == NULL ? TEMPORARY : path, strerror(ENOMEM));
		return (ENOMEM);
	}
	LOCKINIT(dbmp, &dbmfp->mutex);
	dbmfp->dbmp = dbmp;
	dbmfp->fd = -1;
	if (LF_ISSET(DB_RDONLY))
		F_SET(dbmfp, MP_READONLY);

	if (path == NULL) {
		if (LF_ISSET(DB_RDONLY)) {
			__db_err(dbenv,
			    "memp_fopen: temporary files can't be readonly");
			ret = EINVAL;
			goto err;
		}
		dbmfp->path = (char *) TEMPORARY;
		F_SET(dbmfp, MP_PATH_TEMP);
	} else {
		/* Calculate the real name for this file. */
		if ((ret = __db_appname(dbenv,
		    DB_APP_DATA, NULL, path, NULL, &dbmfp->path)) != 0)
			goto err;
		F_SET(dbmfp, MP_PATH_ALLOC);

		/* Open the file. */
		if ((ret = __db_fdopen(dbmfp->path,
		    LF_ISSET(DB_CREATE | DB_RDONLY), DB_CREATE | DB_RDONLY,
		    mode, &dbmfp->fd)) != 0) {
			__db_err(dbenv, "%s: %s", dbmfp->path, strerror(ret));
			goto err;
		}

		/* Don't permit files that aren't a multiple of the pagesize. */
		if ((ret = __db_stat(dbenv,
		    dbmfp->path, dbmfp->fd, &size, NULL)) != 0)
			goto err;
		if (size % pagesize) {
			__db_err(dbenv,
			    "%s: file size not a multiple of the pagesize",
			    dbmfp->path);
			ret = EINVAL;
			goto err;
		}
	}

	/* Find/allocate the shared file object. */
	if (needlock)
		LOCKREGION(dbmp);
	ret = __memp_mf_open(dbmp, dbmfp, ftype,
	    F_ISSET(dbmfp, MP_READONLY), pagesize,
	    lsn_offset, pgcookie, fileid, F_ISSET(dbmfp, MP_PATH_TEMP), &mfp);
	if (needlock)
		UNLOCKREGION(dbmp);
	if (ret != 0)
		goto err;

	dbmfp->mfp = mfp;

	/*
	 * If a file:
	 *
	 *	+ is read-only
	 *	+ doesn't require any pgin/pgout support
	 *	+ is less than mp_mmapsize bytes in size.
	 *	+ and the DB_NOMMAP flag wasn't set
	 *
	 * we can mmap it instead of reading/writing buffers.  Don't do error
	 * checking based on the mmap call failure.  We want to do normal I/O
	 * on the file if the reason we failed was because the file was on an
	 * NFS mounted partition, and we can fail in buffer I/O just as easily
	 * as here.
	 *
	 * XXX
	 * We'd like to test to see if the file is too big to mmap.  Since we
	 * don't know what size or type off_t's or size_t's are, or the largest
	 * unsigned integral type is, or what random insanity the local C
	 * compiler will perpetrate, doing the comparison in a portable way is
	 * flatly impossible.  Hope that mmap fails if the file is too large.
	 */
#define	DB_MAXMMAPSIZE	(10 * 1024 * 1024)	/* 10 Mb. */
	dbmfp->addr = NULL;
	mfp->can_mmap = F_ISSET(dbmfp, MP_READONLY) &&
	    ftype == 0 && !LF_ISSET(DB_NOMMAP) && path != NULL &&
	    size <= (dbenv == NULL || dbenv->mp_mmapsize == 0 ?
	    DB_MAXMMAPSIZE : (off_t)dbenv->mp_mmapsize);
	if (mfp->can_mmap) {
		dbmfp->len = size;
		if (__db_mmap(dbmfp->fd, dbmfp->len, 1, 1, &dbmfp->addr) != 0) {
			/* Fall back to buffered I/O for this file. */
			mfp->can_mmap = 0;
			dbmfp->addr = NULL;
		}
	}

	/* Link the handle onto the per-process list of open files. */
	LOCKHANDLE(dbmp, &dbmp->mutex);
	TAILQ_INSERT_TAIL(&dbmp->dbmfq, dbmfp, q);
	UNLOCKHANDLE(dbmp, &dbmp->mutex);

	*retp = dbmfp;
	return (0);

err:	/*
	 * Release the path (only if we allocated it), the descriptor (only
	 * if we opened one) and the handle itself.  dbmfp is never NULL here.
	 */
	if (F_ISSET(dbmfp, MP_PATH_ALLOC))
		FREES(dbmfp->path);
	if (dbmfp->fd != -1)
		(void)__db_close(dbmfp->fd);
	FREE(dbmfp, sizeof(DB_MPOOLFILE));
	return (ret);
}

/*
 * __memp_mf_open --
 *	Open an MPOOLFILE.
 *
 *	For a non-temporary file, searches the region's MPOOLFILE list for
 *	an entry with a matching file id.  A match has its reference count
 *	incremented (and loses can_mmap if this open is not read-only); a
 *	match with a different ftype or pagesize is an error.  Otherwise a
 *	new MPOOLFILE is allocated, filled in and put at the head of the
 *	list.
 *
 *	On success returns 0 with *retp set to the MPOOLFILE.  On failure
 *	returns a non-zero error value with *retp set to NULL, so the caller
 *	can rely on the return value alone.
 */
static int
__memp_mf_open(dbmp, dbmfp,
    ftype, readonly, pagesize, lsn_offset, pgcookie, fileid, istemp, retp)
	DB_MPOOL *dbmp;
	DB_MPOOLFILE *dbmfp;
	int ftype, readonly, lsn_offset, istemp;
	size_t pagesize;
	DBT *pgcookie;
	u_int8_t *fileid;
	MPOOLFILE **retp;
{
	MPOOLFILE *mfp;
	int ret;
	u_int8_t idbuf[DB_FILE_ID_LEN];
	void *p;

	mfp = NULL;
	ret = 0;

	/* Temporary files can't match previous files. */
	if (istemp)
		goto alloc;

	/*
	 * Get the file id if we weren't given one.  Generated file id's don't
	 * use timestamps, otherwise there'd be no chance of anyone joining
	 * the party.
	 */
	if (fileid == NULL) {
		if ((ret =
		    __db_fileid(dbmp->dbenv, dbmfp->path, 0, idbuf)) != 0)
			goto ret1;
		fileid = idbuf;
	}

	/* Walk the list of MPOOLFILE's, looking for a matching file. */
	for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
	    mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile))
		if (!memcmp(fileid,
		    ADDR(dbmp, mfp->fileid_off), DB_FILE_ID_LEN)) {
			if (ftype != mfp->ftype ||
			    pagesize != mfp->stat.st_pagesize) {
				__db_err(dbmp->dbenv,
				    "%s: ftype or pagesize changed",
				    dbmfp->path);
				ret = EINVAL;
				mfp = NULL;
				goto ret1;
			}
			/*
			 * Found it: increment the reference count and update
			 * the mmap-able status.
			 */
			++mfp->ref;
			if (!readonly)
				mfp->can_mmap = 0;
			goto ret1;
		}

	/* Allocate a new MPOOLFILE. */
alloc:	if ((ret = __memp_ralloc(dbmp, sizeof(MPOOLFILE), NULL, &mfp)) != 0) {
		mfp = NULL;
		goto ret1;
	}

	/* Initialize the structure; all shared-memory offsets start at 0. */
	memset(mfp, 0, sizeof(MPOOLFILE));
	mfp->ref = 1;
	mfp->ftype = ftype;
	mfp->lsn_off = lsn_offset;
	mfp->stat.st_pagesize = pagesize;

	/* Copy the file path into shared memory. */
	if ((ret = __memp_ralloc(dbmp,
	    strlen(dbmfp->path) + 1, &mfp->path_off, &p)) != 0)
		goto err;
	memcpy(p, dbmfp->path, strlen(dbmfp->path) + 1);

	/* Copy the file identification string into shared memory. */
	if (istemp)
		mfp->fileid_off = 0;
	else {
		if ((ret = __memp_ralloc(dbmp,
		    DB_FILE_ID_LEN, &mfp->fileid_off, &p)) != 0)
			goto err;
		memcpy(p, fileid, DB_FILE_ID_LEN);
	}

	/* Copy the page cookie into shared memory. */
	if (pgcookie == NULL || pgcookie->size == 0) {
		mfp->pgcookie_len = 0;
		mfp->pgcookie_off = 0;
	} else {
		if ((ret = __memp_ralloc(dbmp,
		    pgcookie->size, &mfp->pgcookie_off, &p)) != 0)
			goto err;
		memcpy(p, pgcookie->data, pgcookie->size);
		mfp->pgcookie_len = pgcookie->size;
	}

	/* Prepend the MPOOLFILE to the list of MPOOLFILE's. */
	SH_TAILQ_INSERT_HEAD(&dbmp->mp->mpfq, mfp, q, __mpoolfile);
	goto ret1;

err:	/*
	 * Undo a partially built MPOOLFILE.  Offsets are still 0 (from the
	 * memset above) for pieces never allocated, so free only the ones
	 * that are set.  The page cookie is the last allocation and so is
	 * never outstanding when we get here.
	 */
	if (mfp->path_off != 0)
		__db_shalloc_free(dbmp->addr, ADDR(dbmp, mfp->path_off));
	if (mfp->fileid_off != 0)
		__db_shalloc_free(dbmp->addr, ADDR(dbmp, mfp->fileid_off));
	__db_shalloc_free(dbmp->addr, mfp);
	mfp = NULL;

ret1:	*retp = mfp;
	return (ret);
}

/*
 * memp_fclose --
 *	Close a backing file for the memory pool.
 *
 *	Unlinks the handle from the per-process list, drops its reference on
 *	the shared MPOOLFILE, unmaps and closes the file, and frees the
 *	handle.  Cleanup always runs to completion; the return value is the
 *	first error seen from __db_munmap() or __db_close(), or 0.
 */
int
memp_fclose(dbmfp)
	DB_MPOOLFILE *dbmfp;
{
	DB_MPOOL *dbmp;
	int ret, t_ret;

	dbmp = dbmfp->dbmp;
	ret = 0;

	/* Complain if pinned blocks never returned. */
	if (dbmfp->pinref != 0)
		__db_err(dbmp->dbenv, "%s: close: %lu blocks left pinned",
		    dbmfp->path, (u_long)dbmfp->pinref);

	/* Remove the DB_MPOOLFILE structure from the list. */
	LOCKHANDLE(dbmp, &dbmp->mutex);
	TAILQ_REMOVE(&dbmp->dbmfq, dbmfp, q);
	UNLOCKHANDLE(dbmp, &dbmp->mutex);

	/* Close the underlying MPOOLFILE. */
	(void)__memp_mf_close(dbmp, dbmfp);

	/* Discard any mmap information. */
	if (dbmfp->addr != NULL &&
	    (ret = __db_munmap(dbmfp->addr, dbmfp->len)) != 0)
		__db_err(dbmp->dbenv, "%s: %s", dbmfp->path, strerror(ret));

	/* Close the file; temporary files may not yet have been created. */
	if (dbmfp->fd != -1 && (t_ret = __db_close(dbmfp->fd)) != 0) {
		__db_err(dbmp->dbenv, "%s: %s", dbmfp->path, strerror(t_ret));
		/* Keep the earlier munmap error if there was one. */
		if (ret == 0)
			ret = t_ret;
	}

	/* Potentially allocated path. */
	if (F_ISSET(dbmfp, MP_PATH_ALLOC))
		FREES(dbmfp->path);

	/* Free the DB_MPOOLFILE structure. */
	FREE(dbmfp, sizeof(DB_MPOOLFILE));

	return (ret);
}

/*
 * __memp_mf_close --
 *	Close down an MPOOLFILE.
 *
 *	Runs with the region locked.  If other references remain, only the
 *	reference count is decremented.  Otherwise the file's buffers are
 *	moved to the free list, the MPOOLFILE is unlinked from the region's
 *	list and its shared memory is released.  Always returns 0.
 */
static int
__memp_mf_close(dbmp, dbmfp)
	DB_MPOOL *dbmp;
	DB_MPOOLFILE *dbmfp;
{
	BH *bhp, *nbhp;
	MPOOL *mp;
	MPOOLFILE *mfp;
	size_t mf_offset;

	mp = dbmp->mp;
	mfp = dbmfp->mfp;

	LOCKREGION(dbmp);

	/* If more than a single reference, simply decrement. */
	if (mfp->ref > 1) {
		--mfp->ref;
		goto ret1;
	}

	/*
	 * Move any BH's held by the file to the free list.  We don't free the
	 * memory itself because we may be discarding the memory pool, and it's
	 * fairly expensive to reintegrate the buffers back into the region for
	 * no purpose.
	 */
	mf_offset = OFFSET(dbmp, mfp);
	for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh); bhp != NULL; bhp = nbhp) {
		/* Fetch the successor first; bhp may be requeued below. */
		nbhp = SH_TAILQ_NEXT(bhp, q, __bh);

#ifdef DEBUG_NO_DIRTY
		/* Complain if we find any blocks that were left dirty. */
		if (F_ISSET(bhp, BH_DIRTY))
			__db_err(dbmp->dbenv,
			    "%s: close: pgno %lu left dirty; ref %lu",
			    dbmfp->path, (u_long)bhp->pgno, (u_long)bhp->ref);
#endif

		if (bhp->mf_offset == mf_offset) {
			__memp_bhfree(dbmp, mfp, bhp, 0);
			SH_TAILQ_INSERT_HEAD(&mp->bhfq, bhp, q, __bh);
		}
	}

	/* Delete from the list of MPOOLFILEs. */
	SH_TAILQ_REMOVE(&mp->mpfq, mfp, q, __mpoolfile);

	/*
	 * Free the space.  The MPOOLFILE itself must go last: the offsets of
	 * the other allocations are read out of it.
	 */
	__db_shalloc_free(dbmp->addr, ADDR(dbmp, mfp->path_off));
	if (mfp->fileid_off != 0)
		__db_shalloc_free(dbmp->addr, ADDR(dbmp, mfp->fileid_off));
	if (mfp->pgcookie_off != 0)
		__db_shalloc_free(dbmp->addr, ADDR(dbmp, mfp->pgcookie_off));
	__db_shalloc_free(dbmp->addr, mfp);

ret1:	UNLOCKREGION(dbmp);
	return (0);
}