From 8e8a4603b5422c9145880e73b23bc4c2c4de0098 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Fri, 1 Feb 2008 11:59:09 -0800 Subject: ocfs2: Move slot map access into slot_map.c journal.c and dlmglue.c would refresh the slot map by hand. Instead, have the update and clear functions do the work inside slot_map.c. The eventual result is to make ocfs2_slot_info defined privately in slot_map.c Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/journal.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/ocfs2/journal.c') diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index f31c7e8c19c..c2e654ee703 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -1123,8 +1123,7 @@ static int ocfs2_recover_node(struct ocfs2_super *osb, /* Likewise, this would be a strange but ultimately not so * harmful place to get an error... */ - ocfs2_clear_slot(si, slot_num); - status = ocfs2_update_disk_slots(osb, si); + status = ocfs2_clear_slot(osb, slot_num); if (status < 0) mlog_errno(status); -- cgit v1.2.3 From d85b20e4b300edfd290f21fc2d790ba16d2f225b Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 1 Feb 2008 12:01:05 -0800 Subject: ocfs2: Make ocfs2_slot_info private. Just use osb_lock around the ocfs2_slot_info data. This allows us to take the ocfs2_slot_info structure private in slot_info.c. All access is now via accessors. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/journal.c | 24 ++++++++-------- fs/ocfs2/ocfs2.h | 1 + fs/ocfs2/slot_map.c | 81 +++++++++++++++++++++++++++++++++++++---------------- fs/ocfs2/slot_map.h | 25 +++-------------- 4 files changed, 74 insertions(+), 57 deletions(-) (limited to 'fs/ocfs2/journal.c') diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index c2e654ee703..ed0c6d0850d 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -1079,7 +1079,6 @@ static int ocfs2_recover_node(struct ocfs2_super *osb, { int status = 0; int slot_num; - struct ocfs2_slot_info *si = osb->slot_info; struct ocfs2_dinode *la_copy = NULL; struct ocfs2_dinode *tl_copy = NULL; @@ -1092,8 +1091,8 @@ static int ocfs2_recover_node(struct ocfs2_super *osb, * case we should've called ocfs2_journal_load instead. */ BUG_ON(osb->node_num == node_num); - slot_num = ocfs2_node_num_to_slot(si, node_num); - if (slot_num == OCFS2_INVALID_SLOT) { + slot_num = ocfs2_node_num_to_slot(osb, node_num); + if (slot_num == -ENOENT) { status = 0; mlog(0, "no slot for this node, so no recovery required.\n"); goto done; @@ -1183,23 +1182,24 @@ bail: * slot info struct has been updated from disk. */ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) { - int status, i, node_num; - struct ocfs2_slot_info *si = osb->slot_info; + unsigned int node_num; + int status, i; /* This is called with the super block cluster lock, so we * know that the slot map can't change underneath us. */ - spin_lock(&si->si_lock); - for(i = 0; i < si->si_num_slots; i++) { + spin_lock(&osb->osb_lock); + for (i = 0; i < osb->max_slots; i++) { if (i == osb->slot_num) continue; - if (ocfs2_is_empty_slot(si, i)) + + status = ocfs2_slot_to_node_num_locked(osb, i, &node_num); + if (status == -ENOENT) continue; - node_num = si->si_global_node_nums[i]; if (ocfs2_node_map_test_bit(osb, &osb->recovery_map, node_num)) continue; - spin_unlock(&si->si_lock); + spin_unlock(&osb->osb_lock); /* Ok, we have a slot occupied by another node which * is not in the recovery map. We trylock his journal @@ -1215,9 +1215,9 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) goto bail; } - spin_lock(&si->si_lock); + spin_lock(&osb->osb_lock); } - spin_unlock(&si->si_lock); + spin_unlock(&osb->osb_lock); status = 0; bail: diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 6546cef212e..ee3f675a421 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -179,6 +179,7 @@ enum ocfs2_mount_options #define OCFS2_DEFAULT_ATIME_QUANTUM 60 struct ocfs2_journal; +struct ocfs2_slot_info; struct ocfs2_super { struct task_struct *commit_task; diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index f5727b8cc91..762360d95e9 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c @@ -42,13 +42,25 @@ #include "buffer_head_io.h" +struct ocfs2_slot_info { + struct inode *si_inode; + struct buffer_head *si_bh; + unsigned int si_num_slots; + unsigned int si_size; + s16 si_global_node_nums[OCFS2_MAX_SLOTS]; +}; + + static s16 __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, s16 global); static void __ocfs2_fill_slot(struct ocfs2_slot_info *si, s16 slot_num, s16 node_num); -/* post the slot information on disk into our slot_info struct. */ +/* + * Post the slot information on disk into our slot_info struct. + * Must be protected by osb_lock. + */ static void ocfs2_update_slot_info(struct ocfs2_slot_info *si) { int i; @@ -56,13 +68,10 @@ static void ocfs2_update_slot_info(struct ocfs2_slot_info *si) /* we don't read the slot block here as ocfs2_super_lock * should've made sure we have the most recent copy. */ - spin_lock(&si->si_lock); disk_info = (__le16 *) si->si_bh->b_data; for (i = 0; i < si->si_size; i++) si->si_global_node_nums[i] = le16_to_cpu(disk_info[i]); - - spin_unlock(&si->si_lock); } int ocfs2_refresh_slot_info(struct ocfs2_super *osb) @@ -76,8 +85,11 @@ int ocfs2_refresh_slot_info(struct ocfs2_super *osb) bh = si->si_bh; ret = ocfs2_read_block(osb, bh->b_blocknr, &bh, 0, si->si_inode); - if (ret == 0) + if (ret == 0) { + spin_lock(&osb->osb_lock); ocfs2_update_slot_info(si); + spin_unlock(&osb->osb_lock); + } return ret; } @@ -90,10 +102,10 @@ static int ocfs2_update_disk_slots(struct ocfs2_super *osb, int status, i; __le16 *disk_info = (__le16 *) si->si_bh->b_data; - spin_lock(&si->si_lock); + spin_lock(&osb->osb_lock); for (i = 0; i < si->si_size; i++) disk_info[i] = cpu_to_le16(si->si_global_node_nums[i]); - spin_unlock(&si->si_lock); + spin_unlock(&osb->osb_lock); status = ocfs2_write_block(osb, si->si_bh, si->si_inode); if (status < 0) @@ -119,7 +131,8 @@ static s16 __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, return ret; } -static s16 __ocfs2_find_empty_slot(struct ocfs2_slot_info *si, s16 preferred) +static s16 __ocfs2_find_empty_slot(struct ocfs2_slot_info *si, + s16 preferred) { int i; s16 ret = OCFS2_INVALID_SLOT; @@ -141,15 +154,36 @@ out: return ret; } -s16 ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, - s16 global) +int ocfs2_node_num_to_slot(struct ocfs2_super *osb, unsigned int node_num) { - s16 ret; + s16 slot; + struct ocfs2_slot_info *si = osb->slot_info; - spin_lock(&si->si_lock); - ret = __ocfs2_node_num_to_slot(si, global); - spin_unlock(&si->si_lock); - return ret; + spin_lock(&osb->osb_lock); + slot = __ocfs2_node_num_to_slot(si, node_num); + spin_unlock(&osb->osb_lock); + + if (slot == OCFS2_INVALID_SLOT) + return -ENOENT; + + return slot; +} + +int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num, + unsigned int *node_num) +{ + struct ocfs2_slot_info *si = osb->slot_info; + + assert_spin_locked(&osb->osb_lock); + + BUG_ON(slot_num < 0); + BUG_ON(slot_num > osb->max_slots); + + if (si->si_global_node_nums[slot_num] == OCFS2_INVALID_SLOT) + return -ENOENT; + + *node_num = si->si_global_node_nums[slot_num]; + return 0; } static void __ocfs2_free_slot_info(struct ocfs2_slot_info *si) @@ -184,9 +218,9 @@ int ocfs2_clear_slot(struct ocfs2_super *osb, s16 slot_num) if (si == NULL) return 0; - spin_lock(&si->si_lock); + spin_lock(&osb->osb_lock); __ocfs2_fill_slot(si, slot_num, OCFS2_INVALID_SLOT); - spin_unlock(&si->si_lock); + spin_unlock(&osb->osb_lock); return ocfs2_update_disk_slots(osb, osb->slot_info); } @@ -206,7 +240,6 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb) goto bail; } - spin_lock_init(&si->si_lock); si->si_num_slots = osb->max_slots; si->si_size = OCFS2_MAX_SLOTS; @@ -235,7 +268,7 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb) si->si_inode = inode; si->si_bh = bh; - osb->slot_info = si; + osb->slot_info = (struct ocfs2_slot_info *)si; bail: if (status < 0 && si) __ocfs2_free_slot_info(si); @@ -261,9 +294,9 @@ int ocfs2_find_slot(struct ocfs2_super *osb) si = osb->slot_info; + spin_lock(&osb->osb_lock); ocfs2_update_slot_info(si); - spin_lock(&si->si_lock); /* search for ourselves first and take the slot if it already * exists. Perhaps we need to mark this in a variable for our * own journal recovery? Possibly not, though we certainly @@ -274,7 +307,7 @@ int ocfs2_find_slot(struct ocfs2_super *osb) * one. */ slot = __ocfs2_find_empty_slot(si, osb->preferred_slot); if (slot == OCFS2_INVALID_SLOT) { - spin_unlock(&si->si_lock); + spin_unlock(&osb->osb_lock); mlog(ML_ERROR, "no free slots available!\n"); status = -EINVAL; goto bail; @@ -285,7 +318,7 @@ int ocfs2_find_slot(struct ocfs2_super *osb) __ocfs2_fill_slot(si, slot, osb->node_num); osb->slot_num = slot; - spin_unlock(&si->si_lock); + spin_unlock(&osb->osb_lock); mlog(0, "taking node slot %d\n", osb->slot_num); @@ -306,12 +339,12 @@ void ocfs2_put_slot(struct ocfs2_super *osb) if (!si) return; + spin_lock(&osb->osb_lock); ocfs2_update_slot_info(si); - spin_lock(&si->si_lock); __ocfs2_fill_slot(si, osb->slot_num, OCFS2_INVALID_SLOT); osb->slot_num = OCFS2_INVALID_SLOT; - spin_unlock(&si->si_lock); + spin_unlock(&osb->osb_lock); status = ocfs2_update_disk_slots(osb, si); if (status < 0) { diff --git a/fs/ocfs2/slot_map.h b/fs/ocfs2/slot_map.h index b029ffdc8ea..5118e89c84e 100644 --- a/fs/ocfs2/slot_map.h +++ b/fs/ocfs2/slot_map.h @@ -27,16 +27,6 @@ #ifndef SLOTMAP_H #define SLOTMAP_H -struct ocfs2_slot_info { - spinlock_t si_lock; - - struct inode *si_inode; - struct buffer_head *si_bh; - unsigned int si_num_slots; - unsigned int si_size; - s16 si_global_node_nums[OCFS2_MAX_SLOTS]; -}; - int ocfs2_init_slot_info(struct ocfs2_super *osb); void ocfs2_free_slot_info(struct ocfs2_super *osb); @@ -45,17 +35,10 @@ void ocfs2_put_slot(struct ocfs2_super *osb); int ocfs2_refresh_slot_info(struct ocfs2_super *osb); -s16 ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, - s16 global); -int ocfs2_clear_slot(struct ocfs2_super *osb, s16 slot_num); +int ocfs2_node_num_to_slot(struct ocfs2_super *osb, unsigned int node_num); +int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num, + unsigned int *node_num); -static inline int ocfs2_is_empty_slot(struct ocfs2_slot_info *si, - int slot_num) -{ - BUG_ON(slot_num == OCFS2_INVALID_SLOT); - assert_spin_locked(&si->si_lock); - - return si->si_global_node_nums[slot_num] == OCFS2_INVALID_SLOT; -} +int ocfs2_clear_slot(struct ocfs2_super *osb, s16 slot_num); #endif -- cgit v1.2.3 From 553abd046af609191a91af7289d87d477adc659f Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 1 Feb 2008 12:03:57 -0800 Subject: ocfs2: Change the recovery map to an array of node numbers. The old recovery map was a bitmap of node numbers. This was sufficient for the maximum node number of 254. Going forward, we want node numbers to be UINT32. Thus, we need a new recovery map. Note that we can't keep track of slots here. We must write down the node number to recovery *before* we get the locks needed to convert a node number into a slot number. The recovery map is now an array of unsigned ints, max_slots in size. It moves to journal.c with the rest of recovery. Because it needs to be initialized, we move all of recovery initialization into a new function, ocfs2_recovery_init(). This actually cleans up ocfs2_initialize_super() a little as well. Following on, recovery cleaup becomes part of ocfs2_recovery_exit(). A number of node map functions are rendered obsolete and are removed. Finally, waiting on recovery is wrapped in a function rather than naked checks on the recovery_event. This is a cleanup from Mark. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/dlmglue.c | 6 +- fs/ocfs2/heartbeat.c | 111 ------------------------------- fs/ocfs2/heartbeat.h | 14 ---- fs/ocfs2/journal.c | 181 ++++++++++++++++++++++++++++++++++++++++++++++----- fs/ocfs2/journal.h | 4 ++ fs/ocfs2/ocfs2.h | 3 +- fs/ocfs2/super.c | 33 +++------- 7 files changed, 182 insertions(+), 170 deletions(-) (limited to 'fs/ocfs2/journal.c') diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 1a80fa9e7c9..15a5167e051 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -1950,8 +1950,7 @@ int ocfs2_inode_lock_full(struct inode *inode, goto local; if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) - wait_event(osb->recovery_event, - ocfs2_node_map_is_empty(osb, &osb->recovery_map)); + ocfs2_wait_for_recovery(osb); lockres = &OCFS2_I(inode)->ip_inode_lockres; level = ex ? LKM_EXMODE : LKM_PRMODE; @@ -1974,8 +1973,7 @@ int ocfs2_inode_lock_full(struct inode *inode, * committed to owning this lock so we don't allow signals to * abort the operation. */ if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) - wait_event(osb->recovery_event, - ocfs2_node_map_is_empty(osb, &osb->recovery_map)); + ocfs2_wait_for_recovery(osb); local: /* diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c index 0758daf64da..80de2397c16 100644 --- a/fs/ocfs2/heartbeat.c +++ b/fs/ocfs2/heartbeat.c @@ -48,7 +48,6 @@ static inline void __ocfs2_node_map_set_bit(struct ocfs2_node_map *map, int bit); static inline void __ocfs2_node_map_clear_bit(struct ocfs2_node_map *map, int bit); -static inline int __ocfs2_node_map_is_empty(struct ocfs2_node_map *map); /* special case -1 for now * TODO: should *really* make sure the calling func never passes -1!! */ @@ -62,7 +61,6 @@ static void ocfs2_node_map_init(struct ocfs2_node_map *map) void ocfs2_init_node_maps(struct ocfs2_super *osb) { spin_lock_init(&osb->node_map_lock); - ocfs2_node_map_init(&osb->recovery_map); ocfs2_node_map_init(&osb->osb_recovering_orphan_dirs); } @@ -192,112 +190,3 @@ int ocfs2_node_map_test_bit(struct ocfs2_super *osb, return ret; } -static inline int __ocfs2_node_map_is_empty(struct ocfs2_node_map *map) -{ - int bit; - bit = find_next_bit(map->map, map->num_nodes, 0); - if (bit < map->num_nodes) - return 0; - return 1; -} - -int ocfs2_node_map_is_empty(struct ocfs2_super *osb, - struct ocfs2_node_map *map) -{ - int ret; - BUG_ON(map->num_nodes == 0); - spin_lock(&osb->node_map_lock); - ret = __ocfs2_node_map_is_empty(map); - spin_unlock(&osb->node_map_lock); - return ret; -} - -#if 0 - -static void __ocfs2_node_map_dup(struct ocfs2_node_map *target, - struct ocfs2_node_map *from) -{ - BUG_ON(from->num_nodes == 0); - ocfs2_node_map_init(target); - __ocfs2_node_map_set(target, from); -} - -/* returns 1 if bit is the only bit set in target, 0 otherwise */ -int ocfs2_node_map_is_only(struct ocfs2_super *osb, - struct ocfs2_node_map *target, - int bit) -{ - struct ocfs2_node_map temp; - int ret; - - spin_lock(&osb->node_map_lock); - __ocfs2_node_map_dup(&temp, target); - __ocfs2_node_map_clear_bit(&temp, bit); - ret = __ocfs2_node_map_is_empty(&temp); - spin_unlock(&osb->node_map_lock); - - return ret; -} - -static void __ocfs2_node_map_set(struct ocfs2_node_map *target, - struct ocfs2_node_map *from) -{ - int num_longs, i; - - BUG_ON(target->num_nodes != from->num_nodes); - BUG_ON(target->num_nodes == 0); - - num_longs = BITS_TO_LONGS(target->num_nodes); - for (i = 0; i < num_longs; i++) - target->map[i] = from->map[i]; -} - -#endif /* 0 */ - -/* Returns whether the recovery bit was actually set - it may not be - * if a node is still marked as needing recovery */ -int ocfs2_recovery_map_set(struct ocfs2_super *osb, - int num) -{ - int set = 0; - - spin_lock(&osb->node_map_lock); - - if (!test_bit(num, osb->recovery_map.map)) { - __ocfs2_node_map_set_bit(&osb->recovery_map, num); - set = 1; - } - - spin_unlock(&osb->node_map_lock); - - return set; -} - -void ocfs2_recovery_map_clear(struct ocfs2_super *osb, - int num) -{ - ocfs2_node_map_clear_bit(osb, &osb->recovery_map, num); -} - -int ocfs2_node_map_iterate(struct ocfs2_super *osb, - struct ocfs2_node_map *map, - int idx) -{ - int i = idx; - - idx = O2NM_INVALID_NODE_NUM; - spin_lock(&osb->node_map_lock); - if ((i != O2NM_INVALID_NODE_NUM) && - (i >= 0) && - (i < map->num_nodes)) { - while(i < map->num_nodes) { - if (test_bit(i, map->map)) { - idx = i; - break; - } - i++; - } - } - spin_unlock(&osb->node_map_lock); - return idx; -} diff --git a/fs/ocfs2/heartbeat.h b/fs/ocfs2/heartbeat.h index eac63aed761..98d8ffc995b 100644 --- a/fs/ocfs2/heartbeat.h +++ b/fs/ocfs2/heartbeat.h @@ -33,8 +33,6 @@ void ocfs2_stop_heartbeat(struct ocfs2_super *osb); /* node map functions - used to keep track of mounted and in-recovery * nodes. */ -int ocfs2_node_map_is_empty(struct ocfs2_super *osb, - struct ocfs2_node_map *map); void ocfs2_node_map_set_bit(struct ocfs2_super *osb, struct ocfs2_node_map *map, int bit); @@ -44,17 +42,5 @@ void ocfs2_node_map_clear_bit(struct ocfs2_super *osb, int ocfs2_node_map_test_bit(struct ocfs2_super *osb, struct ocfs2_node_map *map, int bit); -int ocfs2_node_map_iterate(struct ocfs2_super *osb, - struct ocfs2_node_map *map, - int idx); -static inline int ocfs2_node_map_first_set_bit(struct ocfs2_super *osb, - struct ocfs2_node_map *map) -{ - return ocfs2_node_map_iterate(osb, map, 0); -} -int ocfs2_recovery_map_set(struct ocfs2_super *osb, - int num); -void ocfs2_recovery_map_clear(struct ocfs2_super *osb, - int num); #endif /* OCFS2_HEARTBEAT_H */ diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index ed0c6d0850d..ca4c0ea5a4c 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -64,6 +64,137 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, int slot); static int ocfs2_commit_thread(void *arg); + +/* + * The recovery_list is a simple linked list of node numbers to recover. + * It is protected by the recovery_lock. + */ + +struct ocfs2_recovery_map { + int rm_used; + unsigned int *rm_entries; +}; + +int ocfs2_recovery_init(struct ocfs2_super *osb) +{ + struct ocfs2_recovery_map *rm; + + mutex_init(&osb->recovery_lock); + osb->disable_recovery = 0; + osb->recovery_thread_task = NULL; + init_waitqueue_head(&osb->recovery_event); + + rm = kzalloc(sizeof(struct ocfs2_recovery_map) + + osb->max_slots * sizeof(unsigned int), + GFP_KERNEL); + if (!rm) { + mlog_errno(-ENOMEM); + return -ENOMEM; + } + + rm->rm_entries = (unsigned int *)((char *)rm + + sizeof(struct ocfs2_recovery_map)); + osb->recovery_map = rm; + + return 0; +} + +/* we can't grab the goofy sem lock from inside wait_event, so we use + * memory barriers to make sure that we'll see the null task before + * being woken up */ +static int ocfs2_recovery_thread_running(struct ocfs2_super *osb) +{ + mb(); + return osb->recovery_thread_task != NULL; +} + +void ocfs2_recovery_exit(struct ocfs2_super *osb) +{ + struct ocfs2_recovery_map *rm; + + /* disable any new recovery threads and wait for any currently + * running ones to exit. Do this before setting the vol_state. */ + mutex_lock(&osb->recovery_lock); + osb->disable_recovery = 1; + mutex_unlock(&osb->recovery_lock); + wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb)); + + /* At this point, we know that no more recovery threads can be + * launched, so wait for any recovery completion work to + * complete. */ + flush_workqueue(ocfs2_wq); + + /* + * Now that recovery is shut down, and the osb is about to be + * freed, the osb_lock is not taken here. + */ + rm = osb->recovery_map; + /* XXX: Should we bug if there are dirty entries? */ + + kfree(rm); +} + +static int __ocfs2_recovery_map_test(struct ocfs2_super *osb, + unsigned int node_num) +{ + int i; + struct ocfs2_recovery_map *rm = osb->recovery_map; + + assert_spin_locked(&osb->osb_lock); + + for (i = 0; i < rm->rm_used; i++) { + if (rm->rm_entries[i] == node_num) + return 1; + } + + return 0; +} + +/* Behaves like test-and-set. Returns the previous value */ +static int ocfs2_recovery_map_set(struct ocfs2_super *osb, + unsigned int node_num) +{ + struct ocfs2_recovery_map *rm = osb->recovery_map; + + spin_lock(&osb->osb_lock); + if (__ocfs2_recovery_map_test(osb, node_num)) { + spin_unlock(&osb->osb_lock); + return 1; + } + + /* XXX: Can this be exploited? Not from o2dlm... */ + BUG_ON(rm->rm_used >= osb->max_slots); + + rm->rm_entries[rm->rm_used] = node_num; + rm->rm_used++; + spin_unlock(&osb->osb_lock); + + return 0; +} + +static void ocfs2_recovery_map_clear(struct ocfs2_super *osb, + unsigned int node_num) +{ + int i; + struct ocfs2_recovery_map *rm = osb->recovery_map; + + spin_lock(&osb->osb_lock); + + for (i = 0; i < rm->rm_used; i++) { + if (rm->rm_entries[i] == node_num) + break; + } + + if (i < rm->rm_used) { + /* XXX: be careful with the pointer math */ + memmove(&(rm->rm_entries[i]), &(rm->rm_entries[i + 1]), + (rm->rm_used - i - 1) * sizeof(unsigned int)); + rm->rm_used--; + } + + spin_unlock(&osb->osb_lock); +} + static int ocfs2_commit_cache(struct ocfs2_super *osb) { int status = 0; @@ -650,6 +781,23 @@ bail: return status; } +static int ocfs2_recovery_completed(struct ocfs2_super *osb) +{ + int empty; + struct ocfs2_recovery_map *rm = osb->recovery_map; + + spin_lock(&osb->osb_lock); + empty = (rm->rm_used == 0); + spin_unlock(&osb->osb_lock); + + return empty; +} + +void ocfs2_wait_for_recovery(struct ocfs2_super *osb) +{ + wait_event(osb->recovery_event, ocfs2_recovery_completed(osb)); +} + /* * JBD Might read a cached version of another nodes journal file. We * don't want this as this file changes often and we get no @@ -848,6 +996,7 @@ static int __ocfs2_recovery_thread(void *arg) { int status, node_num; struct ocfs2_super *osb = arg; + struct ocfs2_recovery_map *rm = osb->recovery_map; mlog_entry_void(); @@ -863,26 +1012,29 @@ restart: goto bail; } - while(!ocfs2_node_map_is_empty(osb, &osb->recovery_map)) { - node_num = ocfs2_node_map_first_set_bit(osb, - &osb->recovery_map); - if (node_num == O2NM_INVALID_NODE_NUM) { - mlog(0, "Out of nodes to recover.\n"); - break; - } + spin_lock(&osb->osb_lock); + while (rm->rm_used) { + /* It's always safe to remove entry zero, as we won't + * clear it until ocfs2_recover_node() has succeeded. */ + node_num = rm->rm_entries[0]; + spin_unlock(&osb->osb_lock); status = ocfs2_recover_node(osb, node_num); - if (status < 0) { + if (!status) { + ocfs2_recovery_map_clear(osb, node_num); + } else { mlog(ML_ERROR, "Error %d recovering node %d on device (%u,%u)!\n", status, node_num, MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); mlog(ML_ERROR, "Volume requires unmount.\n"); - continue; } - ocfs2_recovery_map_clear(osb, node_num); + spin_lock(&osb->osb_lock); } + spin_unlock(&osb->osb_lock); + mlog(0, "All nodes recovered\n"); + ocfs2_super_unlock(osb, 1); /* We always run recovery on our own orphan dir - the dead @@ -893,8 +1045,7 @@ restart: bail: mutex_lock(&osb->recovery_lock); - if (!status && - !ocfs2_node_map_is_empty(osb, &osb->recovery_map)) { + if (!status && !ocfs2_recovery_completed(osb)) { mutex_unlock(&osb->recovery_lock); goto restart; } @@ -924,8 +1075,8 @@ void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) /* People waiting on recovery will wait on * the recovery map to empty. */ - if (!ocfs2_recovery_map_set(osb, node_num)) - mlog(0, "node %d already be in recovery.\n", node_num); + if (ocfs2_recovery_map_set(osb, node_num)) + mlog(0, "node %d already in recovery map.\n", node_num); mlog(0, "starting recovery thread...\n"); @@ -1197,7 +1348,7 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) if (status == -ENOENT) continue; - if (ocfs2_node_map_test_bit(osb, &osb->recovery_map, node_num)) + if (__ocfs2_recovery_map_test(osb, node_num)) continue; spin_unlock(&osb->osb_lock); diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 220f3e818e7..db82be2532e 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -134,6 +134,10 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb, /* Exported only for the journal struct init code in super.c. Do not call. */ void ocfs2_complete_recovery(struct work_struct *work); +void ocfs2_wait_for_recovery(struct ocfs2_super *osb); + +int ocfs2_recovery_init(struct ocfs2_super *osb); +void ocfs2_recovery_exit(struct ocfs2_super *osb); /* * Journal Control: diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index ee3f675a421..c6ed8c35de0 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -180,6 +180,7 @@ enum ocfs2_mount_options struct ocfs2_journal; struct ocfs2_slot_info; +struct ocfs2_recovery_map; struct ocfs2_super { struct task_struct *commit_task; @@ -191,7 +192,6 @@ struct ocfs2_super struct ocfs2_slot_info *slot_info; spinlock_t node_map_lock; - struct ocfs2_node_map recovery_map; u64 root_blkno; u64 system_dir_blkno; @@ -226,6 +226,7 @@ struct ocfs2_super atomic_t vol_state; struct mutex recovery_lock; + struct ocfs2_recovery_map *recovery_map; struct task_struct *recovery_thread_task; int disable_recovery; wait_queue_head_t checkpoint_event; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index fad37af2af9..1a4c7c7850f 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1224,15 +1224,6 @@ leave: return status; } -/* we can't grab the goofy sem lock from inside wait_event, so we use - * memory barriers to make sure that we'll see the null task before - * being woken up */ -static int ocfs2_recovery_thread_running(struct ocfs2_super *osb) -{ - mb(); - return osb->recovery_thread_task != NULL; -} - static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) { int tmp; @@ -1249,17 +1240,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) ocfs2_truncate_log_shutdown(osb); - /* disable any new recovery threads and wait for any currently - * running ones to exit. Do this before setting the vol_state. */ - mutex_lock(&osb->recovery_lock); - osb->disable_recovery = 1; - mutex_unlock(&osb->recovery_lock); - wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb)); - - /* At this point, we know that no more recovery threads can be - * launched, so wait for any recovery completion work to - * complete. */ - flush_workqueue(ocfs2_wq); + /* This will disable recovery and flush any recovery work. */ + ocfs2_recovery_exit(osb); ocfs2_journal_shutdown(osb); @@ -1368,7 +1350,6 @@ static int ocfs2_initialize_super(struct super_block *sb, osb->s_sectsize_bits = blksize_bits(sector_size); BUG_ON(!osb->s_sectsize_bits); - init_waitqueue_head(&osb->recovery_event); spin_lock_init(&osb->dc_task_lock); init_waitqueue_head(&osb->dc_event); osb->dc_work_sequence = 0; @@ -1388,10 +1369,12 @@ static int ocfs2_initialize_super(struct super_block *sb, snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); - mutex_init(&osb->recovery_lock); - - osb->disable_recovery = 0; - osb->recovery_thread_task = NULL; + status = ocfs2_recovery_init(osb); + if (status) { + mlog(ML_ERROR, "Unable to initialize recovery state\n"); + mlog_errno(status); + goto bail; + } init_waitqueue_head(&osb->checkpoint_event); atomic_set(&osb->needs_checkpoint, 0); -- cgit v1.2.3 From fc881fa0d59596c02f8707b5572567c369d4789a Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 1 Feb 2008 12:04:48 -0800 Subject: ocfs2: De-magic the in-memory slot map. The in-memory slot map uses the same magic as the on-disk one. There is a special value to mark a slot as invalid. It relies on the size of certain types and so on. Write a new in-memory map that keeps validity as a separate field. Outside of the I/O functions, OCFS2_INVALID_SLOT now means what it is supposed to. It also is no longer tied to the type size. This also means that only the I/O functions refer to 16bit quantities. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/journal.c | 2 +- fs/ocfs2/ocfs2.h | 6 +-- fs/ocfs2/slot_map.c | 130 +++++++++++++++++++++++++++++----------------------- fs/ocfs2/slot_map.h | 2 +- 4 files changed, 77 insertions(+), 63 deletions(-) (limited to 'fs/ocfs2/journal.c') diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index ca4c0ea5a4c..bffd2d714f6 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -71,7 +71,7 @@ static int ocfs2_commit_thread(void *arg); */ struct ocfs2_recovery_map { - int rm_used; + unsigned int rm_used; unsigned int *rm_entries; }; diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index c6ed8c35de0..95f783dbe8b 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -216,10 +216,10 @@ struct ocfs2_super unsigned long s_mount_opt; unsigned int s_atime_quantum; - u16 max_slots; + unsigned int max_slots; s16 node_num; - s16 slot_num; - s16 preferred_slot; + int slot_num; + int preferred_slot; int s_sectsize_bits; int s_clustersize; int s_clustersize_bits; diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index 5bddee11009..65a61bfa3f2 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c @@ -42,21 +42,41 @@ #include "buffer_head_io.h" + +struct ocfs2_slot { + int sl_valid; + unsigned int sl_node_num; +}; + struct ocfs2_slot_info { struct inode *si_inode; unsigned int si_blocks; struct buffer_head **si_bh; unsigned int si_num_slots; - unsigned int si_size; - s16 si_global_node_nums[OCFS2_MAX_SLOTS]; + struct ocfs2_slot *si_slots; }; -static s16 __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, - s16 global); -static void __ocfs2_fill_slot(struct ocfs2_slot_info *si, - s16 slot_num, - s16 node_num); +static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, + unsigned int node_num); + +static void ocfs2_invalidate_slot(struct ocfs2_slot_info *si, + int slot_num) +{ + BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots)); + si->si_slots[slot_num].sl_valid = 0; +} + +static void ocfs2_set_slot(struct ocfs2_slot_info *si, + int slot_num, unsigned int node_num) +{ + BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots)); + BUG_ON((node_num == O2NM_INVALID_NODE_NUM) || + (node_num >= O2NM_MAX_NODES)); + + si->si_slots[slot_num].sl_valid = 1; + si->si_slots[slot_num].sl_node_num = node_num; +} /* * Post the slot information on disk into our slot_info struct. @@ -71,8 +91,12 @@ static void ocfs2_update_slot_info(struct ocfs2_slot_info *si) * should've made sure we have the most recent copy. */ disk_info = (__le16 *) si->si_bh[0]->b_data; - for (i = 0; i < si->si_size; i++) - si->si_global_node_nums[i] = le16_to_cpu(disk_info[i]); + for (i = 0; i < si->si_num_slots; i++) { + if (le16_to_cpu(disk_info[i]) == (u16)OCFS2_INVALID_SLOT) + ocfs2_invalidate_slot(si, i); + else + ocfs2_set_slot(si, i, le16_to_cpu(disk_info[i])); + } } int ocfs2_refresh_slot_info(struct ocfs2_super *osb) @@ -114,8 +138,13 @@ static int ocfs2_update_disk_slots(struct ocfs2_super *osb, __le16 *disk_info = (__le16 *) si->si_bh[0]->b_data; spin_lock(&osb->osb_lock); - for (i = 0; i < si->si_size; i++) - disk_info[i] = cpu_to_le16(si->si_global_node_nums[i]); + for (i = 0; i < si->si_num_slots; i++) { + if (si->si_slots[i].sl_valid) + disk_info[i] = + cpu_to_le16(si->si_slots[i].sl_node_num); + else + disk_info[i] = cpu_to_le16(OCFS2_INVALID_SLOT); + } spin_unlock(&osb->osb_lock); status = ocfs2_write_block(osb, si->si_bh[0], si->si_inode); @@ -147,39 +176,39 @@ static int ocfs2_slot_map_physical_size(struct ocfs2_super *osb, return 0; } -/* try to find global node in the slot info. Returns - * OCFS2_INVALID_SLOT if nothing is found. */ -static s16 __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, - s16 global) +/* try to find global node in the slot info. Returns -ENOENT + * if nothing is found. */ +static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, + unsigned int node_num) { - int i; - s16 ret = OCFS2_INVALID_SLOT; + int i, ret = -ENOENT; for(i = 0; i < si->si_num_slots; i++) { - if (global == si->si_global_node_nums[i]) { - ret = (s16) i; + if (si->si_slots[i].sl_valid && + (node_num == si->si_slots[i].sl_node_num)) { + ret = i; break; } } + return ret; } -static s16 __ocfs2_find_empty_slot(struct ocfs2_slot_info *si, - s16 preferred) +static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si, + int preferred) { - int i; - s16 ret = OCFS2_INVALID_SLOT; + int i, ret = -ENOSPC; - if (preferred >= 0 && preferred < si->si_num_slots) { - if (OCFS2_INVALID_SLOT == si->si_global_node_nums[preferred]) { + if ((preferred >= 0) && (preferred < si->si_num_slots)) { + if (!si->si_slots[preferred].sl_valid) { ret = preferred; goto out; } } for(i = 0; i < si->si_num_slots; i++) { - if (OCFS2_INVALID_SLOT == si->si_global_node_nums[i]) { - ret = (s16) i; + if (!si->si_slots[i].sl_valid) { + ret = i; break; } } @@ -189,16 +218,13 @@ out: int ocfs2_node_num_to_slot(struct ocfs2_super *osb, unsigned int node_num) { - s16 slot; + int slot; struct ocfs2_slot_info *si = osb->slot_info; spin_lock(&osb->osb_lock); slot = __ocfs2_node_num_to_slot(si, node_num); spin_unlock(&osb->osb_lock); - if (slot == OCFS2_INVALID_SLOT) - return -ENOENT; - return slot; } @@ -212,10 +238,10 @@ int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num, BUG_ON(slot_num < 0); BUG_ON(slot_num > osb->max_slots); - if (si->si_global_node_nums[slot_num] == OCFS2_INVALID_SLOT) + if (!si->si_slots[slot_num].sl_valid) return -ENOENT; - *node_num = si->si_global_node_nums[slot_num]; + *node_num = si->si_slots[slot_num].sl_node_num; return 0; } @@ -241,19 +267,7 @@ static void __ocfs2_free_slot_info(struct ocfs2_slot_info *si) kfree(si); } -static void __ocfs2_fill_slot(struct ocfs2_slot_info *si, - s16 slot_num, - s16 node_num) -{ - BUG_ON(slot_num == OCFS2_INVALID_SLOT); - BUG_ON(slot_num >= si->si_num_slots); - BUG_ON((node_num != O2NM_INVALID_NODE_NUM) && - (node_num >= O2NM_MAX_NODES)); - - si->si_global_node_nums[slot_num] = node_num; -} - -int ocfs2_clear_slot(struct ocfs2_super *osb, s16 slot_num) +int ocfs2_clear_slot(struct ocfs2_super *osb, int slot_num) { struct ocfs2_slot_info *si = osb->slot_info; @@ -261,7 +275,7 @@ int ocfs2_clear_slot(struct ocfs2_super *osb, s16 slot_num) return 0; spin_lock(&osb->osb_lock); - __ocfs2_fill_slot(si, slot_num, OCFS2_INVALID_SLOT); + ocfs2_invalidate_slot(si, slot_num); spin_unlock(&osb->osb_lock); return ocfs2_update_disk_slots(osb, osb->slot_info); @@ -324,11 +338,13 @@ bail: int ocfs2_init_slot_info(struct ocfs2_super *osb) { - int status, i; + int status; struct inode *inode = NULL; struct ocfs2_slot_info *si; - si = kzalloc(sizeof(struct ocfs2_slot_info), GFP_KERNEL); + si = kzalloc(sizeof(struct ocfs2_slot_info) + + (sizeof(struct ocfs2_slot) * osb->max_slots), + GFP_KERNEL); if (!si) { status = -ENOMEM; mlog_errno(status); @@ -336,10 +352,8 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb) } si->si_num_slots = osb->max_slots; - si->si_size = OCFS2_MAX_SLOTS; - - for(i = 0; i < si->si_num_slots; i++) - si->si_global_node_nums[i] = OCFS2_INVALID_SLOT; + si->si_slots = (struct ocfs2_slot *)((char *)si + + sizeof(struct ocfs2_slot_info)); inode = ocfs2_get_system_file_inode(osb, SLOT_MAP_SYSTEM_INODE, OCFS2_INVALID_SLOT); @@ -375,7 +389,7 @@ void ocfs2_free_slot_info(struct ocfs2_super *osb) int ocfs2_find_slot(struct ocfs2_super *osb) { int status; - s16 slot; + int slot; struct ocfs2_slot_info *si; mlog_entry_void(); @@ -390,11 +404,11 @@ int ocfs2_find_slot(struct ocfs2_super *osb) * own journal recovery? Possibly not, though we certainly * need to warn to the user */ slot = __ocfs2_node_num_to_slot(si, osb->node_num); - if (slot == OCFS2_INVALID_SLOT) { + if (slot < 0) { /* if no slot yet, then just take 1st available * one. */ slot = __ocfs2_find_empty_slot(si, osb->preferred_slot); - if (slot == OCFS2_INVALID_SLOT) { + if (slot < 0) { spin_unlock(&osb->osb_lock); mlog(ML_ERROR, "no free slots available!\n"); status = -EINVAL; @@ -404,7 +418,7 @@ int ocfs2_find_slot(struct ocfs2_super *osb) mlog(ML_NOTICE, "slot %d is already allocated to this node!\n", slot); - __ocfs2_fill_slot(si, slot, osb->node_num); + ocfs2_set_slot(si, slot, osb->node_num); osb->slot_num = slot; spin_unlock(&osb->osb_lock); @@ -430,7 +444,7 @@ void ocfs2_put_slot(struct ocfs2_super *osb) spin_lock(&osb->osb_lock); ocfs2_update_slot_info(si); - __ocfs2_fill_slot(si, osb->slot_num, OCFS2_INVALID_SLOT); + ocfs2_invalidate_slot(si, osb->slot_num); osb->slot_num = OCFS2_INVALID_SLOT; spin_unlock(&osb->osb_lock); diff --git a/fs/ocfs2/slot_map.h b/fs/ocfs2/slot_map.h index 5118e89c84e..601c95fd700 100644 --- a/fs/ocfs2/slot_map.h +++ b/fs/ocfs2/slot_map.h @@ -39,6 +39,6 @@ int ocfs2_node_num_to_slot(struct ocfs2_super *osb, unsigned int node_num); int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num, unsigned int *node_num); -int ocfs2_clear_slot(struct ocfs2_super *osb, s16 slot_num); +int ocfs2_clear_slot(struct ocfs2_super *osb, int slot_num); #endif -- cgit v1.2.3 From b1f3550fa1471b691ad6c2f35b5b22e93eaa5855 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 4 Mar 2008 15:21:05 -0800 Subject: ocfs2: Use BUG_ON if (...) BUG(); should be replaced with BUG_ON(...) when the test has no side-effects to allow a definition of BUG_ON that drops the code completely. The semantic patch that makes this change is as follows: (http://www.emn.fr/x-info/coccinelle/) // @ disable unlikely @ expression E,f; @@ ( if (<... f(...) ...>) { BUG(); } | - if (unlikely(E)) { BUG(); } + BUG_ON(E); ) @@ expression E,f; @@ ( if (<... f(...) ...>) { BUG(); } | - if (E) { BUG(); } + BUG_ON(E); ) // Signed-off-by: Julia Lawall Signed-off-by: Andrew Morton Signed-off-by: Mark Fasheh --- fs/ocfs2/alloc.c | 3 +-- fs/ocfs2/journal.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'fs/ocfs2/journal.c') diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index a26882144e8..41f84c92094 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -1029,8 +1029,7 @@ static void ocfs2_rotate_leaf(struct ocfs2_extent_list *el, BUG_ON(!next_free); /* The tree code before us didn't allow enough room in the leaf. */ - if (el->l_next_free_rec == el->l_count && !has_empty) - BUG(); + BUG_ON(el->l_next_free_rec == el->l_count && !has_empty); /* * The easiest way to approach this is to just remove the diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index bffd2d714f6..9698338adc3 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -717,8 +717,7 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local) mlog_entry_void(); - if (!journal) - BUG(); + BUG_ON(!journal); osb = journal->j_osb; -- cgit v1.2.3