Diffstat (limited to 'fs/xfs/xfs_zone_alloc.c')
-rw-r--r--  fs/xfs/xfs_zone_alloc.c | 130
1 file changed, 114 insertions(+), 16 deletions(-)
diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c
index b7b2820ec0ef..fd4c60a050e6 100644
--- a/fs/xfs/xfs_zone_alloc.c
+++ b/fs/xfs/xfs_zone_alloc.c
@@ -424,6 +424,7 @@ static struct xfs_open_zone *
xfs_init_open_zone(
struct xfs_rtgroup *rtg,
xfs_rgblock_t write_pointer,
+ enum rw_hint write_hint,
bool is_gc)
{
struct xfs_open_zone *oz;
@@ -434,6 +435,7 @@ xfs_init_open_zone(
oz->oz_rtg = rtg;
oz->oz_write_pointer = write_pointer;
oz->oz_written = write_pointer;
+ oz->oz_write_hint = write_hint;
oz->oz_is_gc = is_gc;
/*
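The oz_write_hint member initialized in the hunk above is declared outside this file (the diffstat is limited to fs/xfs/xfs_zone_alloc.c), presumably next to the other per-open-zone fields of struct xfs_open_zone. A minimal sketch of the assumed declaration, using only member names that appear elsewhere in this diff:

/*
 * Sketch only -- the real struct xfs_open_zone lives in a header that is
 * not part of this per-file diff; member order and the surrounding fields
 * are assumptions.
 */
struct xfs_open_zone {
	/* ... existing members such as oz_rtg, oz_ref, oz_entry ... */
	xfs_rgblock_t		oz_write_pointer;
	xfs_rgblock_t		oz_written;
	enum rw_hint		oz_write_hint;	/* temperature this zone was opened for */
	bool			oz_is_gc;
};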
@@ -453,6 +455,7 @@ xfs_init_open_zone(
struct xfs_open_zone *
xfs_open_zone(
struct xfs_mount *mp,
+ enum rw_hint write_hint,
bool is_gc)
{
struct xfs_zone_info *zi = mp->m_zone_info;
@@ -465,12 +468,13 @@ xfs_open_zone(
return NULL;
set_current_state(TASK_RUNNING);
- return xfs_init_open_zone(to_rtg(xg), 0, is_gc);
+ return xfs_init_open_zone(to_rtg(xg), 0, write_hint, is_gc);
}
static struct xfs_open_zone *
xfs_try_open_zone(
- struct xfs_mount *mp)
+ struct xfs_mount *mp,
+ enum rw_hint write_hint)
{
struct xfs_zone_info *zi = mp->m_zone_info;
struct xfs_open_zone *oz;
@@ -487,7 +491,7 @@ xfs_try_open_zone(
*/
zi->zi_nr_open_zones++;
spin_unlock(&zi->zi_open_zones_lock);
- oz = xfs_open_zone(mp, false);
+ oz = xfs_open_zone(mp, write_hint, false);
spin_lock(&zi->zi_open_zones_lock);
if (!oz) {
zi->zi_nr_open_zones--;
@@ -510,17 +514,79 @@ xfs_try_open_zone(
return oz;
}
+/*
+ * For data with short or medium lifetime, try to colocate it into an
+ * already open zone with a matching temperature.
+ */
+static bool
+xfs_colocate_eagerly(
+ enum rw_hint file_hint)
+{
+ switch (file_hint) {
+ case WRITE_LIFE_MEDIUM:
+ case WRITE_LIFE_SHORT:
+ case WRITE_LIFE_NONE:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool
+xfs_good_hint_match(
+ struct xfs_open_zone *oz,
+ enum rw_hint file_hint)
+{
+ switch (oz->oz_write_hint) {
+ case WRITE_LIFE_LONG:
+ case WRITE_LIFE_EXTREME:
+ /* colocate long and extreme */
+ if (file_hint == WRITE_LIFE_LONG ||
+ file_hint == WRITE_LIFE_EXTREME)
+ return true;
+ break;
+ case WRITE_LIFE_MEDIUM:
+ /* colocate medium with medium */
+ if (file_hint == WRITE_LIFE_MEDIUM)
+ return true;
+ break;
+ case WRITE_LIFE_SHORT:
+ case WRITE_LIFE_NONE:
+ case WRITE_LIFE_NOT_SET:
+ /* colocate short and none */
+ if (file_hint <= WRITE_LIFE_SHORT)
+ return true;
+ break;
+ }
+ return false;
+}
+
static bool
xfs_try_use_zone(
struct xfs_zone_info *zi,
- struct xfs_open_zone *oz)
+ enum rw_hint file_hint,
+ struct xfs_open_zone *oz,
+ bool lowspace)
{
if (oz->oz_write_pointer == rtg_blocks(oz->oz_rtg))
return false;
+ if (!lowspace && !xfs_good_hint_match(oz, file_hint))
+ return false;
if (!atomic_inc_not_zero(&oz->oz_ref))
return false;
/*
+ * If we have a hint set for the data, use that for the zone even if
+ * some data was written already without any hint set, but don't change
+ * the temperature after that as that would make little sense without
+ * tracking per-temperature class written block counts, which is
+ * probably overkill anyway.
+ */
+ if (file_hint != WRITE_LIFE_NOT_SET &&
+ oz->oz_write_hint == WRITE_LIFE_NOT_SET)
+ oz->oz_write_hint = file_hint;
+
+ /*
* If we couldn't match by inode or life time we just pick the first
* zone with enough space above. For that we want the least busy zone
* for some definition of "least" busy. For now this simple LRU
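The two helpers added above collapse the six rw_hint values into three temperature buckets: {NOT_SET, NONE, SHORT}, {MEDIUM}, and {LONG, EXTREME}. The following stand-alone mock (user-space C, not kernel code) reproduces the bucketing of xfs_good_hint_match() and prints the resulting colocation matrix, assuming the usual hint encoding NOT_SET=0 < NONE < SHORT < MEDIUM < LONG < EXTREME:

/*
 * Stand-alone illustration only.  Mirrors the bucketing in
 * xfs_good_hint_match() under the assumed rw_hint ordering.
 */
#include <stdio.h>

enum hint { NOT_SET, NONE, SHORT, MEDIUM, LONG, EXTREME };

static int good_hint_match(enum hint zone_hint, enum hint file_hint)
{
	switch (zone_hint) {
	case LONG:
	case EXTREME:
		/* colocate long and extreme */
		return file_hint == LONG || file_hint == EXTREME;
	case MEDIUM:
		/* colocate medium with medium */
		return file_hint == MEDIUM;
	default:
		/* SHORT, NONE, NOT_SET: colocate short, none, and unhinted */
		return file_hint <= SHORT;
	}
}

int main(void)
{
	static const char * const names[] = {
		"NOT_SET", "NONE", "SHORT", "MEDIUM", "LONG", "EXTREME"
	};

	for (int z = NOT_SET; z <= EXTREME; z++)
		for (int f = NOT_SET; f <= EXTREME; f++)
			printf("zone %-8s + file %-8s -> %s\n",
			       names[z], names[f],
			       good_hint_match(z, f) ? "colocate" : "separate");
	return 0;
}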
@@ -534,14 +600,16 @@ xfs_try_use_zone(
static struct xfs_open_zone *
xfs_select_open_zone_lru(
- struct xfs_zone_info *zi)
+ struct xfs_zone_info *zi,
+ enum rw_hint file_hint,
+ bool lowspace)
{
struct xfs_open_zone *oz;
lockdep_assert_held(&zi->zi_open_zones_lock);
list_for_each_entry(oz, &zi->zi_open_zones, oz_entry)
- if (xfs_try_use_zone(zi, oz))
+ if (xfs_try_use_zone(zi, file_hint, oz, lowspace))
return oz;
cond_resched_lock(&zi->zi_open_zones_lock);
@@ -550,20 +618,28 @@ xfs_select_open_zone_lru(
static struct xfs_open_zone *
xfs_select_open_zone_mru(
- struct xfs_zone_info *zi)
+ struct xfs_zone_info *zi,
+ enum rw_hint file_hint)
{
struct xfs_open_zone *oz;
lockdep_assert_held(&zi->zi_open_zones_lock);
list_for_each_entry_reverse(oz, &zi->zi_open_zones, oz_entry)
- if (xfs_try_use_zone(zi, oz))
+ if (xfs_try_use_zone(zi, file_hint, oz, false))
return oz;
cond_resched_lock(&zi->zi_open_zones_lock);
return NULL;
}
+static inline enum rw_hint xfs_inode_write_hint(struct xfs_inode *ip)
+{
+ if (xfs_has_nolifetime(ip->i_mount))
+ return WRITE_LIFE_NOT_SET;
+ return VFS_I(ip)->i_write_hint;
+}
+
/*
* Try to pack inodes that are written back after they were closed tight instead
* of trying to open new zones for them or spread them to the least recently
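The i_write_hint consulted by the new xfs_inode_write_hint() helper is the per-file lifetime hint that applications set through fcntl(). A minimal user-space example, assuming a libc that exposes F_SET_RW_HINT and the RWH_WRITE_LIFE_* constants (otherwise they come from <linux/fcntl.h>, available since Linux 4.13):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint64_t hint = RWH_WRITE_LIFE_SHORT;	/* tag the data as short-lived */
	int fd = open("scratch.dat", O_CREAT | O_WRONLY | O_TRUNC, 0644);

	if (fd < 0 || fcntl(fd, F_SET_RW_HINT, &hint) < 0) {
		perror("F_SET_RW_HINT");
		return 1;
	}

	/* Writes issued after this point carry the SHORT lifetime hint. */
	write(fd, "temporary data\n", 15);
	close(fd);
	return 0;
}

Per the helper above, a filesystem with the nolifetime feature set discards these hints and treats all data as WRITE_LIFE_NOT_SET.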
@@ -587,6 +663,7 @@ static inline bool xfs_zoned_pack_tight(struct xfs_inode *ip)
static struct xfs_open_zone *
xfs_select_zone_nowait(
struct xfs_mount *mp,
+ enum rw_hint write_hint,
bool pack_tight)
{
struct xfs_zone_info *zi = mp->m_zone_info;
@@ -595,20 +672,38 @@ xfs_select_zone_nowait(
if (xfs_is_shutdown(mp))
return NULL;
+ /*
+ * Try to fill up open zones with matching temperature if available. It
+ * is better to try to co-locate data when this is favorable, so we can
+ * activate empty zones when it is statistically better to separate
+ * data.
+ */
spin_lock(&zi->zi_open_zones_lock);
- if (pack_tight)
- oz = xfs_select_open_zone_mru(zi);
+ if (xfs_colocate_eagerly(write_hint))
+ oz = xfs_select_open_zone_lru(zi, write_hint, false);
+ else if (pack_tight)
+ oz = xfs_select_open_zone_mru(zi, write_hint);
if (oz)
goto out_unlock;
/*
* See if we can open a new zone and use that.
*/
- oz = xfs_try_open_zone(mp);
+ oz = xfs_try_open_zone(mp, write_hint);
if (oz)
goto out_unlock;
- oz = xfs_select_open_zone_lru(zi);
+ /*
+ * Try to colocate cold data with other cold data if we failed to open a
+ * new zone for it.
+ */
+ if (write_hint != WRITE_LIFE_NOT_SET &&
+ !xfs_colocate_eagerly(write_hint))
+ oz = xfs_select_open_zone_lru(zi, write_hint, false);
+ if (!oz)
+ oz = xfs_select_open_zone_lru(zi, WRITE_LIFE_NOT_SET, false);
+ if (!oz)
+ oz = xfs_select_open_zone_lru(zi, WRITE_LIFE_NOT_SET, true);
out_unlock:
spin_unlock(&zi->zi_open_zones_lock);
return oz;
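Restating the resulting selection cascade in xfs_select_zone_nowait() as a reader's summary of the hunk above (not part of the patch):

/*
 * 1. SHORT/NONE/MEDIUM data: LRU scan of the open zones, hint match
 *    required (xfs_colocate_eagerly -> xfs_select_open_zone_lru).
 * 2. Otherwise, for inodes packed tight: MRU scan, hint match required.
 * 3. Try to open a brand-new zone for this hint (xfs_try_open_zone).
 * 4. LONG/EXTREME data that could not get a new zone: LRU scan with a
 *    hint match.
 * 5. LRU scan as WRITE_LIFE_NOT_SET, i.e. any open zone in the
 *    short/none/unset bucket.
 * 6. Last resort: LRU scan with lowspace set, which skips the hint check
 *    and takes any open zone that still has space.
 */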
@@ -617,19 +712,20 @@ out_unlock:
static struct xfs_open_zone *
xfs_select_zone(
struct xfs_mount *mp,
+ enum rw_hint write_hint,
bool pack_tight)
{
struct xfs_zone_info *zi = mp->m_zone_info;
DEFINE_WAIT (wait);
struct xfs_open_zone *oz;
- oz = xfs_select_zone_nowait(mp, pack_tight);
+ oz = xfs_select_zone_nowait(mp, write_hint, pack_tight);
if (oz)
return oz;
for (;;) {
prepare_to_wait(&zi->zi_zone_wait, &wait, TASK_UNINTERRUPTIBLE);
- oz = xfs_select_zone_nowait(mp, pack_tight);
+ oz = xfs_select_zone_nowait(mp, write_hint, pack_tight);
if (oz)
break;
schedule();
@@ -707,6 +803,7 @@ xfs_zone_alloc_and_submit(
{
struct xfs_inode *ip = XFS_I(ioend->io_inode);
struct xfs_mount *mp = ip->i_mount;
+ enum rw_hint write_hint = xfs_inode_write_hint(ip);
bool pack_tight = xfs_zoned_pack_tight(ip);
unsigned int alloc_len;
struct iomap_ioend *split;
@@ -724,7 +821,7 @@ xfs_zone_alloc_and_submit(
*oz = xfs_last_used_zone(ioend);
if (!*oz) {
select_zone:
- *oz = xfs_select_zone(mp, pack_tight);
+ *oz = xfs_select_zone(mp, write_hint, pack_tight);
if (!*oz)
goto out_error;
}
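Putting the pieces together, the hint travels from the inode to the zone roughly like this (sketch of the call flow within this file):

/*
 *   xfs_zone_alloc_and_submit(ioend, &oz)
 *       write_hint = xfs_inode_write_hint(ip);   // NOT_SET on a nolifetime fs
 *       oz = xfs_last_used_zone(ioend);          // cached zone reused if usable
 *       oz = xfs_select_zone(mp, write_hint, pack_tight);
 *             -> xfs_select_zone_nowait()        // cascade summarized above
 *                  -> xfs_select_open_zone_lru()/_mru()
 *                  -> xfs_try_open_zone() -> xfs_open_zone()
 *                       -> xfs_init_open_zone(rtg, 0, write_hint, false)
 */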
@@ -862,7 +959,8 @@ xfs_init_zone(
struct xfs_open_zone *oz;
atomic_inc(&rtg_group(rtg)->xg_active_ref);
- oz = xfs_init_open_zone(rtg, write_pointer, false);
+ oz = xfs_init_open_zone(rtg, write_pointer, WRITE_LIFE_NOT_SET,
+ false);
list_add_tail(&oz->oz_entry, &zi->zi_open_zones);
zi->zi_nr_open_zones++;