diff options
Diffstat (limited to 'mm/page-writeback.c')
-rw-r--r-- | mm/page-writeback.c | 319 |
1 file changed, 173 insertions, 146 deletions
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 8a1c920901292..acff24e9fae4a 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -139,6 +139,8 @@ struct dirty_throttle_control {
 	unsigned long wb_bg_thresh;
 
 	unsigned long pos_ratio;
+	bool freerun;
+	bool dirty_exceeded;
 };
 
 /*
@@ -859,6 +861,34 @@ static void mdtc_calc_avail(struct dirty_throttle_control *mdtc,
 	mdtc->avail = filepages + min(headroom, other_clean);
 }
 
+static inline bool dtc_is_global(struct dirty_throttle_control *dtc)
+{
+	return mdtc_gdtc(dtc) == NULL;
+}
+
+/*
+ * Dirty background will ignore pages being written as we're trying to
+ * decide whether to put more under writeback.
+ */
+static void domain_dirty_avail(struct dirty_throttle_control *dtc,
+			       bool include_writeback)
+{
+	if (dtc_is_global(dtc)) {
+		dtc->avail = global_dirtyable_memory();
+		dtc->dirty = global_node_page_state(NR_FILE_DIRTY);
+		if (include_writeback)
+			dtc->dirty += global_node_page_state(NR_WRITEBACK);
+	} else {
+		unsigned long filepages = 0, headroom = 0, writeback = 0;
+
+		mem_cgroup_wb_stats(dtc->wb, &filepages, &headroom, &dtc->dirty,
+				    &writeback);
+		if (include_writeback)
+			dtc->dirty += writeback;
+		mdtc_calc_avail(dtc, filepages, headroom);
+	}
+}
+
 /**
  * __wb_calc_thresh - @wb's share of dirty threshold
  * @dtc: dirty_throttle_context of interest
@@ -921,16 +951,9 @@ unsigned long cgwb_calc_thresh(struct bdi_writeback *wb)
 {
 	struct dirty_throttle_control gdtc = { GDTC_INIT_NO_WB };
 	struct dirty_throttle_control mdtc = { MDTC_INIT(wb, &gdtc) };
-	unsigned long filepages = 0, headroom = 0, writeback = 0;
 
-	gdtc.avail = global_dirtyable_memory();
-	gdtc.dirty = global_node_page_state(NR_FILE_DIRTY) +
-		     global_node_page_state(NR_WRITEBACK);
-
-	mem_cgroup_wb_stats(wb, &filepages, &headroom,
-			    &mdtc.dirty, &writeback);
-	mdtc.dirty += writeback;
-	mdtc_calc_avail(&mdtc, filepages, headroom);
+	domain_dirty_avail(&gdtc, true);
+	domain_dirty_avail(&mdtc, true);
 	domain_dirty_limits(&mdtc);
 
 	return __wb_calc_thresh(&mdtc, mdtc.thresh);
@@ -1703,6 +1726,100 @@ static inline void wb_dirty_limits(struct dirty_throttle_control *dtc)
 	}
 }
 
+static unsigned long domain_poll_intv(struct dirty_throttle_control *dtc,
+				      bool strictlimit)
+{
+	unsigned long dirty, thresh;
+
+	if (strictlimit) {
+		dirty = dtc->wb_dirty;
+		thresh = dtc->wb_thresh;
+	} else {
+		dirty = dtc->dirty;
+		thresh = dtc->thresh;
+	}
+
+	return dirty_poll_interval(dirty, thresh);
+}
+
+/*
+ * Throttle it only when the background writeback cannot catch-up. This avoids
+ * (excessively) small writeouts when the wb limits are ramping up in case of
+ * !strictlimit.
+ *
+ * In strictlimit case make decision based on the wb counters and limits. Small
+ * writeouts when the wb limits are ramping up are the price we consciously pay
+ * for strictlimit-ing.
+ */
+static void domain_dirty_freerun(struct dirty_throttle_control *dtc,
+				 bool strictlimit)
+{
+	unsigned long dirty, thresh, bg_thresh;
+
+	if (unlikely(strictlimit)) {
+		wb_dirty_limits(dtc);
+		dirty = dtc->wb_dirty;
+		thresh = dtc->wb_thresh;
+		bg_thresh = dtc->wb_bg_thresh;
+	} else {
+		dirty = dtc->dirty;
+		thresh = dtc->thresh;
+		bg_thresh = dtc->bg_thresh;
+	}
+	dtc->freerun = dirty <= dirty_freerun_ceiling(thresh, bg_thresh);
+}
+
+static void balance_domain_limits(struct dirty_throttle_control *dtc,
+				  bool strictlimit)
+{
+	domain_dirty_avail(dtc, true);
+	domain_dirty_limits(dtc);
+	domain_dirty_freerun(dtc, strictlimit);
+}
+
+static void wb_dirty_freerun(struct dirty_throttle_control *dtc,
+			     bool strictlimit)
+{
+	dtc->freerun = false;
+
+	/* was already handled in domain_dirty_freerun */
+	if (strictlimit)
+		return;
+
+	wb_dirty_limits(dtc);
+	/*
+	 * LOCAL_THROTTLE tasks must not be throttled when below the per-wb
+	 * freerun ceiling.
+	 */
+	if (!(current->flags & PF_LOCAL_THROTTLE))
+		return;
+
+	dtc->freerun = dtc->wb_dirty <
+		       dirty_freerun_ceiling(dtc->wb_thresh, dtc->wb_bg_thresh);
+}
+
+static inline void wb_dirty_exceeded(struct dirty_throttle_control *dtc,
+				     bool strictlimit)
+{
+	dtc->dirty_exceeded = (dtc->wb_dirty > dtc->wb_thresh) &&
+		((dtc->dirty > dtc->thresh) || strictlimit);
+}
+
+/*
+ * The limits fields dirty_exceeded and pos_ratio won't be updated if wb is
+ * in freerun state. Please don't use these invalid fields in freerun case.
+ */
+static void balance_wb_limits(struct dirty_throttle_control *dtc,
+			      bool strictlimit)
+{
+	wb_dirty_freerun(dtc, strictlimit);
+	if (dtc->freerun)
+		return;
+
+	wb_dirty_exceeded(dtc, strictlimit);
+	wb_position_ratio(dtc);
+}
+
 /*
  * balance_dirty_pages() must be called by processes which are generating dirty
  * data. It looks at the number of dirty pages in the machine and will force
@@ -1725,7 +1842,6 @@ static int balance_dirty_pages(struct bdi_writeback *wb,
 	long max_pause;
 	long min_pause;
 	int nr_dirtied_pause;
-	bool dirty_exceeded = false;
 	unsigned long task_ratelimit;
 	unsigned long dirty_ratelimit;
 	struct backing_dev_info *bdi = wb->bdi;
@@ -1735,53 +1851,16 @@ static int balance_dirty_pages(struct bdi_writeback *wb,
 
 	for (;;) {
 		unsigned long now = jiffies;
-		unsigned long dirty, thresh, bg_thresh;
-		unsigned long m_dirty = 0; /* stop bogus uninit warnings */
-		unsigned long m_thresh = 0;
-		unsigned long m_bg_thresh = 0;
 
 		nr_dirty = global_node_page_state(NR_FILE_DIRTY);
-		gdtc->avail = global_dirtyable_memory();
-		gdtc->dirty = nr_dirty + global_node_page_state(NR_WRITEBACK);
-
-		domain_dirty_limits(gdtc);
-
-		if (unlikely(strictlimit)) {
-			wb_dirty_limits(gdtc);
-
-			dirty = gdtc->wb_dirty;
-			thresh = gdtc->wb_thresh;
-			bg_thresh = gdtc->wb_bg_thresh;
-		} else {
-			dirty = gdtc->dirty;
-			thresh = gdtc->thresh;
-			bg_thresh = gdtc->bg_thresh;
-		}
+		balance_domain_limits(gdtc, strictlimit);
 
 		if (mdtc) {
-			unsigned long filepages, headroom, writeback;
-
 			/*
 			 * If @wb belongs to !root memcg, repeat the same
 			 * basic calculations for the memcg domain.
 			 */
-			mem_cgroup_wb_stats(wb, &filepages, &headroom,
-					    &mdtc->dirty, &writeback);
-			mdtc->dirty += writeback;
-			mdtc_calc_avail(mdtc, filepages, headroom);
-
-			domain_dirty_limits(mdtc);
-
-			if (unlikely(strictlimit)) {
-				wb_dirty_limits(mdtc);
-				m_dirty = mdtc->wb_dirty;
-				m_thresh = mdtc->wb_thresh;
-				m_bg_thresh = mdtc->wb_bg_thresh;
-			} else {
-				m_dirty = mdtc->dirty;
-				m_thresh = mdtc->thresh;
-				m_bg_thresh = mdtc->bg_thresh;
-			}
+			balance_domain_limits(mdtc, strictlimit);
 		}
 
 		/*
@@ -1798,31 +1877,21 @@ static int balance_dirty_pages(struct bdi_writeback *wb,
 			wb_start_background_writeback(wb);
 
 		/*
-		 * Throttle it only when the background writeback cannot
-		 * catch-up. This avoids (excessively) small writeouts
-		 * when the wb limits are ramping up in case of !strictlimit.
-		 *
-		 * In strictlimit case make decision based on the wb counters
-		 * and limits. Small writeouts when the wb limits are ramping
-		 * up are the price we consciously pay for strictlimit-ing.
-		 *
 		 * If memcg domain is in effect, @dirty should be under
 		 * both global and memcg freerun ceilings.
 		 */
-		if (dirty <= dirty_freerun_ceiling(thresh, bg_thresh) &&
-		    (!mdtc ||
-		     m_dirty <= dirty_freerun_ceiling(m_thresh, m_bg_thresh))) {
+		if (gdtc->freerun && (!mdtc || mdtc->freerun)) {
 			unsigned long intv;
 			unsigned long m_intv;
 
 free_running:
-			intv = dirty_poll_interval(dirty, thresh);
+			intv = domain_poll_intv(gdtc, strictlimit);
 			m_intv = ULONG_MAX;
 
 			current->dirty_paused_when = now;
 			current->nr_dirtied = 0;
 			if (mdtc)
-				m_intv = dirty_poll_interval(m_dirty, m_thresh);
+				m_intv = domain_poll_intv(mdtc, strictlimit);
 			current->nr_dirtied_pause = min(intv, m_intv);
 			break;
 		}
@@ -1837,24 +1906,9 @@ free_running:
 		 * Calculate global domain's pos_ratio and select the
 		 * global dtc by default.
 		 */
-		if (!strictlimit) {
-			wb_dirty_limits(gdtc);
-
-			if ((current->flags & PF_LOCAL_THROTTLE) &&
-			    gdtc->wb_dirty <
-			    dirty_freerun_ceiling(gdtc->wb_thresh,
-						  gdtc->wb_bg_thresh))
-				/*
-				 * LOCAL_THROTTLE tasks must not be throttled
-				 * when below the per-wb freerun ceiling.
-				 */
-				goto free_running;
-		}
-
-		dirty_exceeded = (gdtc->wb_dirty > gdtc->wb_thresh) &&
-			((gdtc->dirty > gdtc->thresh) || strictlimit);
-
-		wb_position_ratio(gdtc);
+		balance_wb_limits(gdtc, strictlimit);
+		if (gdtc->freerun)
+			goto free_running;
 		sdtc = gdtc;
 
 		if (mdtc) {
@@ -1864,31 +1918,15 @@ free_running:
 			 * both global and memcg domains. Choose the one
 			 * w/ lower pos_ratio.
 			 */
-			if (!strictlimit) {
-				wb_dirty_limits(mdtc);
-
-				if ((current->flags & PF_LOCAL_THROTTLE) &&
-				    mdtc->wb_dirty <
-				    dirty_freerun_ceiling(mdtc->wb_thresh,
-							  mdtc->wb_bg_thresh))
-					/*
-					 * LOCAL_THROTTLE tasks must not be
-					 * throttled when below the per-wb
-					 * freerun ceiling.
-					 */
-					goto free_running;
-			}
-			dirty_exceeded |= (mdtc->wb_dirty > mdtc->wb_thresh) &&
-				((mdtc->dirty > mdtc->thresh) || strictlimit);
-
-			wb_position_ratio(mdtc);
+			balance_wb_limits(mdtc, strictlimit);
+			if (mdtc->freerun)
+				goto free_running;
 			if (mdtc->pos_ratio < gdtc->pos_ratio)
 				sdtc = mdtc;
 		}
 
-		if (dirty_exceeded != wb->dirty_exceeded)
-			wb->dirty_exceeded = dirty_exceeded;
-
+		wb->dirty_exceeded = gdtc->dirty_exceeded ||
+				     (mdtc && mdtc->dirty_exceeded);
 		if (time_is_before_jiffies(READ_ONCE(wb->bw_time_stamp) +
 					   BANDWIDTH_INTERVAL))
 			__wb_update_bandwidth(gdtc, mdtc, true);
@@ -2109,6 +2147,35 @@ void balance_dirty_pages_ratelimited(struct address_space *mapping)
 }
 EXPORT_SYMBOL(balance_dirty_pages_ratelimited);
 
+/*
+ * Similar to wb_dirty_limits, wb_bg_dirty_limits also calculates dirty
+ * and thresh, but it's for background writeback.
+ */
+static void wb_bg_dirty_limits(struct dirty_throttle_control *dtc)
+{
+	struct bdi_writeback *wb = dtc->wb;
+
+	dtc->wb_bg_thresh = __wb_calc_thresh(dtc, dtc->bg_thresh);
+	if (dtc->wb_bg_thresh < 2 * wb_stat_error())
+		dtc->wb_dirty = wb_stat_sum(wb, WB_RECLAIMABLE);
+	else
+		dtc->wb_dirty = wb_stat(wb, WB_RECLAIMABLE);
+}
+
+static bool domain_over_bg_thresh(struct dirty_throttle_control *dtc)
+{
+	domain_dirty_avail(dtc, false);
+	domain_dirty_limits(dtc);
+	if (dtc->dirty > dtc->bg_thresh)
+		return true;
+
+	wb_bg_dirty_limits(dtc);
+	if (dtc->wb_dirty > dtc->wb_bg_thresh)
+		return true;
+
+	return false;
+}
+
 /**
  * wb_over_bg_thresh - does @wb need to be written back?
  * @wb: bdi_writeback of interest
@@ -2120,54 +2187,14 @@ EXPORT_SYMBOL(balance_dirty_pages_ratelimited);
  */
 bool wb_over_bg_thresh(struct bdi_writeback *wb)
 {
-	struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) };
-	struct dirty_throttle_control mdtc_stor = { MDTC_INIT(wb, &gdtc_stor) };
-	struct dirty_throttle_control * const gdtc = &gdtc_stor;
-	struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
-						     &mdtc_stor : NULL;
-	unsigned long reclaimable;
-	unsigned long thresh;
-
-	/*
-	 * Similar to balance_dirty_pages() but ignores pages being written
-	 * as we're trying to decide whether to put more under writeback.
-	 */
-	gdtc->avail = global_dirtyable_memory();
-	gdtc->dirty = global_node_page_state(NR_FILE_DIRTY);
-	domain_dirty_limits(gdtc);
-
-	if (gdtc->dirty > gdtc->bg_thresh)
-		return true;
-
-	thresh = __wb_calc_thresh(gdtc, gdtc->bg_thresh);
-	if (thresh < 2 * wb_stat_error())
-		reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE);
-	else
-		reclaimable = wb_stat(wb, WB_RECLAIMABLE);
+	struct dirty_throttle_control gdtc = { GDTC_INIT(wb) };
+	struct dirty_throttle_control mdtc = { MDTC_INIT(wb, &gdtc) };
 
-	if (reclaimable > thresh)
+	if (domain_over_bg_thresh(&gdtc))
 		return true;
 
-	if (mdtc) {
-		unsigned long filepages, headroom, writeback;
-
-		mem_cgroup_wb_stats(wb, &filepages, &headroom, &mdtc->dirty,
-				    &writeback);
-		mdtc_calc_avail(mdtc, filepages, headroom);
-		domain_dirty_limits(mdtc); /* ditto, ignore writeback */
-
-		if (mdtc->dirty > mdtc->bg_thresh)
-			return true;
-
-		thresh = __wb_calc_thresh(mdtc, mdtc->bg_thresh);
-		if (thresh < 2 * wb_stat_error())
-			reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE);
-		else
-			reclaimable = wb_stat(wb, WB_RECLAIMABLE);
-
-		if (reclaimable > thresh)
-			return true;
-	}
+	if (mdtc_valid(&mdtc))
+		return domain_over_bg_thresh(&mdtc);
 
 	return false;
 }