summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/linux/sched.h1
-rw-r--r--include/trace/events/writeback.h14
-rw-r--r--kernel/fork.c1
-rw-r--r--mm/page-writeback.c36
4 files changed, 45 insertions, 7 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1c4f3e9b9bc..984c3b29597 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1527,6 +1527,7 @@ struct task_struct {
*/
int nr_dirtied;
int nr_dirtied_pause;
+ unsigned long dirty_paused_when; /* start of a write-and-pause period */
#ifdef CONFIG_LATENCYTOP
int latency_record_count;
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 99d1d0decf8..8588a891802 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -300,12 +300,13 @@ TRACE_EVENT(balance_dirty_pages,
unsigned long dirty_ratelimit,
unsigned long task_ratelimit,
unsigned long dirtied,
+ unsigned long period,
long pause,
unsigned long start_time),
TP_ARGS(bdi, thresh, bg_thresh, dirty, bdi_thresh, bdi_dirty,
dirty_ratelimit, task_ratelimit,
- dirtied, pause, start_time),
+ dirtied, period, pause, start_time),
TP_STRUCT__entry(
__array( char, bdi, 32)
@@ -320,6 +321,8 @@ TRACE_EVENT(balance_dirty_pages,
__field(unsigned int, dirtied_pause)
__field(unsigned long, paused)
__field( long, pause)
+ __field(unsigned long, period)
+ __field( long, think)
),
TP_fast_assign(
@@ -336,6 +339,9 @@ TRACE_EVENT(balance_dirty_pages,
__entry->task_ratelimit = KBps(task_ratelimit);
__entry->dirtied = dirtied;
__entry->dirtied_pause = current->nr_dirtied_pause;
+ __entry->think = current->dirty_paused_when == 0 ? 0 :
+ (long)(jiffies - current->dirty_paused_when) * 1000/HZ;
+ __entry->period = period * 1000 / HZ;
__entry->pause = pause * 1000 / HZ;
__entry->paused = (jiffies - start_time) * 1000 / HZ;
),
@@ -346,7 +352,7 @@ TRACE_EVENT(balance_dirty_pages,
"bdi_setpoint=%lu bdi_dirty=%lu "
"dirty_ratelimit=%lu task_ratelimit=%lu "
"dirtied=%u dirtied_pause=%u "
- "paused=%lu pause=%ld",
+ "paused=%lu pause=%ld period=%lu think=%ld",
__entry->bdi,
__entry->limit,
__entry->setpoint,
@@ -358,7 +364,9 @@ TRACE_EVENT(balance_dirty_pages,
__entry->dirtied,
__entry->dirtied_pause,
__entry->paused, /* ms */
- __entry->pause /* ms */
+ __entry->pause, /* ms */
+ __entry->period, /* ms */
+ __entry->think /* ms */
)
);
diff --git a/kernel/fork.c b/kernel/fork.c
index da4a6a10d08..f8668cf6a32 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1296,6 +1296,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->nr_dirtied = 0;
p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10);
+ p->dirty_paused_when = 0;
/*
* Ok, make it visible to the rest of the system.
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 96b3e7aa705..49193215582 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1016,6 +1016,7 @@ static void balance_dirty_pages(struct address_space *mapping,
unsigned long background_thresh;
unsigned long dirty_thresh;
unsigned long bdi_thresh;
+ long period;
long pause = 0;
long uninitialized_var(max_pause);
bool dirty_exceeded = false;
@@ -1026,6 +1027,8 @@ static void balance_dirty_pages(struct address_space *mapping,
unsigned long start_time = jiffies;
for (;;) {
+ unsigned long now = jiffies;
+
/*
* Unstable writes are a feature of certain networked
* filesystems (i.e. NFS) in which data may have been
@@ -1045,8 +1048,11 @@ static void balance_dirty_pages(struct address_space *mapping,
*/
freerun = dirty_freerun_ceiling(dirty_thresh,
background_thresh);
- if (nr_dirty <= freerun)
+ if (nr_dirty <= freerun) {
+ current->dirty_paused_when = now;
+ current->nr_dirtied = 0;
break;
+ }
if (unlikely(!writeback_in_progress(bdi)))
bdi_start_background_writeback(bdi);
@@ -1104,10 +1110,21 @@ static void balance_dirty_pages(struct address_space *mapping,
task_ratelimit = ((u64)dirty_ratelimit * pos_ratio) >>
RATELIMIT_CALC_SHIFT;
if (unlikely(task_ratelimit == 0)) {
+ period = max_pause;
pause = max_pause;
goto pause;
}
- pause = HZ * pages_dirtied / task_ratelimit;
+ period = HZ * pages_dirtied / task_ratelimit;
+ pause = period;
+ if (current->dirty_paused_when)
+ pause -= now - current->dirty_paused_when;
+ /*
+ * For less than 1s think time (ext3/4 may block the dirtier
+ * for up to 800ms from time to time on 1-HDD; so does xfs,
+ * however at much less frequency), try to compensate it in
+ * future periods by updating the virtual time; otherwise just
+ * do a reset, as it may be a light dirtier.
+ */
if (unlikely(pause <= 0)) {
trace_balance_dirty_pages(bdi,
dirty_thresh,
@@ -1118,8 +1135,16 @@ static void balance_dirty_pages(struct address_space *mapping,
dirty_ratelimit,
task_ratelimit,
pages_dirtied,
+ period,
pause,
start_time);
+ if (pause < -HZ) {
+ current->dirty_paused_when = now;
+ current->nr_dirtied = 0;
+ } else if (period) {
+ current->dirty_paused_when += period;
+ current->nr_dirtied = 0;
+ }
pause = 1; /* avoid resetting nr_dirtied_pause below */
break;
}
@@ -1135,11 +1160,15 @@ pause:
dirty_ratelimit,
task_ratelimit,
pages_dirtied,
+ period,
pause,
start_time);
__set_current_state(TASK_KILLABLE);
io_schedule_timeout(pause);
+ current->dirty_paused_when = now + pause;
+ current->nr_dirtied = 0;
+
/*
* This is typically equal to (nr_dirty < dirty_thresh) and can
* also keep "1000+ dd on a slow USB stick" under control.
@@ -1167,11 +1196,10 @@ pause:
if (!dirty_exceeded && bdi->dirty_exceeded)
bdi->dirty_exceeded = 0;
- current->nr_dirtied = 0;
if (pause == 0) { /* in freerun area */
current->nr_dirtied_pause =
dirty_poll_interval(nr_dirty, dirty_thresh);
- } else if (pause <= max_pause / 4 &&
+ } else if (period <= max_pause / 4 &&
pages_dirtied >= current->nr_dirtied_pause) {
current->nr_dirtied_pause = clamp_val(
dirty_ratelimit * (max_pause / 2) / HZ,