summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- fs/bcachefs/bcachefs.h 1
-rw-r--r-- fs/bcachefs/bcachefs_format.h 1
-rw-r--r-- fs/bcachefs/error.c 34
-rw-r--r-- fs/bcachefs/error.h 7
-rw-r--r-- fs/bcachefs/opts.h 5
-rw-r--r-- fs/bcachefs/super-io.c 3
6 files changed, 41 insertions, 10 deletions
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index d2c3f59a668f2..8abefc9940160 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -536,6 +536,7 @@ struct bch_dev {
*/
struct bch_member_cpu mi;
atomic64_t errors[BCH_MEMBER_ERROR_NR];
+ unsigned long write_errors_start;
__uuid_t uuid;
char name[BDEVNAME_SIZE];
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index a6cc817ccd87f..7a5b0d211a82a 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -860,6 +860,7 @@ LE64_BITMASK(BCH_SB_VERSION_INCOMPAT, struct bch_sb, flags[5], 32, 48);
LE64_BITMASK(BCH_SB_VERSION_INCOMPAT_ALLOWED,
struct bch_sb, flags[5], 48, 64);
LE64_BITMASK(BCH_SB_SHARD_INUMS_NBITS, struct bch_sb, flags[6], 0, 4);
+LE64_BITMASK(BCH_SB_WRITE_ERROR_TIMEOUT,struct bch_sb, flags[6], 4, 14);
static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb)
{
diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c
index 3f93a5a6bbfa2..6d68c89a49b21 100644
--- a/fs/bcachefs/error.c
+++ b/fs/bcachefs/error.c
@@ -54,25 +54,41 @@ void bch2_io_error_work(struct work_struct *work)
{
struct bch_dev *ca = container_of(work, struct bch_dev, io_error_work);
struct bch_fs *c = ca->fs;
- bool dev;
+
+ /* XXX: if it's reads or checksums that are failing, set it to failed */
down_write(&c->state_lock);
- dev = bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_ro,
- BCH_FORCE_IF_DEGRADED);
- if (dev
- ? __bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_ro,
- BCH_FORCE_IF_DEGRADED)
- : bch2_fs_emergency_read_only(c))
+ unsigned long write_errors_start = READ_ONCE(ca->write_errors_start);
+
+ if (write_errors_start &&
+ time_after(jiffies,
+ write_errors_start + c->opts.write_error_timeout * HZ)) {
+ if (ca->mi.state >= BCH_MEMBER_STATE_ro)
+ goto out;
+
+ bool dev = !__bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_ro,
+ BCH_FORCE_IF_DEGRADED);
+
bch_err(ca,
- "too many IO errors, setting %s RO",
+ "writes erroring for %u seconds, setting %s ro",
+ c->opts.write_error_timeout,
dev ? "device" : "filesystem");
+ if (!dev)
+ bch2_fs_emergency_read_only(c);
+
+ }
+out:
up_write(&c->state_lock);
}
void bch2_io_error(struct bch_dev *ca, enum bch_member_error_type type)
{
atomic64_inc(&ca->errors[type]);
- //queue_work(system_long_wq, &ca->io_error_work);
+
+ if (type == BCH_MEMBER_ERROR_write && !ca->write_errors_start)
+ ca->write_errors_start = jiffies;
+
+ queue_work(system_long_wq, &ca->io_error_work);
}
enum ask_yn {
diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h
index a57b9f18d0602..7d3f0e2a5fd6f 100644
--- a/fs/bcachefs/error.h
+++ b/fs/bcachefs/error.h
@@ -226,8 +226,13 @@ static inline void bch2_account_io_success_fail(struct bch_dev *ca,
enum bch_member_error_type type,
bool success)
{
- if (!success)
+ if (likely(success)) {
+ if (type == BCH_MEMBER_ERROR_write &&
+ ca->write_errors_start)
+ ca->write_errors_start = 0;
+ } else {
bch2_io_error(ca, type);
+ }
}
static inline void bch2_account_io_completion(struct bch_dev *ca,
diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h
index 071a92ec8a14c..afb89d318d24e 100644
--- a/fs/bcachefs/opts.h
+++ b/fs/bcachefs/opts.h
@@ -145,6 +145,11 @@ enum fsck_err_opts {
OPT_STR(bch2_error_actions), \
BCH_SB_ERROR_ACTION, BCH_ON_ERROR_fix_safe, \
NULL, "Action to take on filesystem error") \
+ x(write_error_timeout, u16, \
+ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
+ OPT_UINT(1, 300), \
+ BCH_SB_WRITE_ERROR_TIMEOUT, 30, \
+ NULL, "Number of seconds of continuous write errors allowed before kicking out a device")\
x(metadata_replicas, u8, \
OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
OPT_UINT(1, BCH_REPLICAS_MAX), \
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index 918e4e7704dde..ee32d043414ae 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -454,6 +454,9 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb,
if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_disk_accounting_v2)
SET_BCH_SB_PROMOTE_WHOLE_EXTENTS(sb, true);
+
+ if (!BCH_SB_WRITE_ERROR_TIMEOUT(sb))
+ SET_BCH_SB_WRITE_ERROR_TIMEOUT(sb, 30);
}
#ifdef __KERNEL__