diff options
-rw-r--r-- | fs/bcachefs/bcachefs.h | 1 | ||||
-rw-r--r-- | fs/bcachefs/bcachefs_format.h | 1 | ||||
-rw-r--r-- | fs/bcachefs/error.c | 34 | ||||
-rw-r--r-- | fs/bcachefs/error.h | 7 | ||||
-rw-r--r-- | fs/bcachefs/opts.h | 5 | ||||
-rw-r--r-- | fs/bcachefs/super-io.c | 3 |
6 files changed, 41 insertions, 10 deletions
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index d2c3f59a668f2..8abefc9940160 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -536,6 +536,7 @@ struct bch_dev { */ struct bch_member_cpu mi; atomic64_t errors[BCH_MEMBER_ERROR_NR]; + unsigned long write_errors_start; __uuid_t uuid; char name[BDEVNAME_SIZE]; diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index a6cc817ccd87f..7a5b0d211a82a 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -860,6 +860,7 @@ LE64_BITMASK(BCH_SB_VERSION_INCOMPAT, struct bch_sb, flags[5], 32, 48); LE64_BITMASK(BCH_SB_VERSION_INCOMPAT_ALLOWED, struct bch_sb, flags[5], 48, 64); LE64_BITMASK(BCH_SB_SHARD_INUMS_NBITS, struct bch_sb, flags[6], 0, 4); +LE64_BITMASK(BCH_SB_WRITE_ERROR_TIMEOUT,struct bch_sb, flags[6], 4, 14); static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb) { diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 3f93a5a6bbfa2..6d68c89a49b21 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -54,25 +54,41 @@ void bch2_io_error_work(struct work_struct *work) { struct bch_dev *ca = container_of(work, struct bch_dev, io_error_work); struct bch_fs *c = ca->fs; - bool dev; + + /* XXX: if it's reads or checksums that are failing, set it to failed */ down_write(&c->state_lock); - dev = bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_ro, - BCH_FORCE_IF_DEGRADED); - if (dev - ? __bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_ro, - BCH_FORCE_IF_DEGRADED) - : bch2_fs_emergency_read_only(c)) + unsigned long write_errors_start = READ_ONCE(ca->write_errors_start); + + if (write_errors_start && + time_after(jiffies, + write_errors_start + c->opts.write_error_timeout * HZ)) { + if (ca->mi.state >= BCH_MEMBER_STATE_ro) + goto out; + + bool dev = !__bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_ro, + BCH_FORCE_IF_DEGRADED); + bch_err(ca, - "too many IO errors, setting %s RO", + "writes erroring for %u seconds, setting %s ro", + c->opts.write_error_timeout, dev ? "device" : "filesystem"); + if (!dev) + bch2_fs_emergency_read_only(c); + + } +out: up_write(&c->state_lock); } void bch2_io_error(struct bch_dev *ca, enum bch_member_error_type type) { atomic64_inc(&ca->errors[type]); - //queue_work(system_long_wq, &ca->io_error_work); + + if (type == BCH_MEMBER_ERROR_write && !ca->write_errors_start) + ca->write_errors_start = jiffies; + + queue_work(system_long_wq, &ca->io_error_work); } enum ask_yn { diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index a57b9f18d0602..7d3f0e2a5fd6f 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -226,8 +226,13 @@ static inline void bch2_account_io_success_fail(struct bch_dev *ca, enum bch_member_error_type type, bool success) { - if (!success) + if (likely(success)) { + if (type == BCH_MEMBER_ERROR_write && + ca->write_errors_start) + ca->write_errors_start = 0; + } else { bch2_io_error(ca, type); + } } static inline void bch2_account_io_completion(struct bch_dev *ca, diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 071a92ec8a14c..afb89d318d24e 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -145,6 +145,11 @@ enum fsck_err_opts { OPT_STR(bch2_error_actions), \ BCH_SB_ERROR_ACTION, BCH_ON_ERROR_fix_safe, \ NULL, "Action to take on filesystem error") \ + x(write_error_timeout, u16, \ + OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ + OPT_UINT(1, 300), \ + BCH_SB_WRITE_ERROR_TIMEOUT, 30, \ + NULL, "Number of consecutive write errors allowed before kicking out a device")\ x(metadata_replicas, u8, \ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_UINT(1, BCH_REPLICAS_MAX), \ diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 918e4e7704dde..ee32d043414ae 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -454,6 +454,9 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_disk_accounting_v2) SET_BCH_SB_PROMOTE_WHOLE_EXTENTS(sb, true); + + if (!BCH_SB_WRITE_ERROR_TIMEOUT(sb)) + SET_BCH_SB_WRITE_ERROR_TIMEOUT(sb, 30); } #ifdef __KERNEL__ |