summaryrefslogtreecommitdiff
path: root/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/libxfs/xfs_ag.h15
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c8
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c8
-rw-r--r--fs/xfs/libxfs/xfs_btree.h1
-rw-r--r--fs/xfs/libxfs/xfs_dir2_leaf.c9
-rw-r--r--fs/xfs/libxfs/xfs_errortag.h18
-rw-r--r--fs/xfs/libxfs/xfs_format.h22
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c2
-rw-r--r--fs/xfs/libxfs/xfs_log_format.h60
-rw-r--r--fs/xfs/libxfs/xfs_refcount.c432
-rw-r--r--fs/xfs/libxfs/xfs_refcount.h40
-rw-r--r--fs/xfs/libxfs/xfs_refcount_btree.c15
-rw-r--r--fs/xfs/libxfs/xfs_rmap.c9
-rw-r--r--fs/xfs/libxfs/xfs_sb.c4
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.c4
-rw-r--r--fs/xfs/libxfs/xfs_types.h30
-rw-r--r--fs/xfs/scrub/agheader.c47
-rw-r--r--fs/xfs/scrub/agheader_repair.c81
-rw-r--r--fs/xfs/scrub/alloc.c4
-rw-r--r--fs/xfs/scrub/attr.c11
-rw-r--r--fs/xfs/scrub/bitmap.c11
-rw-r--r--fs/xfs/scrub/bmap.c147
-rw-r--r--fs/xfs/scrub/btree.c14
-rw-r--r--fs/xfs/scrub/common.c48
-rw-r--r--fs/xfs/scrub/common.h2
-rw-r--r--fs/xfs/scrub/dabtree.c4
-rw-r--r--fs/xfs/scrub/dir.c10
-rw-r--r--fs/xfs/scrub/fscounters.c109
-rw-r--r--fs/xfs/scrub/ialloc.c5
-rw-r--r--fs/xfs/scrub/inode.c2
-rw-r--r--fs/xfs/scrub/quota.c8
-rw-r--r--fs/xfs/scrub/refcount.c84
-rw-r--r--fs/xfs/scrub/repair.c51
-rw-r--r--fs/xfs/scrub/scrub.c6
-rw-r--r--fs/xfs/scrub/scrub.h18
-rw-r--r--fs/xfs/scrub/symlink.c2
-rw-r--r--fs/xfs/xfs_acl.c3
-rw-r--r--fs/xfs/xfs_acl.h2
-rw-r--r--fs/xfs/xfs_aops.c32
-rw-r--r--fs/xfs/xfs_attr_item.c67
-rw-r--r--fs/xfs/xfs_bmap_item.c54
-rw-r--r--fs/xfs/xfs_bmap_util.c10
-rw-r--r--fs/xfs/xfs_bmap_util.h2
-rw-r--r--fs/xfs/xfs_buf.c1
-rw-r--r--fs/xfs/xfs_buf_item.c2
-rw-r--r--fs/xfs/xfs_error.c57
-rw-r--r--fs/xfs/xfs_error.h13
-rw-r--r--fs/xfs/xfs_extfree_item.c94
-rw-r--r--fs/xfs/xfs_extfree_item.h16
-rw-r--r--fs/xfs/xfs_file.c9
-rw-r--r--fs/xfs/xfs_fsmap.c4
-rw-r--r--fs/xfs/xfs_icache.c6
-rw-r--r--fs/xfs/xfs_inode.c4
-rw-r--r--fs/xfs/xfs_ioctl.c4
-rw-r--r--fs/xfs/xfs_iomap.c191
-rw-r--r--fs/xfs/xfs_iomap.h6
-rw-r--r--fs/xfs/xfs_iops.c20
-rw-r--r--fs/xfs/xfs_log.c46
-rw-r--r--fs/xfs/xfs_log_recover.c10
-rw-r--r--fs/xfs/xfs_mount.c15
-rw-r--r--fs/xfs/xfs_ondisk.h23
-rw-r--r--fs/xfs/xfs_pnfs.c6
-rw-r--r--fs/xfs/xfs_qm.c16
-rw-r--r--fs/xfs/xfs_refcount_item.c57
-rw-r--r--fs/xfs/xfs_reflink.c8
-rw-r--r--fs/xfs/xfs_rmap_item.c70
-rw-r--r--fs/xfs/xfs_rtalloc.c60
-rw-r--r--fs/xfs/xfs_super.c14
-rw-r--r--fs/xfs/xfs_sysfs.h7
-rw-r--r--fs/xfs/xfs_trace.c2
-rw-r--r--fs/xfs/xfs_trace.h134
-rw-r--r--fs/xfs/xfs_trans_ail.c7
-rw-r--r--fs/xfs/xfs_xattr.c2
73 files changed, 1701 insertions, 714 deletions
diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
index 517a138faa66..191b22b9a35b 100644
--- a/fs/xfs/libxfs/xfs_ag.h
+++ b/fs/xfs/libxfs/xfs_ag.h
@@ -133,6 +133,21 @@ xfs_verify_agbno(struct xfs_perag *pag, xfs_agblock_t agbno)
return true;
}
+static inline bool
+xfs_verify_agbext(
+ struct xfs_perag *pag,
+ xfs_agblock_t agbno,
+ xfs_agblock_t len)
+{
+ if (agbno + len <= agbno)
+ return false;
+
+ if (!xfs_verify_agbno(pag, agbno))
+ return false;
+
+ return xfs_verify_agbno(pag, agbno + len - 1);
+}
+
/*
* Verify that an AG inode number pointer neither points outside the AG
* nor points at static metadata.
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 6261599bb389..989cf341779b 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -263,11 +263,7 @@ xfs_alloc_get_rec(
goto out_bad_rec;
/* check for valid extent range, including overflow */
- if (!xfs_verify_agbno(pag, *bno))
- goto out_bad_rec;
- if (*bno > *bno + *len)
- goto out_bad_rec;
- if (!xfs_verify_agbno(pag, *bno + *len - 1))
+ if (!xfs_verify_agbext(pag, *bno, *len))
goto out_bad_rec;
return 0;
@@ -1520,7 +1516,7 @@ xfs_alloc_ag_vextent_lastblock(
#ifdef DEBUG
/* Randomly don't execute the first algorithm. */
- if (prandom_u32_max(2))
+ if (get_random_u32_below(2))
return 0;
#endif
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 49d0d4ea63fc..0d56a8d862e8 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4058,7 +4058,7 @@ xfs_bmap_alloc_userdata(
* the busy list.
*/
bma->datatype = XFS_ALLOC_NOBUSY;
- if (whichfork == XFS_DATA_FORK) {
+ if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) {
bma->datatype |= XFS_ALLOC_USERDATA;
if (bma->offset == 0)
bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
@@ -4551,7 +4551,8 @@ xfs_bmapi_convert_delalloc(
* the extent. Just return the real extent at this offset.
*/
if (!isnullstartblock(bma.got.br_startblock)) {
- xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags);
+ xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags,
+ xfs_iomap_inode_sequence(ip, flags));
*seq = READ_ONCE(ifp->if_seq);
goto out_trans_cancel;
}
@@ -4599,7 +4600,8 @@ xfs_bmapi_convert_delalloc(
XFS_STATS_INC(mp, xs_xstrat_quick);
ASSERT(!isnullstartblock(bma.got.br_startblock));
- xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags);
+ xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags,
+ xfs_iomap_inode_sequence(ip, flags));
*seq = READ_ONCE(ifp->if_seq);
if (whichfork == XFS_COW_FORK)
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index eef27858a013..29c4b4ccb909 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -556,7 +556,6 @@ xfs_btree_islastblock(
struct xfs_buf *bp;
block = xfs_btree_get_block(cur, level, &bp);
- ASSERT(block && xfs_btree_check_block(cur, block, level, bp) == 0);
if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK);
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index d9b66306a9a7..cb9e950a911d 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -146,6 +146,8 @@ xfs_dir3_leaf_check_int(
xfs_dir2_leaf_tail_t *ltp;
int stale;
int i;
+ bool isleaf1 = (hdr->magic == XFS_DIR2_LEAF1_MAGIC ||
+ hdr->magic == XFS_DIR3_LEAF1_MAGIC);
ltp = xfs_dir2_leaf_tail_p(geo, leaf);
@@ -158,8 +160,7 @@ xfs_dir3_leaf_check_int(
return __this_address;
/* Leaves and bests don't overlap in leaf format. */
- if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC ||
- hdr->magic == XFS_DIR3_LEAF1_MAGIC) &&
+ if (isleaf1 &&
(char *)&hdr->ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp))
return __this_address;
@@ -175,6 +176,10 @@ xfs_dir3_leaf_check_int(
}
if (hdr->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
stale++;
+ if (isleaf1 && xfs_dir2_dataptr_to_db(geo,
+ be32_to_cpu(hdr->ents[i].address)) >=
+ be32_to_cpu(ltp->bestcount))
+ return __this_address;
}
if (hdr->stale != stale)
return __this_address;
diff --git a/fs/xfs/libxfs/xfs_errortag.h b/fs/xfs/libxfs/xfs_errortag.h
index 5362908164b0..01a9e86b3037 100644
--- a/fs/xfs/libxfs/xfs_errortag.h
+++ b/fs/xfs/libxfs/xfs_errortag.h
@@ -40,13 +40,12 @@
#define XFS_ERRTAG_REFCOUNT_FINISH_ONE 25
#define XFS_ERRTAG_BMAP_FINISH_ONE 26
#define XFS_ERRTAG_AG_RESV_CRITICAL 27
+
/*
- * DEBUG mode instrumentation to test and/or trigger delayed allocation
- * block killing in the event of failed writes. When enabled, all
- * buffered writes are silenty dropped and handled as if they failed.
- * All delalloc blocks in the range of the write (including pre-existing
- * delalloc blocks!) are tossed as part of the write failure error
- * handling sequence.
+ * Drop-writes support removed because write error handling cannot trash
+ * pre-existing delalloc extents in any useful way anymore. We retain the
+ * definition so that we can reject it as an invalid value in
+ * xfs_errortag_valid().
*/
#define XFS_ERRTAG_DROP_WRITES 28
#define XFS_ERRTAG_LOG_BAD_CRC 29
@@ -62,7 +61,9 @@
#define XFS_ERRTAG_LARP 39
#define XFS_ERRTAG_DA_LEAF_SPLIT 40
#define XFS_ERRTAG_ATTR_LEAF_TO_NODE 41
-#define XFS_ERRTAG_MAX 42
+#define XFS_ERRTAG_WB_DELAY_MS 42
+#define XFS_ERRTAG_WRITE_DELAY_MS 43
+#define XFS_ERRTAG_MAX 44
/*
* Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
@@ -95,7 +96,6 @@
#define XFS_RANDOM_REFCOUNT_FINISH_ONE 1
#define XFS_RANDOM_BMAP_FINISH_ONE 1
#define XFS_RANDOM_AG_RESV_CRITICAL 4
-#define XFS_RANDOM_DROP_WRITES 1
#define XFS_RANDOM_LOG_BAD_CRC 1
#define XFS_RANDOM_LOG_ITEM_PIN 1
#define XFS_RANDOM_BUF_LRU_REF 2
@@ -109,5 +109,7 @@
#define XFS_RANDOM_LARP 1
#define XFS_RANDOM_DA_LEAF_SPLIT 1
#define XFS_RANDOM_ATTR_LEAF_TO_NODE 1
+#define XFS_RANDOM_WB_DELAY_MS 3000
+#define XFS_RANDOM_WRITE_DELAY_MS 3000
#endif /* __XFS_ERRORTAG_H_ */
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index b55bdfa9c8a8..371dc07233e0 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -1564,20 +1564,6 @@ struct xfs_rmap_rec {
#define RMAPBT_UNUSED_OFFSET_BITLEN 7
#define RMAPBT_OFFSET_BITLEN 54
-#define XFS_RMAP_ATTR_FORK (1 << 0)
-#define XFS_RMAP_BMBT_BLOCK (1 << 1)
-#define XFS_RMAP_UNWRITTEN (1 << 2)
-#define XFS_RMAP_KEY_FLAGS (XFS_RMAP_ATTR_FORK | \
- XFS_RMAP_BMBT_BLOCK)
-#define XFS_RMAP_REC_FLAGS (XFS_RMAP_UNWRITTEN)
-struct xfs_rmap_irec {
- xfs_agblock_t rm_startblock; /* extent start block */
- xfs_extlen_t rm_blockcount; /* extent length */
- uint64_t rm_owner; /* extent owner */
- uint64_t rm_offset; /* offset within the owner */
- unsigned int rm_flags; /* state flags */
-};
-
/*
* Key structure
*
@@ -1626,7 +1612,7 @@ unsigned int xfs_refc_block(struct xfs_mount *mp);
* on the startblock. This speeds up mount time deletion of stale
* staging extents because they're all at the right side of the tree.
*/
-#define XFS_REFC_COW_START ((xfs_agblock_t)(1U << 31))
+#define XFS_REFC_COWFLAG (1U << 31)
#define REFCNTBT_COWFLAG_BITLEN 1
#define REFCNTBT_AGBLOCK_BITLEN 31
@@ -1640,12 +1626,6 @@ struct xfs_refcount_key {
__be32 rc_startblock; /* starting block number */
};
-struct xfs_refcount_irec {
- xfs_agblock_t rc_startblock; /* starting block number */
- xfs_extlen_t rc_blockcount; /* count of free blocks */
- xfs_nlink_t rc_refcount; /* number of inodes linked here */
-};
-
#define MAXREFCOUNT ((xfs_nlink_t)~0U)
#define MAXREFCEXTLEN ((xfs_extlen_t)~0U)
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 94db50eb706a..5118dedf9267 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -636,7 +636,7 @@ xfs_ialloc_ag_alloc(
/* randomly do sparse inode allocations */
if (xfs_has_sparseinodes(tp->t_mountp) &&
igeo->ialloc_min_blks < igeo->ialloc_blks)
- do_sparse = prandom_u32_max(2);
+ do_sparse = get_random_u32_below(2);
#endif
/*
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index b351b9dc6561..f13e0809dc63 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -613,25 +613,49 @@ typedef struct xfs_efi_log_format {
uint16_t efi_size; /* size of this item */
uint32_t efi_nextents; /* # extents to free */
uint64_t efi_id; /* efi identifier */
- xfs_extent_t efi_extents[1]; /* array of extents to free */
+ xfs_extent_t efi_extents[]; /* array of extents to free */
} xfs_efi_log_format_t;
+static inline size_t
+xfs_efi_log_format_sizeof(
+ unsigned int nr)
+{
+ return sizeof(struct xfs_efi_log_format) +
+ nr * sizeof(struct xfs_extent);
+}
+
typedef struct xfs_efi_log_format_32 {
uint16_t efi_type; /* efi log item type */
uint16_t efi_size; /* size of this item */
uint32_t efi_nextents; /* # extents to free */
uint64_t efi_id; /* efi identifier */
- xfs_extent_32_t efi_extents[1]; /* array of extents to free */
+ xfs_extent_32_t efi_extents[]; /* array of extents to free */
} __attribute__((packed)) xfs_efi_log_format_32_t;
+static inline size_t
+xfs_efi_log_format32_sizeof(
+ unsigned int nr)
+{
+ return sizeof(struct xfs_efi_log_format_32) +
+ nr * sizeof(struct xfs_extent_32);
+}
+
typedef struct xfs_efi_log_format_64 {
uint16_t efi_type; /* efi log item type */
uint16_t efi_size; /* size of this item */
uint32_t efi_nextents; /* # extents to free */
uint64_t efi_id; /* efi identifier */
- xfs_extent_64_t efi_extents[1]; /* array of extents to free */
+ xfs_extent_64_t efi_extents[]; /* array of extents to free */
} xfs_efi_log_format_64_t;
+static inline size_t
+xfs_efi_log_format64_sizeof(
+ unsigned int nr)
+{
+ return sizeof(struct xfs_efi_log_format_64) +
+ nr * sizeof(struct xfs_extent_64);
+}
+
/*
* This is the structure used to lay out an efd log item in the
* log. The efd_extents array is a variable size array whose
@@ -642,25 +666,49 @@ typedef struct xfs_efd_log_format {
uint16_t efd_size; /* size of this item */
uint32_t efd_nextents; /* # of extents freed */
uint64_t efd_efi_id; /* id of corresponding efi */
- xfs_extent_t efd_extents[1]; /* array of extents freed */
+ xfs_extent_t efd_extents[]; /* array of extents freed */
} xfs_efd_log_format_t;
+static inline size_t
+xfs_efd_log_format_sizeof(
+ unsigned int nr)
+{
+ return sizeof(struct xfs_efd_log_format) +
+ nr * sizeof(struct xfs_extent);
+}
+
typedef struct xfs_efd_log_format_32 {
uint16_t efd_type; /* efd log item type */
uint16_t efd_size; /* size of this item */
uint32_t efd_nextents; /* # of extents freed */
uint64_t efd_efi_id; /* id of corresponding efi */
- xfs_extent_32_t efd_extents[1]; /* array of extents freed */
+ xfs_extent_32_t efd_extents[]; /* array of extents freed */
} __attribute__((packed)) xfs_efd_log_format_32_t;
+static inline size_t
+xfs_efd_log_format32_sizeof(
+ unsigned int nr)
+{
+ return sizeof(struct xfs_efd_log_format_32) +
+ nr * sizeof(struct xfs_extent_32);
+}
+
typedef struct xfs_efd_log_format_64 {
uint16_t efd_type; /* efd log item type */
uint16_t efd_size; /* size of this item */
uint32_t efd_nextents; /* # of extents freed */
uint64_t efd_efi_id; /* id of corresponding efi */
- xfs_extent_64_t efd_extents[1]; /* array of extents freed */
+ xfs_extent_64_t efd_extents[]; /* array of extents freed */
} xfs_efd_log_format_64_t;
+static inline size_t
+xfs_efd_log_format64_sizeof(
+ unsigned int nr)
+{
+ return sizeof(struct xfs_efd_log_format_64) +
+ nr * sizeof(struct xfs_extent_64);
+}
+
/*
* RUI/RUD (reverse mapping) log format definitions
*/
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 64b910caafaa..6f7ed9288fe4 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -46,13 +46,16 @@ STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur,
int
xfs_refcount_lookup_le(
struct xfs_btree_cur *cur,
+ enum xfs_refc_domain domain,
xfs_agblock_t bno,
int *stat)
{
- trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno,
+ trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno,
+ xfs_refcount_encode_startblock(bno, domain),
XFS_LOOKUP_LE);
cur->bc_rec.rc.rc_startblock = bno;
cur->bc_rec.rc.rc_blockcount = 0;
+ cur->bc_rec.rc.rc_domain = domain;
return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
}
@@ -63,13 +66,16 @@ xfs_refcount_lookup_le(
int
xfs_refcount_lookup_ge(
struct xfs_btree_cur *cur,
+ enum xfs_refc_domain domain,
xfs_agblock_t bno,
int *stat)
{
- trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno,
+ trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno,
+ xfs_refcount_encode_startblock(bno, domain),
XFS_LOOKUP_GE);
cur->bc_rec.rc.rc_startblock = bno;
cur->bc_rec.rc.rc_blockcount = 0;
+ cur->bc_rec.rc.rc_domain = domain;
return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
}
@@ -80,13 +86,16 @@ xfs_refcount_lookup_ge(
int
xfs_refcount_lookup_eq(
struct xfs_btree_cur *cur,
+ enum xfs_refc_domain domain,
xfs_agblock_t bno,
int *stat)
{
- trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno,
+ trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno,
+ xfs_refcount_encode_startblock(bno, domain),
XFS_LOOKUP_LE);
cur->bc_rec.rc.rc_startblock = bno;
cur->bc_rec.rc.rc_blockcount = 0;
+ cur->bc_rec.rc.rc_domain = domain;
return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
}
@@ -96,7 +105,17 @@ xfs_refcount_btrec_to_irec(
const union xfs_btree_rec *rec,
struct xfs_refcount_irec *irec)
{
- irec->rc_startblock = be32_to_cpu(rec->refc.rc_startblock);
+ uint32_t start;
+
+ start = be32_to_cpu(rec->refc.rc_startblock);
+ if (start & XFS_REFC_COWFLAG) {
+ start &= ~XFS_REFC_COWFLAG;
+ irec->rc_domain = XFS_REFC_DOMAIN_COW;
+ } else {
+ irec->rc_domain = XFS_REFC_DOMAIN_SHARED;
+ }
+
+ irec->rc_startblock = start;
irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount);
irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount);
}
@@ -114,7 +133,6 @@ xfs_refcount_get_rec(
struct xfs_perag *pag = cur->bc_ag.pag;
union xfs_btree_rec *rec;
int error;
- xfs_agblock_t realstart;
error = xfs_btree_get_rec(cur, &rec, stat);
if (error || !*stat)
@@ -124,22 +142,11 @@ xfs_refcount_get_rec(
if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN)
goto out_bad_rec;
- /* handle special COW-staging state */
- realstart = irec->rc_startblock;
- if (realstart & XFS_REFC_COW_START) {
- if (irec->rc_refcount != 1)
- goto out_bad_rec;
- realstart &= ~XFS_REFC_COW_START;
- } else if (irec->rc_refcount < 2) {
+ if (!xfs_refcount_check_domain(irec))
goto out_bad_rec;
- }
/* check for valid extent range, including overflow */
- if (!xfs_verify_agbno(pag, realstart))
- goto out_bad_rec;
- if (realstart > realstart + irec->rc_blockcount)
- goto out_bad_rec;
- if (!xfs_verify_agbno(pag, realstart + irec->rc_blockcount - 1))
+ if (!xfs_verify_agbext(pag, irec->rc_startblock, irec->rc_blockcount))
goto out_bad_rec;
if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT)
@@ -169,12 +176,17 @@ xfs_refcount_update(
struct xfs_refcount_irec *irec)
{
union xfs_btree_rec rec;
+ uint32_t start;
int error;
trace_xfs_refcount_update(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);
- rec.refc.rc_startblock = cpu_to_be32(irec->rc_startblock);
+
+ start = xfs_refcount_encode_startblock(irec->rc_startblock,
+ irec->rc_domain);
+ rec.refc.rc_startblock = cpu_to_be32(start);
rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount);
rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount);
+
error = xfs_btree_update(cur, &rec);
if (error)
trace_xfs_refcount_update_error(cur->bc_mp,
@@ -196,9 +208,12 @@ xfs_refcount_insert(
int error;
trace_xfs_refcount_insert(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);
+
cur->bc_rec.rc.rc_startblock = irec->rc_startblock;
cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount;
cur->bc_rec.rc.rc_refcount = irec->rc_refcount;
+ cur->bc_rec.rc.rc_domain = irec->rc_domain;
+
error = xfs_btree_insert(cur, i);
if (error)
goto out_error;
@@ -244,7 +259,8 @@ xfs_refcount_delete(
}
if (error)
goto out_error;
- error = xfs_refcount_lookup_ge(cur, irec.rc_startblock, &found_rec);
+ error = xfs_refcount_lookup_ge(cur, irec.rc_domain, irec.rc_startblock,
+ &found_rec);
out_error:
if (error)
trace_xfs_refcount_delete_error(cur->bc_mp,
@@ -343,6 +359,7 @@ xfs_refc_next(
STATIC int
xfs_refcount_split_extent(
struct xfs_btree_cur *cur,
+ enum xfs_refc_domain domain,
xfs_agblock_t agbno,
bool *shape_changed)
{
@@ -351,7 +368,7 @@ xfs_refcount_split_extent(
int error;
*shape_changed = false;
- error = xfs_refcount_lookup_le(cur, agbno, &found_rec);
+ error = xfs_refcount_lookup_le(cur, domain, agbno, &found_rec);
if (error)
goto out_error;
if (!found_rec)
@@ -364,6 +381,8 @@ xfs_refcount_split_extent(
error = -EFSCORRUPTED;
goto out_error;
}
+ if (rcext.rc_domain != domain)
+ return 0;
if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno)
return 0;
@@ -415,6 +434,9 @@ xfs_refcount_merge_center_extents(
trace_xfs_refcount_merge_center_extents(cur->bc_mp,
cur->bc_ag.pag->pag_agno, left, center, right);
+ ASSERT(left->rc_domain == center->rc_domain);
+ ASSERT(right->rc_domain == center->rc_domain);
+
/*
* Make sure the center and right extents are not in the btree.
* If the center extent was synthesized, the first delete call
@@ -423,8 +445,8 @@ xfs_refcount_merge_center_extents(
* call removes the center and the second one removes the right
* extent.
*/
- error = xfs_refcount_lookup_ge(cur, center->rc_startblock,
- &found_rec);
+ error = xfs_refcount_lookup_ge(cur, center->rc_domain,
+ center->rc_startblock, &found_rec);
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
@@ -451,8 +473,8 @@ xfs_refcount_merge_center_extents(
}
/* Enlarge the left extent. */
- error = xfs_refcount_lookup_le(cur, left->rc_startblock,
- &found_rec);
+ error = xfs_refcount_lookup_le(cur, left->rc_domain,
+ left->rc_startblock, &found_rec);
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
@@ -491,10 +513,12 @@ xfs_refcount_merge_left_extent(
trace_xfs_refcount_merge_left_extent(cur->bc_mp,
cur->bc_ag.pag->pag_agno, left, cleft);
+ ASSERT(left->rc_domain == cleft->rc_domain);
+
/* If the extent at agbno (cleft) wasn't synthesized, remove it. */
if (cleft->rc_refcount > 1) {
- error = xfs_refcount_lookup_le(cur, cleft->rc_startblock,
- &found_rec);
+ error = xfs_refcount_lookup_le(cur, cleft->rc_domain,
+ cleft->rc_startblock, &found_rec);
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
@@ -512,8 +536,8 @@ xfs_refcount_merge_left_extent(
}
/* Enlarge the left extent. */
- error = xfs_refcount_lookup_le(cur, left->rc_startblock,
- &found_rec);
+ error = xfs_refcount_lookup_le(cur, left->rc_domain,
+ left->rc_startblock, &found_rec);
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
@@ -552,13 +576,15 @@ xfs_refcount_merge_right_extent(
trace_xfs_refcount_merge_right_extent(cur->bc_mp,
cur->bc_ag.pag->pag_agno, cright, right);
+ ASSERT(right->rc_domain == cright->rc_domain);
+
/*
* If the extent ending at agbno+aglen (cright) wasn't synthesized,
* remove it.
*/
if (cright->rc_refcount > 1) {
- error = xfs_refcount_lookup_le(cur, cright->rc_startblock,
- &found_rec);
+ error = xfs_refcount_lookup_le(cur, cright->rc_domain,
+ cright->rc_startblock, &found_rec);
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
@@ -576,8 +602,8 @@ xfs_refcount_merge_right_extent(
}
/* Enlarge the right extent. */
- error = xfs_refcount_lookup_le(cur, right->rc_startblock,
- &found_rec);
+ error = xfs_refcount_lookup_le(cur, right->rc_domain,
+ right->rc_startblock, &found_rec);
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
@@ -600,8 +626,6 @@ out_error:
return error;
}
-#define XFS_FIND_RCEXT_SHARED 1
-#define XFS_FIND_RCEXT_COW 2
/*
* Find the left extent and the one after it (cleft). This function assumes
* that we've already split any extent crossing agbno.
@@ -611,16 +635,16 @@ xfs_refcount_find_left_extents(
struct xfs_btree_cur *cur,
struct xfs_refcount_irec *left,
struct xfs_refcount_irec *cleft,
+ enum xfs_refc_domain domain,
xfs_agblock_t agbno,
- xfs_extlen_t aglen,
- int flags)
+ xfs_extlen_t aglen)
{
struct xfs_refcount_irec tmp;
int error;
int found_rec;
left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK;
- error = xfs_refcount_lookup_le(cur, agbno - 1, &found_rec);
+ error = xfs_refcount_lookup_le(cur, domain, agbno - 1, &found_rec);
if (error)
goto out_error;
if (!found_rec)
@@ -634,11 +658,9 @@ xfs_refcount_find_left_extents(
goto out_error;
}
- if (xfs_refc_next(&tmp) != agbno)
- return 0;
- if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2)
+ if (tmp.rc_domain != domain)
return 0;
- if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1)
+ if (xfs_refc_next(&tmp) != agbno)
return 0;
/* We have a left extent; retrieve (or invent) the next right one */
*left = tmp;
@@ -655,6 +677,9 @@ xfs_refcount_find_left_extents(
goto out_error;
}
+ if (tmp.rc_domain != domain)
+ goto not_found;
+
/* if tmp starts at the end of our range, just use that */
if (tmp.rc_startblock == agbno)
*cleft = tmp;
@@ -671,8 +696,10 @@ xfs_refcount_find_left_extents(
cleft->rc_blockcount = min(aglen,
tmp.rc_startblock - agbno);
cleft->rc_refcount = 1;
+ cleft->rc_domain = domain;
}
} else {
+not_found:
/*
* No extents, so pretend that there's one covering the whole
* range.
@@ -680,6 +707,7 @@ xfs_refcount_find_left_extents(
cleft->rc_startblock = agbno;
cleft->rc_blockcount = aglen;
cleft->rc_refcount = 1;
+ cleft->rc_domain = domain;
}
trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
left, cleft, agbno);
@@ -700,16 +728,16 @@ xfs_refcount_find_right_extents(
struct xfs_btree_cur *cur,
struct xfs_refcount_irec *right,
struct xfs_refcount_irec *cright,
+ enum xfs_refc_domain domain,
xfs_agblock_t agbno,
- xfs_extlen_t aglen,
- int flags)
+ xfs_extlen_t aglen)
{
struct xfs_refcount_irec tmp;
int error;
int found_rec;
right->rc_startblock = cright->rc_startblock = NULLAGBLOCK;
- error = xfs_refcount_lookup_ge(cur, agbno + aglen, &found_rec);
+ error = xfs_refcount_lookup_ge(cur, domain, agbno + aglen, &found_rec);
if (error)
goto out_error;
if (!found_rec)
@@ -723,11 +751,9 @@ xfs_refcount_find_right_extents(
goto out_error;
}
- if (tmp.rc_startblock != agbno + aglen)
+ if (tmp.rc_domain != domain)
return 0;
- if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2)
- return 0;
- if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1)
+ if (tmp.rc_startblock != agbno + aglen)
return 0;
/* We have a right extent; retrieve (or invent) the next left one */
*right = tmp;
@@ -744,6 +770,9 @@ xfs_refcount_find_right_extents(
goto out_error;
}
+ if (tmp.rc_domain != domain)
+ goto not_found;
+
/* if tmp ends at the end of our range, just use that */
if (xfs_refc_next(&tmp) == agbno + aglen)
*cright = tmp;
@@ -760,8 +789,10 @@ xfs_refcount_find_right_extents(
cright->rc_blockcount = right->rc_startblock -
cright->rc_startblock;
cright->rc_refcount = 1;
+ cright->rc_domain = domain;
}
} else {
+not_found:
/*
* No extents, so pretend that there's one covering the whole
* range.
@@ -769,6 +800,7 @@ xfs_refcount_find_right_extents(
cright->rc_startblock = agbno;
cright->rc_blockcount = aglen;
cright->rc_refcount = 1;
+ cright->rc_domain = domain;
}
trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
cright, right, agbno + aglen);
@@ -783,21 +815,146 @@ out_error:
/* Is this extent valid? */
static inline bool
xfs_refc_valid(
- struct xfs_refcount_irec *rc)
+ const struct xfs_refcount_irec *rc)
{
return rc->rc_startblock != NULLAGBLOCK;
}
+static inline xfs_nlink_t
+xfs_refc_merge_refcount(
+ const struct xfs_refcount_irec *irec,
+ enum xfs_refc_adjust_op adjust)
+{
+ /* Once a record hits MAXREFCOUNT, it is pinned there forever */
+ if (irec->rc_refcount == MAXREFCOUNT)
+ return MAXREFCOUNT;
+ return irec->rc_refcount + adjust;
+}
+
+static inline bool
+xfs_refc_want_merge_center(
+ const struct xfs_refcount_irec *left,
+ const struct xfs_refcount_irec *cleft,
+ const struct xfs_refcount_irec *cright,
+ const struct xfs_refcount_irec *right,
+ bool cleft_is_cright,
+ enum xfs_refc_adjust_op adjust,
+ unsigned long long *ulenp)
+{
+ unsigned long long ulen = left->rc_blockcount;
+ xfs_nlink_t new_refcount;
+
+ /*
+ * To merge with a center record, both shoulder records must be
+ * adjacent to the record we want to adjust. This is only true if
+ * find_left and find_right made all four records valid.
+ */
+ if (!xfs_refc_valid(left) || !xfs_refc_valid(right) ||
+ !xfs_refc_valid(cleft) || !xfs_refc_valid(cright))
+ return false;
+
+ /* There must only be one record for the entire range. */
+ if (!cleft_is_cright)
+ return false;
+
+ /* The shoulder record refcounts must match the new refcount. */
+ new_refcount = xfs_refc_merge_refcount(cleft, adjust);
+ if (left->rc_refcount != new_refcount)
+ return false;
+ if (right->rc_refcount != new_refcount)
+ return false;
+
+ /*
+ * The new record cannot exceed the max length. ulen is a ULL as the
+ * individual record block counts can be up to (u32 - 1) in length
+ * hence we need to catch u32 addition overflows here.
+ */
+ ulen += cleft->rc_blockcount + right->rc_blockcount;
+ if (ulen >= MAXREFCEXTLEN)
+ return false;
+
+ *ulenp = ulen;
+ return true;
+}
+
+static inline bool
+xfs_refc_want_merge_left(
+ const struct xfs_refcount_irec *left,
+ const struct xfs_refcount_irec *cleft,
+ enum xfs_refc_adjust_op adjust)
+{
+ unsigned long long ulen = left->rc_blockcount;
+ xfs_nlink_t new_refcount;
+
+ /*
+ * For a left merge, the left shoulder record must be adjacent to the
+ * start of the range. If this is true, find_left made left and cleft
+ * contain valid contents.
+ */
+ if (!xfs_refc_valid(left) || !xfs_refc_valid(cleft))
+ return false;
+
+ /* Left shoulder record refcount must match the new refcount. */
+ new_refcount = xfs_refc_merge_refcount(cleft, adjust);
+ if (left->rc_refcount != new_refcount)
+ return false;
+
+ /*
+ * The new record cannot exceed the max length. ulen is a ULL as the
+ * individual record block counts can be up to (u32 - 1) in length
+ * hence we need to catch u32 addition overflows here.
+ */
+ ulen += cleft->rc_blockcount;
+ if (ulen >= MAXREFCEXTLEN)
+ return false;
+
+ return true;
+}
+
+static inline bool
+xfs_refc_want_merge_right(
+ const struct xfs_refcount_irec *cright,
+ const struct xfs_refcount_irec *right,
+ enum xfs_refc_adjust_op adjust)
+{
+ unsigned long long ulen = right->rc_blockcount;
+ xfs_nlink_t new_refcount;
+
+ /*
+ * For a right merge, the right shoulder record must be adjacent to the
+ * end of the range. If this is true, find_right made cright and right
+ * contain valid contents.
+ */
+ if (!xfs_refc_valid(right) || !xfs_refc_valid(cright))
+ return false;
+
+ /* Right shoulder record refcount must match the new refcount. */
+ new_refcount = xfs_refc_merge_refcount(cright, adjust);
+ if (right->rc_refcount != new_refcount)
+ return false;
+
+ /*
+ * The new record cannot exceed the max length. ulen is a ULL as the
+ * individual record block counts can be up to (u32 - 1) in length
+ * hence we need to catch u32 addition overflows here.
+ */
+ ulen += cright->rc_blockcount;
+ if (ulen >= MAXREFCEXTLEN)
+ return false;
+
+ return true;
+}
+
/*
* Try to merge with any extents on the boundaries of the adjustment range.
*/
STATIC int
xfs_refcount_merge_extents(
struct xfs_btree_cur *cur,
+ enum xfs_refc_domain domain,
xfs_agblock_t *agbno,
xfs_extlen_t *aglen,
enum xfs_refc_adjust_op adjust,
- int flags,
bool *shape_changed)
{
struct xfs_refcount_irec left = {0}, cleft = {0};
@@ -812,12 +969,12 @@ xfs_refcount_merge_extents(
* just below (agbno + aglen) [cright], and just above (agbno + aglen)
* [right].
*/
- error = xfs_refcount_find_left_extents(cur, &left, &cleft, *agbno,
- *aglen, flags);
+ error = xfs_refcount_find_left_extents(cur, &left, &cleft, domain,
+ *agbno, *aglen);
if (error)
return error;
- error = xfs_refcount_find_right_extents(cur, &right, &cright, *agbno,
- *aglen, flags);
+ error = xfs_refcount_find_right_extents(cur, &right, &cright, domain,
+ *agbno, *aglen);
if (error)
return error;
@@ -829,23 +986,15 @@ xfs_refcount_merge_extents(
(cleft.rc_blockcount == cright.rc_blockcount);
/* Try to merge left, cleft, and right. cleft must == cright. */
- ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount +
- right.rc_blockcount;
- if (xfs_refc_valid(&left) && xfs_refc_valid(&right) &&
- xfs_refc_valid(&cleft) && xfs_refc_valid(&cright) && cequal &&
- left.rc_refcount == cleft.rc_refcount + adjust &&
- right.rc_refcount == cleft.rc_refcount + adjust &&
- ulen < MAXREFCEXTLEN) {
+ if (xfs_refc_want_merge_center(&left, &cleft, &cright, &right, cequal,
+ adjust, &ulen)) {
*shape_changed = true;
return xfs_refcount_merge_center_extents(cur, &left, &cleft,
&right, ulen, aglen);
}
/* Try to merge left and cleft. */
- ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount;
- if (xfs_refc_valid(&left) && xfs_refc_valid(&cleft) &&
- left.rc_refcount == cleft.rc_refcount + adjust &&
- ulen < MAXREFCEXTLEN) {
+ if (xfs_refc_want_merge_left(&left, &cleft, adjust)) {
*shape_changed = true;
error = xfs_refcount_merge_left_extent(cur, &left, &cleft,
agbno, aglen);
@@ -861,16 +1010,13 @@ xfs_refcount_merge_extents(
}
/* Try to merge cright and right. */
- ulen = (unsigned long long)right.rc_blockcount + cright.rc_blockcount;
- if (xfs_refc_valid(&right) && xfs_refc_valid(&cright) &&
- right.rc_refcount == cright.rc_refcount + adjust &&
- ulen < MAXREFCEXTLEN) {
+ if (xfs_refc_want_merge_right(&cright, &right, adjust)) {
*shape_changed = true;
return xfs_refcount_merge_right_extent(cur, &right, &cright,
aglen);
}
- return error;
+ return 0;
}
/*
@@ -933,7 +1079,8 @@ xfs_refcount_adjust_extents(
if (*aglen == 0)
return 0;
- error = xfs_refcount_lookup_ge(cur, *agbno, &found_rec);
+ error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_SHARED, *agbno,
+ &found_rec);
if (error)
goto out_error;
@@ -941,10 +1088,11 @@ xfs_refcount_adjust_extents(
error = xfs_refcount_get_rec(cur, &ext, &found_rec);
if (error)
goto out_error;
- if (!found_rec) {
+ if (!found_rec || ext.rc_domain != XFS_REFC_DOMAIN_SHARED) {
ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks;
ext.rc_blockcount = 0;
ext.rc_refcount = 0;
+ ext.rc_domain = XFS_REFC_DOMAIN_SHARED;
}
/*
@@ -957,6 +1105,8 @@ xfs_refcount_adjust_extents(
tmp.rc_blockcount = min(*aglen,
ext.rc_startblock - *agbno);
tmp.rc_refcount = 1 + adj;
+ tmp.rc_domain = XFS_REFC_DOMAIN_SHARED;
+
trace_xfs_refcount_modify_extent(cur->bc_mp,
cur->bc_ag.pag->pag_agno, &tmp);
@@ -986,15 +1136,30 @@ xfs_refcount_adjust_extents(
(*agbno) += tmp.rc_blockcount;
(*aglen) -= tmp.rc_blockcount;
- error = xfs_refcount_lookup_ge(cur, *agbno,
+ /* Stop if there's nothing left to modify */
+ if (*aglen == 0 || !xfs_refcount_still_have_space(cur))
+ break;
+
+ /* Move the cursor to the start of ext. */
+ error = xfs_refcount_lookup_ge(cur,
+ XFS_REFC_DOMAIN_SHARED, *agbno,
&found_rec);
if (error)
goto out_error;
}
- /* Stop if there's nothing left to modify */
- if (*aglen == 0 || !xfs_refcount_still_have_space(cur))
- break;
+ /*
+ * A previous step trimmed agbno/aglen such that the end of the
+ * range would not be in the middle of the record. If this is
+ * no longer the case, something is seriously wrong with the
+ * btree. Make sure we never feed the synthesized record into
+ * the processing loop below.
+ */
+ if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount == 0) ||
+ XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount > *aglen)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
/*
* Adjust the reference count and either update the tree
@@ -1070,13 +1235,15 @@ xfs_refcount_adjust(
/*
* Ensure that no rcextents cross the boundary of the adjustment range.
*/
- error = xfs_refcount_split_extent(cur, agbno, &shape_changed);
+ error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED,
+ agbno, &shape_changed);
if (error)
goto out_error;
if (shape_changed)
shape_changes++;
- error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed);
+ error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED,
+ agbno + aglen, &shape_changed);
if (error)
goto out_error;
if (shape_changed)
@@ -1085,8 +1252,8 @@ xfs_refcount_adjust(
/*
* Try to merge with the left or right extents of the range.
*/
- error = xfs_refcount_merge_extents(cur, new_agbno, new_aglen, adj,
- XFS_FIND_RCEXT_SHARED, &shape_changed);
+ error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_SHARED,
+ new_agbno, new_aglen, adj, &shape_changed);
if (error)
goto out_error;
if (shape_changed)
@@ -1125,6 +1292,32 @@ xfs_refcount_finish_one_cleanup(
}
/*
+ * Set up a continuation a deferred refcount operation by updating the intent.
+ * Checks to make sure we're not going to run off the end of the AG.
+ */
+static inline int
+xfs_refcount_continue_op(
+ struct xfs_btree_cur *cur,
+ xfs_fsblock_t startblock,
+ xfs_agblock_t new_agbno,
+ xfs_extlen_t new_len,
+ xfs_fsblock_t *new_fsbno)
+{
+ struct xfs_mount *mp = cur->bc_mp;
+ struct xfs_perag *pag = cur->bc_ag.pag;
+
+ if (XFS_IS_CORRUPT(mp, !xfs_verify_agbext(pag, new_agbno, new_len)))
+ return -EFSCORRUPTED;
+
+ *new_fsbno = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno);
+
+ ASSERT(xfs_verify_fsbext(mp, *new_fsbno, new_len));
+ ASSERT(pag->pag_agno == XFS_FSB_TO_AGNO(mp, *new_fsbno));
+
+ return 0;
+}
+
+/*
* Process one of the deferred refcount operations. We pass back the
* btree cursor to maintain our lock on the btree between calls.
* This saves time and eliminates a buffer deadlock between the
@@ -1191,12 +1384,20 @@ xfs_refcount_finish_one(
case XFS_REFCOUNT_INCREASE:
error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
new_len, XFS_REFCOUNT_ADJUST_INCREASE);
- *new_fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno);
+ if (error)
+ goto out_drop;
+ if (*new_len > 0)
+ error = xfs_refcount_continue_op(rcur, startblock,
+ new_agbno, *new_len, new_fsb);
break;
case XFS_REFCOUNT_DECREASE:
error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
new_len, XFS_REFCOUNT_ADJUST_DECREASE);
- *new_fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno);
+ if (error)
+ goto out_drop;
+ if (*new_len > 0)
+ error = xfs_refcount_continue_op(rcur, startblock,
+ new_agbno, *new_len, new_fsb);
break;
case XFS_REFCOUNT_ALLOC_COW:
*new_fsb = startblock + blockcount;
@@ -1307,7 +1508,8 @@ xfs_refcount_find_shared(
*flen = 0;
/* Try to find a refcount extent that crosses the start */
- error = xfs_refcount_lookup_le(cur, agbno, &have);
+ error = xfs_refcount_lookup_le(cur, XFS_REFC_DOMAIN_SHARED, agbno,
+ &have);
if (error)
goto out_error;
if (!have) {
@@ -1325,6 +1527,8 @@ xfs_refcount_find_shared(
error = -EFSCORRUPTED;
goto out_error;
}
+ if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED)
+ goto done;
/* If the extent ends before the start, look at the next one */
if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) {
@@ -1340,6 +1544,8 @@ xfs_refcount_find_shared(
error = -EFSCORRUPTED;
goto out_error;
}
+ if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED)
+ goto done;
}
/* If the extent starts after the range we want, bail out */
@@ -1371,7 +1577,8 @@ xfs_refcount_find_shared(
error = -EFSCORRUPTED;
goto out_error;
}
- if (tmp.rc_startblock >= agbno + aglen ||
+ if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED ||
+ tmp.rc_startblock >= agbno + aglen ||
tmp.rc_startblock != *fbno + *flen)
break;
*flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno);
@@ -1455,17 +1662,23 @@ xfs_refcount_adjust_cow_extents(
return 0;
/* Find any overlapping refcount records */
- error = xfs_refcount_lookup_ge(cur, agbno, &found_rec);
+ error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_COW, agbno,
+ &found_rec);
if (error)
goto out_error;
error = xfs_refcount_get_rec(cur, &ext, &found_rec);
if (error)
goto out_error;
+ if (XFS_IS_CORRUPT(cur->bc_mp, found_rec &&
+ ext.rc_domain != XFS_REFC_DOMAIN_COW)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
if (!found_rec) {
- ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks +
- XFS_REFC_COW_START;
+ ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks;
ext.rc_blockcount = 0;
ext.rc_refcount = 0;
+ ext.rc_domain = XFS_REFC_DOMAIN_COW;
}
switch (adj) {
@@ -1480,6 +1693,8 @@ xfs_refcount_adjust_cow_extents(
tmp.rc_startblock = agbno;
tmp.rc_blockcount = aglen;
tmp.rc_refcount = 1;
+ tmp.rc_domain = XFS_REFC_DOMAIN_COW;
+
trace_xfs_refcount_modify_extent(cur->bc_mp,
cur->bc_ag.pag->pag_agno, &tmp);
@@ -1542,24 +1757,24 @@ xfs_refcount_adjust_cow(
bool shape_changed;
int error;
- agbno += XFS_REFC_COW_START;
-
/*
* Ensure that no rcextents cross the boundary of the adjustment range.
*/
- error = xfs_refcount_split_extent(cur, agbno, &shape_changed);
+ error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW,
+ agbno, &shape_changed);
if (error)
goto out_error;
- error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed);
+ error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW,
+ agbno + aglen, &shape_changed);
if (error)
goto out_error;
/*
* Try to merge with the left or right extents of the range.
*/
- error = xfs_refcount_merge_extents(cur, &agbno, &aglen, adj,
- XFS_FIND_RCEXT_COW, &shape_changed);
+ error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_COW, &agbno,
+ &aglen, adj, &shape_changed);
if (error)
goto out_error;
@@ -1666,10 +1881,18 @@ xfs_refcount_recover_extent(
be32_to_cpu(rec->refc.rc_refcount) != 1))
return -EFSCORRUPTED;
- rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), 0);
+ rr = kmalloc(sizeof(struct xfs_refcount_recovery),
+ GFP_KERNEL | __GFP_NOFAIL);
+ INIT_LIST_HEAD(&rr->rr_list);
xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec);
- list_add_tail(&rr->rr_list, debris);
+ if (XFS_IS_CORRUPT(cur->bc_mp,
+ rr->rr_rrec.rc_domain != XFS_REFC_DOMAIN_COW)) {
+ kfree(rr);
+ return -EFSCORRUPTED;
+ }
+
+ list_add_tail(&rr->rr_list, debris);
return 0;
}
@@ -1687,10 +1910,11 @@ xfs_refcount_recover_cow_leftovers(
union xfs_btree_irec low;
union xfs_btree_irec high;
xfs_fsblock_t fsb;
- xfs_agblock_t agbno;
int error;
- if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START)
+ /* reflink filesystems mustn't have AGs larger than 2^31-1 blocks */
+ BUILD_BUG_ON(XFS_MAX_CRC_AG_BLOCKS >= XFS_REFC_COWFLAG);
+ if (mp->m_sb.sb_agblocks > XFS_MAX_CRC_AG_BLOCKS)
return -EOPNOTSUPP;
INIT_LIST_HEAD(&debris);
@@ -1717,7 +1941,7 @@ xfs_refcount_recover_cow_leftovers(
/* Find all the leftover CoW staging extents. */
memset(&low, 0, sizeof(low));
memset(&high, 0, sizeof(high));
- low.rc.rc_startblock = XFS_REFC_COW_START;
+ low.rc.rc_domain = high.rc.rc_domain = XFS_REFC_DOMAIN_COW;
high.rc.rc_startblock = -1U;
error = xfs_btree_query_range(cur, &low, &high,
xfs_refcount_recover_extent, &debris);
@@ -1738,8 +1962,8 @@ xfs_refcount_recover_cow_leftovers(
&rr->rr_rrec);
/* Free the orphan record */
- agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START;
- fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, agbno);
+ fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno,
+ rr->rr_rrec.rc_startblock);
xfs_refcount_free_cow_extent(tp, fsb,
rr->rr_rrec.rc_blockcount);
@@ -1751,7 +1975,7 @@ xfs_refcount_recover_cow_leftovers(
goto out_free;
list_del(&rr->rr_list);
- kmem_free(rr);
+ kfree(rr);
}
return error;
@@ -1761,7 +1985,7 @@ out_free:
/* Free the leftover list */
list_for_each_entry_safe(rr, n, &debris, rr_list) {
list_del(&rr->rr_list);
- kmem_free(rr);
+ kfree(rr);
}
return error;
}
@@ -1770,6 +1994,7 @@ out_free:
int
xfs_refcount_has_record(
struct xfs_btree_cur *cur,
+ enum xfs_refc_domain domain,
xfs_agblock_t bno,
xfs_extlen_t len,
bool *exists)
@@ -1781,6 +2006,7 @@ xfs_refcount_has_record(
low.rc.rc_startblock = bno;
memset(&high, 0xFF, sizeof(high));
high.rc.rc_startblock = bno + len - 1;
+ low.rc.rc_domain = high.rc.rc_domain = domain;
return xfs_btree_has_record(cur, &low, &high, exists);
}
diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h
index e8b322de7f3d..452f30556f5a 100644
--- a/fs/xfs/libxfs/xfs_refcount.h
+++ b/fs/xfs/libxfs/xfs_refcount.h
@@ -14,14 +14,33 @@ struct xfs_bmbt_irec;
struct xfs_refcount_irec;
extern int xfs_refcount_lookup_le(struct xfs_btree_cur *cur,
- xfs_agblock_t bno, int *stat);
+ enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat);
extern int xfs_refcount_lookup_ge(struct xfs_btree_cur *cur,
- xfs_agblock_t bno, int *stat);
+ enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat);
extern int xfs_refcount_lookup_eq(struct xfs_btree_cur *cur,
- xfs_agblock_t bno, int *stat);
+ enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat);
extern int xfs_refcount_get_rec(struct xfs_btree_cur *cur,
struct xfs_refcount_irec *irec, int *stat);
+static inline uint32_t
+xfs_refcount_encode_startblock(
+ xfs_agblock_t startblock,
+ enum xfs_refc_domain domain)
+{
+ uint32_t start;
+
+ /*
+ * low level btree operations need to handle the generic btree range
+ * query functions (which set rc_domain == -1U), so we check that the
+ * domain is /not/ shared.
+ */
+ start = startblock & ~XFS_REFC_COWFLAG;
+ if (domain != XFS_REFC_DOMAIN_SHARED)
+ start |= XFS_REFC_COWFLAG;
+
+ return start;
+}
+
enum xfs_refcount_intent_type {
XFS_REFCOUNT_INCREASE = 1,
XFS_REFCOUNT_DECREASE,
@@ -36,6 +55,18 @@ struct xfs_refcount_intent {
xfs_fsblock_t ri_startblock;
};
+/* Check that the refcount is appropriate for the record domain. */
+static inline bool
+xfs_refcount_check_domain(
+ const struct xfs_refcount_irec *irec)
+{
+ if (irec->rc_domain == XFS_REFC_DOMAIN_COW && irec->rc_refcount != 1)
+ return false;
+ if (irec->rc_domain == XFS_REFC_DOMAIN_SHARED && irec->rc_refcount < 2)
+ return false;
+ return true;
+}
+
void xfs_refcount_increase_extent(struct xfs_trans *tp,
struct xfs_bmbt_irec *irec);
void xfs_refcount_decrease_extent(struct xfs_trans *tp,
@@ -79,7 +110,8 @@ extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp,
#define XFS_REFCOUNT_ITEM_OVERHEAD 32
extern int xfs_refcount_has_record(struct xfs_btree_cur *cur,
- xfs_agblock_t bno, xfs_extlen_t len, bool *exists);
+ enum xfs_refc_domain domain, xfs_agblock_t bno,
+ xfs_extlen_t len, bool *exists);
union xfs_btree_rec;
extern void xfs_refcount_btrec_to_irec(const union xfs_btree_rec *rec,
struct xfs_refcount_irec *irec);
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index 316c1ec0c3c2..e1f789866683 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -13,6 +13,7 @@
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_refcount_btree.h"
+#include "xfs_refcount.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_trace.h"
@@ -160,7 +161,12 @@ xfs_refcountbt_init_rec_from_cur(
struct xfs_btree_cur *cur,
union xfs_btree_rec *rec)
{
- rec->refc.rc_startblock = cpu_to_be32(cur->bc_rec.rc.rc_startblock);
+ const struct xfs_refcount_irec *irec = &cur->bc_rec.rc;
+ uint32_t start;
+
+ start = xfs_refcount_encode_startblock(irec->rc_startblock,
+ irec->rc_domain);
+ rec->refc.rc_startblock = cpu_to_be32(start);
rec->refc.rc_blockcount = cpu_to_be32(cur->bc_rec.rc.rc_blockcount);
rec->refc.rc_refcount = cpu_to_be32(cur->bc_rec.rc.rc_refcount);
}
@@ -182,10 +188,13 @@ xfs_refcountbt_key_diff(
struct xfs_btree_cur *cur,
const union xfs_btree_key *key)
{
- struct xfs_refcount_irec *rec = &cur->bc_rec.rc;
const struct xfs_refcount_key *kp = &key->refc;
+ const struct xfs_refcount_irec *irec = &cur->bc_rec.rc;
+ uint32_t start;
- return (int64_t)be32_to_cpu(kp->rc_startblock) - rec->rc_startblock;
+ start = xfs_refcount_encode_startblock(irec->rc_startblock,
+ irec->rc_domain);
+ return (int64_t)be32_to_cpu(kp->rc_startblock) - start;
}
STATIC int64_t
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 094dfc897ebc..b56aca1e7c66 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -235,13 +235,8 @@ xfs_rmap_get_rec(
goto out_bad_rec;
} else {
/* check for valid extent range, including overflow */
- if (!xfs_verify_agbno(pag, irec->rm_startblock))
- goto out_bad_rec;
- if (irec->rm_startblock >
- irec->rm_startblock + irec->rm_blockcount)
- goto out_bad_rec;
- if (!xfs_verify_agbno(pag,
- irec->rm_startblock + irec->rm_blockcount - 1))
+ if (!xfs_verify_agbext(pag, irec->rm_startblock,
+ irec->rm_blockcount))
goto out_bad_rec;
}
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index a20cade590e9..1eeecf2eb2a7 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -972,7 +972,9 @@ xfs_log_sb(
*/
if (xfs_has_lazysbcount(mp)) {
mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount);
- mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree);
+ mp->m_sb.sb_ifree = min_t(uint64_t,
+ percpu_counter_sum(&mp->m_ifree),
+ mp->m_sb.sb_icount);
mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks);
}
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index 2c4ad6e4bb14..5b2f27cbdb80 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -422,7 +422,7 @@ xfs_calc_itruncate_reservation_minlogsize(
/*
* In renaming a files we can modify:
- * the four inodes involved: 4 * inode size
+ * the five inodes involved: 5 * inode size
* the two directory btrees: 2 * (max depth + v2) * dir block size
* the two directory bmap btrees: 2 * max depth * block size
* And the bmap_finish transaction can free dir and bmap blocks (two sets
@@ -437,7 +437,7 @@ xfs_calc_rename_reservation(
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
- max((xfs_calc_inode_res(mp, 4) +
+ max((xfs_calc_inode_res(mp, 5) +
xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
XFS_FSB_TO_B(mp, 1))),
(xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index a6b7d98cf68f..5ebdda7e1078 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -166,6 +166,36 @@ typedef struct xfs_bmbt_irec
xfs_exntst_t br_state; /* extent state */
} xfs_bmbt_irec_t;
+enum xfs_refc_domain {
+ XFS_REFC_DOMAIN_SHARED = 0,
+ XFS_REFC_DOMAIN_COW,
+};
+
+#define XFS_REFC_DOMAIN_STRINGS \
+ { XFS_REFC_DOMAIN_SHARED, "shared" }, \
+ { XFS_REFC_DOMAIN_COW, "cow" }
+
+struct xfs_refcount_irec {
+ xfs_agblock_t rc_startblock; /* starting block number */
+ xfs_extlen_t rc_blockcount; /* count of free blocks */
+ xfs_nlink_t rc_refcount; /* number of inodes linked here */
+ enum xfs_refc_domain rc_domain; /* shared or cow staging extent? */
+};
+
+#define XFS_RMAP_ATTR_FORK (1 << 0)
+#define XFS_RMAP_BMBT_BLOCK (1 << 1)
+#define XFS_RMAP_UNWRITTEN (1 << 2)
+#define XFS_RMAP_KEY_FLAGS (XFS_RMAP_ATTR_FORK | \
+ XFS_RMAP_BMBT_BLOCK)
+#define XFS_RMAP_REC_FLAGS (XFS_RMAP_UNWRITTEN)
+struct xfs_rmap_irec {
+ xfs_agblock_t rm_startblock; /* extent start block */
+ xfs_extlen_t rm_blockcount; /* extent length */
+ uint64_t rm_owner; /* extent owner */
+ uint64_t rm_offset; /* offset within the owner */
+ unsigned int rm_flags; /* state flags */
+};
+
/* per-AG block reservation types */
enum xfs_ag_resv_type {
XFS_AG_RESV_NONE = 0,
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index b7b838bd4ba4..4dd52b15f09c 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -609,9 +609,16 @@ out:
/* AGFL */
struct xchk_agfl_info {
- unsigned int sz_entries;
+ /* Number of AGFL entries that the AGF claims are in use. */
+ unsigned int agflcount;
+
+ /* Number of AGFL entries that we found. */
unsigned int nr_entries;
+
+ /* Buffer to hold AGFL entries for extent checking. */
xfs_agblock_t *entries;
+
+ struct xfs_buf *agfl_bp;
struct xfs_scrub *sc;
};
@@ -641,10 +648,10 @@ xchk_agfl_block(
struct xfs_scrub *sc = sai->sc;
if (xfs_verify_agbno(sc->sa.pag, agbno) &&
- sai->nr_entries < sai->sz_entries)
+ sai->nr_entries < sai->agflcount)
sai->entries[sai->nr_entries++] = agbno;
else
- xchk_block_set_corrupt(sc, sc->sa.agfl_bp);
+ xchk_block_set_corrupt(sc, sai->agfl_bp);
xchk_agfl_block_xref(sc, agbno);
@@ -696,19 +703,26 @@ int
xchk_agfl(
struct xfs_scrub *sc)
{
- struct xchk_agfl_info sai;
+ struct xchk_agfl_info sai = {
+ .sc = sc,
+ };
struct xfs_agf *agf;
xfs_agnumber_t agno = sc->sm->sm_agno;
- unsigned int agflcount;
unsigned int i;
int error;
+ /* Lock the AGF and AGI so that nobody can touch this AG. */
error = xchk_ag_read_headers(sc, agno, &sc->sa);
if (!xchk_process_error(sc, agno, XFS_AGFL_BLOCK(sc->mp), &error))
- goto out;
+ return error;
if (!sc->sa.agf_bp)
return -EFSCORRUPTED;
- xchk_buffer_recheck(sc, sc->sa.agfl_bp);
+
+ /* Try to read the AGFL, and verify its structure if we get it. */
+ error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &sai.agfl_bp);
+ if (!xchk_process_error(sc, agno, XFS_AGFL_BLOCK(sc->mp), &error))
+ return error;
+ xchk_buffer_recheck(sc, sai.agfl_bp);
xchk_agfl_xref(sc);
@@ -717,24 +731,21 @@ xchk_agfl(
/* Allocate buffer to ensure uniqueness of AGFL entries. */
agf = sc->sa.agf_bp->b_addr;
- agflcount = be32_to_cpu(agf->agf_flcount);
- if (agflcount > xfs_agfl_size(sc->mp)) {
+ sai.agflcount = be32_to_cpu(agf->agf_flcount);
+ if (sai.agflcount > xfs_agfl_size(sc->mp)) {
xchk_block_set_corrupt(sc, sc->sa.agf_bp);
goto out;
}
- memset(&sai, 0, sizeof(sai));
- sai.sc = sc;
- sai.sz_entries = agflcount;
- sai.entries = kmem_zalloc(sizeof(xfs_agblock_t) * agflcount,
- KM_MAYFAIL);
+ sai.entries = kvcalloc(sai.agflcount, sizeof(xfs_agblock_t),
+ XCHK_GFP_FLAGS);
if (!sai.entries) {
error = -ENOMEM;
goto out;
}
/* Check the blocks in the AGFL. */
- error = xfs_agfl_walk(sc->mp, sc->sa.agf_bp->b_addr,
- sc->sa.agfl_bp, xchk_agfl_block, &sai);
+ error = xfs_agfl_walk(sc->mp, sc->sa.agf_bp->b_addr, sai.agfl_bp,
+ xchk_agfl_block, &sai);
if (error == -ECANCELED) {
error = 0;
goto out_free;
@@ -742,7 +753,7 @@ xchk_agfl(
if (error)
goto out_free;
- if (agflcount != sai.nr_entries) {
+ if (sai.agflcount != sai.nr_entries) {
xchk_block_set_corrupt(sc, sc->sa.agf_bp);
goto out_free;
}
@@ -758,7 +769,7 @@ xchk_agfl(
}
out_free:
- kmem_free(sai.entries);
+ kvfree(sai.entries);
out:
return error;
}
diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
index 1b0b4e243f77..d75d82151eeb 100644
--- a/fs/xfs/scrub/agheader_repair.c
+++ b/fs/xfs/scrub/agheader_repair.c
@@ -442,12 +442,18 @@ out_revert:
/* AGFL */
struct xrep_agfl {
+ /* Bitmap of alleged AGFL blocks that we're not going to add. */
+ struct xbitmap crossed;
+
/* Bitmap of other OWN_AG metadata blocks. */
struct xbitmap agmetablocks;
/* Bitmap of free space. */
struct xbitmap *freesp;
+ /* rmapbt cursor for finding crosslinked blocks */
+ struct xfs_btree_cur *rmap_cur;
+
struct xfs_scrub *sc;
};
@@ -477,6 +483,41 @@ xrep_agfl_walk_rmap(
return xbitmap_set_btcur_path(&ra->agmetablocks, cur);
}
+/* Strike out the blocks that are cross-linked according to the rmapbt. */
+STATIC int
+xrep_agfl_check_extent(
+ struct xrep_agfl *ra,
+ uint64_t start,
+ uint64_t len)
+{
+ xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(ra->sc->mp, start);
+ xfs_agblock_t last_agbno = agbno + len - 1;
+ int error;
+
+ ASSERT(XFS_FSB_TO_AGNO(ra->sc->mp, start) == ra->sc->sa.pag->pag_agno);
+
+ while (agbno <= last_agbno) {
+ bool other_owners;
+
+ error = xfs_rmap_has_other_keys(ra->rmap_cur, agbno, 1,
+ &XFS_RMAP_OINFO_AG, &other_owners);
+ if (error)
+ return error;
+
+ if (other_owners) {
+ error = xbitmap_set(&ra->crossed, agbno, 1);
+ if (error)
+ return error;
+ }
+
+ if (xchk_should_terminate(ra->sc, &error))
+ return error;
+ agbno++;
+ }
+
+ return 0;
+}
+
/*
* Map out all the non-AGFL OWN_AG space in this AG so that we can deduce
* which blocks belong to the AGFL.
@@ -496,44 +537,58 @@ xrep_agfl_collect_blocks(
struct xrep_agfl ra;
struct xfs_mount *mp = sc->mp;
struct xfs_btree_cur *cur;
+ struct xbitmap_range *br, *n;
int error;
ra.sc = sc;
ra.freesp = agfl_extents;
xbitmap_init(&ra.agmetablocks);
+ xbitmap_init(&ra.crossed);
/* Find all space used by the free space btrees & rmapbt. */
cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag);
error = xfs_rmap_query_all(cur, xrep_agfl_walk_rmap, &ra);
- if (error)
- goto err;
xfs_btree_del_cursor(cur, error);
+ if (error)
+ goto out_bmp;
/* Find all blocks currently being used by the bnobt. */
cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp,
sc->sa.pag, XFS_BTNUM_BNO);
error = xbitmap_set_btblocks(&ra.agmetablocks, cur);
- if (error)
- goto err;
xfs_btree_del_cursor(cur, error);
+ if (error)
+ goto out_bmp;
/* Find all blocks currently being used by the cntbt. */
cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp,
sc->sa.pag, XFS_BTNUM_CNT);
error = xbitmap_set_btblocks(&ra.agmetablocks, cur);
- if (error)
- goto err;
-
xfs_btree_del_cursor(cur, error);
+ if (error)
+ goto out_bmp;
/*
* Drop the freesp meta blocks that are in use by btrees.
* The remaining blocks /should/ be AGFL blocks.
*/
error = xbitmap_disunion(agfl_extents, &ra.agmetablocks);
- xbitmap_destroy(&ra.agmetablocks);
if (error)
- return error;
+ goto out_bmp;
+
+ /* Strike out the blocks that are cross-linked. */
+ ra.rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag);
+ for_each_xbitmap_extent(br, n, agfl_extents) {
+ error = xrep_agfl_check_extent(&ra, br->start, br->len);
+ if (error)
+ break;
+ }
+ xfs_btree_del_cursor(ra.rmap_cur, error);
+ if (error)
+ goto out_bmp;
+ error = xbitmap_disunion(agfl_extents, &ra.crossed);
+ if (error)
+ goto out_bmp;
/*
* Calculate the new AGFL size. If we found more blocks than fit in
@@ -541,11 +596,10 @@ xrep_agfl_collect_blocks(
*/
*flcount = min_t(uint64_t, xbitmap_hweight(agfl_extents),
xfs_agfl_size(mp));
- return 0;
-err:
+out_bmp:
+ xbitmap_destroy(&ra.crossed);
xbitmap_destroy(&ra.agmetablocks);
- xfs_btree_del_cursor(cur, error);
return error;
}
@@ -631,7 +685,7 @@ xrep_agfl_init_header(
if (br->len)
break;
list_del(&br->list);
- kmem_free(br);
+ kfree(br);
}
/* Write new AGFL to disk. */
@@ -697,7 +751,6 @@ xrep_agfl(
* freespace overflow to the freespace btrees.
*/
sc->sa.agf_bp = agf_bp;
- sc->sa.agfl_bp = agfl_bp;
error = xrep_roll_ag_trans(sc);
if (error)
goto err;
diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c
index ab427b4d7fe0..3b38f4e2a537 100644
--- a/fs/xfs/scrub/alloc.c
+++ b/fs/xfs/scrub/alloc.c
@@ -100,9 +100,7 @@ xchk_allocbt_rec(
bno = be32_to_cpu(rec->alloc.ar_startblock);
len = be32_to_cpu(rec->alloc.ar_blockcount);
- if (bno + len <= bno ||
- !xfs_verify_agbno(pag, bno) ||
- !xfs_verify_agbno(pag, bno + len - 1))
+ if (!xfs_verify_agbext(pag, bno, len))
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
xchk_allocbt_xref(bs->sc, bno, len);
diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c
index b6f0c9f3f124..31529b9bf389 100644
--- a/fs/xfs/scrub/attr.c
+++ b/fs/xfs/scrub/attr.c
@@ -49,7 +49,7 @@ xchk_setup_xattr_buf(
if (ab) {
if (sz <= ab->sz)
return 0;
- kmem_free(ab);
+ kvfree(ab);
sc->buf = NULL;
}
@@ -79,7 +79,8 @@ xchk_setup_xattr(
* without the inode lock held, which means we can sleep.
*/
if (sc->flags & XCHK_TRY_HARDER) {
- error = xchk_setup_xattr_buf(sc, XATTR_SIZE_MAX, GFP_KERNEL);
+ error = xchk_setup_xattr_buf(sc, XATTR_SIZE_MAX,
+ XCHK_GFP_FLAGS);
if (error)
return error;
}
@@ -138,8 +139,7 @@ xchk_xattr_listent(
* doesn't work, we overload the seen_enough variable to convey
* the error message back to the main scrub function.
*/
- error = xchk_setup_xattr_buf(sx->sc, valuelen,
- GFP_KERNEL | __GFP_RETRY_MAYFAIL);
+ error = xchk_setup_xattr_buf(sx->sc, valuelen, XCHK_GFP_FLAGS);
if (error == -ENOMEM)
error = -EDEADLOCK;
if (error) {
@@ -324,8 +324,7 @@ xchk_xattr_block(
return 0;
/* Allocate memory for block usage checking. */
- error = xchk_setup_xattr_buf(ds->sc, 0,
- GFP_KERNEL | __GFP_RETRY_MAYFAIL);
+ error = xchk_setup_xattr_buf(ds->sc, 0, XCHK_GFP_FLAGS);
if (error == -ENOMEM)
return -EDEADLOCK;
if (error)
diff --git a/fs/xfs/scrub/bitmap.c b/fs/xfs/scrub/bitmap.c
index b89bf9de9b1c..a255f09e9f0a 100644
--- a/fs/xfs/scrub/bitmap.c
+++ b/fs/xfs/scrub/bitmap.c
@@ -10,6 +10,7 @@
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
+#include "scrub/scrub.h"
#include "scrub/bitmap.h"
/*
@@ -25,7 +26,7 @@ xbitmap_set(
{
struct xbitmap_range *bmr;
- bmr = kmem_alloc(sizeof(struct xbitmap_range), KM_MAYFAIL);
+ bmr = kmalloc(sizeof(struct xbitmap_range), XCHK_GFP_FLAGS);
if (!bmr)
return -ENOMEM;
@@ -47,7 +48,7 @@ xbitmap_destroy(
for_each_xbitmap_extent(bmr, n, bitmap) {
list_del(&bmr->list);
- kmem_free(bmr);
+ kfree(bmr);
}
}
@@ -174,15 +175,15 @@ xbitmap_disunion(
/* Total overlap, just delete ex. */
lp = lp->next;
list_del(&br->list);
- kmem_free(br);
+ kfree(br);
break;
case 0:
/*
* Deleting from the middle: add the new right extent
* and then shrink the left extent.
*/
- new_br = kmem_alloc(sizeof(struct xbitmap_range),
- KM_MAYFAIL);
+ new_br = kmalloc(sizeof(struct xbitmap_range),
+ XCHK_GFP_FLAGS);
if (!new_br) {
error = -ENOMEM;
goto out;
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index f0b9cb6506fd..d50d0eab196a 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -90,6 +90,7 @@ out:
struct xchk_bmap_info {
struct xfs_scrub *sc;
+ struct xfs_iext_cursor icur;
xfs_fileoff_t lastoff;
bool is_rt;
bool is_shared;
@@ -146,6 +147,48 @@ xchk_bmap_get_rmap(
return has_rmap;
}
+static inline bool
+xchk_bmap_has_prev(
+ struct xchk_bmap_info *info,
+ struct xfs_bmbt_irec *irec)
+{
+ struct xfs_bmbt_irec got;
+ struct xfs_ifork *ifp;
+
+ ifp = xfs_ifork_ptr(info->sc->ip, info->whichfork);
+
+ if (!xfs_iext_peek_prev_extent(ifp, &info->icur, &got))
+ return false;
+ if (got.br_startoff + got.br_blockcount != irec->br_startoff)
+ return false;
+ if (got.br_startblock + got.br_blockcount != irec->br_startblock)
+ return false;
+ if (got.br_state != irec->br_state)
+ return false;
+ return true;
+}
+
+static inline bool
+xchk_bmap_has_next(
+ struct xchk_bmap_info *info,
+ struct xfs_bmbt_irec *irec)
+{
+ struct xfs_bmbt_irec got;
+ struct xfs_ifork *ifp;
+
+ ifp = xfs_ifork_ptr(info->sc->ip, info->whichfork);
+
+ if (!xfs_iext_peek_next_extent(ifp, &info->icur, &got))
+ return false;
+ if (irec->br_startoff + irec->br_blockcount != got.br_startoff)
+ return false;
+ if (irec->br_startblock + irec->br_blockcount != got.br_startblock)
+ return false;
+ if (got.br_state != irec->br_state)
+ return false;
+ return true;
+}
+
/* Make sure that we have rmapbt records for this extent. */
STATIC void
xchk_bmap_xref_rmap(
@@ -214,6 +257,34 @@ xchk_bmap_xref_rmap(
if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
irec->br_startoff);
+
+ /*
+ * If the rmap starts before this bmbt record, make sure there's a bmbt
+ * record for the previous offset that is contiguous with this mapping.
+ * Skip this for CoW fork extents because the refcount btree (and not
+ * the inode) is the ondisk owner for those extents.
+ */
+ if (info->whichfork != XFS_COW_FORK && rmap.rm_startblock < agbno &&
+ !xchk_bmap_has_prev(info, irec)) {
+ xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
+ irec->br_startoff);
+ return;
+ }
+
+ /*
+ * If the rmap ends after this bmbt record, make sure there's a bmbt
+ * record for the next offset that is contiguous with this mapping.
+ * Skip this for CoW fork extents because the refcount btree (and not
+ * the inode) is the ondisk owner for those extents.
+ */
+ rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
+ if (info->whichfork != XFS_COW_FORK &&
+ rmap_end > agbno + irec->br_blockcount &&
+ !xchk_bmap_has_next(info, irec)) {
+ xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
+ irec->br_startoff);
+ return;
+ }
}
/* Cross-reference a single rtdev extent record. */
@@ -264,6 +335,8 @@ xchk_bmap_iextent_xref(
case XFS_COW_FORK:
xchk_xref_is_cow_staging(info->sc, agbno,
irec->br_blockcount);
+ xchk_xref_is_not_shared(info->sc, agbno,
+ irec->br_blockcount);
break;
}
@@ -297,14 +370,13 @@ xchk_bmap_dirattr_extent(
}
/* Scrub a single extent record. */
-STATIC int
+STATIC void
xchk_bmap_iextent(
struct xfs_inode *ip,
struct xchk_bmap_info *info,
struct xfs_bmbt_irec *irec)
{
struct xfs_mount *mp = info->sc->mp;
- int error = 0;
/*
* Check for out-of-order extents. This record could have come
@@ -325,14 +397,6 @@ xchk_bmap_iextent(
xchk_fblock_set_corrupt(info->sc, info->whichfork,
irec->br_startoff);
- /*
- * Check for delalloc extents. We never iterate the ones in the
- * in-core extent scan, and we should never see these in the bmbt.
- */
- if (isnullstartblock(irec->br_startblock))
- xchk_fblock_set_corrupt(info->sc, info->whichfork,
- irec->br_startoff);
-
/* Make sure the extent points to a valid place. */
if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN)
xchk_fblock_set_corrupt(info->sc, info->whichfork,
@@ -353,15 +417,12 @@ xchk_bmap_iextent(
irec->br_startoff);
if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
- return 0;
+ return;
if (info->is_rt)
xchk_bmap_rt_iextent_xref(ip, info, irec);
else
xchk_bmap_iextent_xref(ip, info, irec);
-
- info->lastoff = irec->br_startoff + irec->br_blockcount;
- return error;
}
/* Scrub a bmbt record. */
@@ -599,14 +660,41 @@ xchk_bmap_check_rmaps(
for_each_perag(sc->mp, agno, pag) {
error = xchk_bmap_check_ag_rmaps(sc, whichfork, pag);
- if (error)
- break;
- if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
- break;
+ if (error ||
+ (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) {
+ xfs_perag_put(pag);
+ return error;
+ }
}
- if (pag)
- xfs_perag_put(pag);
- return error;
+
+ return 0;
+}
+
+/* Scrub a delalloc reservation from the incore extent map tree. */
+STATIC void
+xchk_bmap_iextent_delalloc(
+ struct xfs_inode *ip,
+ struct xchk_bmap_info *info,
+ struct xfs_bmbt_irec *irec)
+{
+ struct xfs_mount *mp = info->sc->mp;
+
+ /*
+ * Check for out-of-order extents. This record could have come
+ * from the incore list, for which there is no ordering check.
+ */
+ if (irec->br_startoff < info->lastoff)
+ xchk_fblock_set_corrupt(info->sc, info->whichfork,
+ irec->br_startoff);
+
+ if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
+ xchk_fblock_set_corrupt(info->sc, info->whichfork,
+ irec->br_startoff);
+
+ /* Make sure the extent points to a valid place. */
+ if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN)
+ xchk_fblock_set_corrupt(info->sc, info->whichfork,
+ irec->br_startoff);
}
/*
@@ -626,7 +714,6 @@ xchk_bmap(
struct xfs_inode *ip = sc->ip;
struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
xfs_fileoff_t endoff;
- struct xfs_iext_cursor icur;
int error = 0;
/* Non-existent forks can be ignored. */
@@ -661,6 +748,8 @@ xchk_bmap(
case XFS_DINODE_FMT_DEV:
case XFS_DINODE_FMT_LOCAL:
/* No mappings to check. */
+ if (whichfork == XFS_COW_FORK)
+ xchk_fblock_set_corrupt(sc, whichfork, 0);
goto out;
case XFS_DINODE_FMT_EXTENTS:
break;
@@ -690,20 +779,22 @@ xchk_bmap(
/* Scrub extent records. */
info.lastoff = 0;
ifp = xfs_ifork_ptr(ip, whichfork);
- for_each_xfs_iext(ifp, &icur, &irec) {
+ for_each_xfs_iext(ifp, &info.icur, &irec) {
if (xchk_should_terminate(sc, &error) ||
(sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
goto out;
- if (isnullstartblock(irec.br_startblock))
- continue;
+
if (irec.br_startoff >= endoff) {
xchk_fblock_set_corrupt(sc, whichfork,
irec.br_startoff);
goto out;
}
- error = xchk_bmap_iextent(ip, &info, &irec);
- if (error)
- goto out;
+
+ if (isnullstartblock(irec.br_startblock))
+ xchk_bmap_iextent_delalloc(ip, &info, &irec);
+ else
+ xchk_bmap_iextent(ip, &info, &irec);
+ info.lastoff = irec.br_startoff + irec.br_blockcount;
}
error = xchk_bmap_check_rmaps(sc, whichfork);
diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c
index 2f4519590dc1..0fd36d5b4646 100644
--- a/fs/xfs/scrub/btree.c
+++ b/fs/xfs/scrub/btree.c
@@ -408,7 +408,6 @@ xchk_btree_check_owner(
struct xfs_buf *bp)
{
struct xfs_btree_cur *cur = bs->cur;
- struct check_owner *co;
/*
* In theory, xfs_btree_get_block should only give us a null buffer
@@ -431,10 +430,13 @@ xchk_btree_check_owner(
* later scanning.
*/
if (cur->bc_btnum == XFS_BTNUM_BNO || cur->bc_btnum == XFS_BTNUM_RMAP) {
- co = kmem_alloc(sizeof(struct check_owner),
- KM_MAYFAIL);
+ struct check_owner *co;
+
+ co = kmalloc(sizeof(struct check_owner), XCHK_GFP_FLAGS);
if (!co)
return -ENOMEM;
+
+ INIT_LIST_HEAD(&co->list);
co->level = level;
co->daddr = xfs_buf_daddr(bp);
list_add_tail(&co->list, &bs->to_check);
@@ -649,7 +651,7 @@ xchk_btree(
xchk_btree_set_corrupt(sc, cur, 0);
return 0;
}
- bs = kmem_zalloc(cur_sz, KM_NOFS | KM_MAYFAIL);
+ bs = kzalloc(cur_sz, XCHK_GFP_FLAGS);
if (!bs)
return -ENOMEM;
bs->cur = cur;
@@ -740,9 +742,9 @@ out:
error = xchk_btree_check_block_owner(bs, co->level,
co->daddr);
list_del(&co->list);
- kmem_free(co);
+ kfree(co);
}
- kmem_free(bs);
+ kfree(bs);
return error;
}
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 9bbbf20f401b..613260b04a3d 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -424,10 +424,6 @@ xchk_ag_read_headers(
if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF))
return error;
- error = xfs_alloc_read_agfl(sa->pag, sc->tp, &sa->agfl_bp);
- if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL))
- return error;
-
return 0;
}
@@ -515,10 +511,6 @@ xchk_ag_free(
struct xchk_ag *sa)
{
xchk_ag_btcur_free(sa);
- if (sa->agfl_bp) {
- xfs_trans_brelse(sc->tp, sa->agfl_bp);
- sa->agfl_bp = NULL;
- }
if (sa->agf_bp) {
xfs_trans_brelse(sc->tp, sa->agf_bp);
sa->agf_bp = NULL;
@@ -789,6 +781,33 @@ xchk_buffer_recheck(
trace_xchk_block_error(sc, xfs_buf_daddr(bp), fa);
}
+static inline int
+xchk_metadata_inode_subtype(
+ struct xfs_scrub *sc,
+ unsigned int scrub_type)
+{
+ __u32 smtype = sc->sm->sm_type;
+ int error;
+
+ sc->sm->sm_type = scrub_type;
+
+ switch (scrub_type) {
+ case XFS_SCRUB_TYPE_INODE:
+ error = xchk_inode(sc);
+ break;
+ case XFS_SCRUB_TYPE_BMBTD:
+ error = xchk_bmap_data(sc);
+ break;
+ default:
+ ASSERT(0);
+ error = -EFSCORRUPTED;
+ break;
+ }
+
+ sc->sm->sm_type = smtype;
+ return error;
+}
+
/*
* Scrub the attr/data forks of a metadata inode. The metadata inode must be
* pointed to by sc->ip and the ILOCK must be held.
@@ -797,13 +816,17 @@ int
xchk_metadata_inode_forks(
struct xfs_scrub *sc)
{
- __u32 smtype;
bool shared;
int error;
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
return 0;
+ /* Check the inode record. */
+ error = xchk_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_INODE);
+ if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
+ return error;
+
/* Metadata inodes don't live on the rt device. */
if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME) {
xchk_ino_set_corrupt(sc, sc->ip->i_ino);
@@ -823,10 +846,7 @@ xchk_metadata_inode_forks(
}
/* Invoke the data fork scrubber. */
- smtype = sc->sm->sm_type;
- sc->sm->sm_type = XFS_SCRUB_TYPE_BMBTD;
- error = xchk_bmap_data(sc);
- sc->sm->sm_type = smtype;
+ error = xchk_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTD);
if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
return error;
@@ -841,7 +861,7 @@ xchk_metadata_inode_forks(
xchk_ino_set_corrupt(sc, sc->ip->i_ino);
}
- return error;
+ return 0;
}
/*
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 454145db10e7..b73648d81d23 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -25,7 +25,7 @@ xchk_should_terminate(
if (fatal_signal_pending(current)) {
if (*error == 0)
- *error = -EAGAIN;
+ *error = -EINTR;
return true;
}
return false;
diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c
index 84fe3d33d699..d17cee177085 100644
--- a/fs/xfs/scrub/dabtree.c
+++ b/fs/xfs/scrub/dabtree.c
@@ -486,7 +486,7 @@ xchk_da_btree(
return 0;
/* Set up initial da state. */
- ds = kmem_zalloc(sizeof(struct xchk_da_btree), KM_NOFS | KM_MAYFAIL);
+ ds = kzalloc(sizeof(struct xchk_da_btree), XCHK_GFP_FLAGS);
if (!ds)
return -ENOMEM;
ds->dargs.dp = sc->ip;
@@ -591,6 +591,6 @@ out:
out_state:
xfs_da_state_free(ds->state);
- kmem_free(ds);
+ kfree(ds);
return error;
}
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
index 5c87800ab223..d1b0f23c2c59 100644
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -666,7 +666,12 @@ xchk_directory_blocks(
struct xfs_scrub *sc)
{
struct xfs_bmbt_irec got;
- struct xfs_da_args args;
+ struct xfs_da_args args = {
+ .dp = sc ->ip,
+ .whichfork = XFS_DATA_FORK,
+ .geo = sc->mp->m_dir_geo,
+ .trans = sc->tp,
+ };
struct xfs_ifork *ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
struct xfs_mount *mp = sc->mp;
xfs_fileoff_t leaf_lblk;
@@ -689,9 +694,6 @@ xchk_directory_blocks(
free_lblk = XFS_B_TO_FSB(mp, XFS_DIR2_FREE_OFFSET);
/* Is this a block dir? */
- args.dp = sc->ip;
- args.geo = mp->m_dir_geo;
- args.trans = sc->tp;
error = xfs_dir2_isblock(&args, &is_block);
if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
goto out;
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index 6a6f8fe7f87c..4777e7b89fdc 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -14,6 +14,8 @@
#include "xfs_health.h"
#include "xfs_btree.h"
#include "xfs_ag.h"
+#include "xfs_rtalloc.h"
+#include "xfs_inode.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -43,6 +45,16 @@
* our tolerance for mismatch between expected and actual counter values.
*/
+struct xchk_fscounters {
+ struct xfs_scrub *sc;
+ uint64_t icount;
+ uint64_t ifree;
+ uint64_t fdblocks;
+ uint64_t frextents;
+ unsigned long long icount_min;
+ unsigned long long icount_max;
+};
+
/*
* Since the expected value computation is lockless but only browses incore
* values, the percpu counters should be fairly close to each other. However,
@@ -116,10 +128,11 @@ xchk_setup_fscounters(
struct xchk_fscounters *fsc;
int error;
- sc->buf = kmem_zalloc(sizeof(struct xchk_fscounters), 0);
+ sc->buf = kzalloc(sizeof(struct xchk_fscounters), XCHK_GFP_FLAGS);
if (!sc->buf)
return -ENOMEM;
fsc = sc->buf;
+ fsc->sc = sc;
xfs_icount_range(sc->mp, &fsc->icount_min, &fsc->icount_max);
@@ -138,6 +151,18 @@ xchk_setup_fscounters(
return xchk_trans_alloc(sc, 0);
}
+/*
+ * Part 1: Collecting filesystem summary counts. For each AG, we add its
+ * summary counts (total inodes, free inodes, free data blocks) to an incore
+ * copy of the overall filesystem summary counts.
+ *
+ * To avoid false corruption reports in part 2, any failure in this part must
+ * set the INCOMPLETE flag even when a negative errno is returned. This care
+ * must be taken with certain errno values (i.e. EFSBADCRC, EFSCORRUPTED,
+ * ECANCELED) that are absorbed into a scrub state flag update by
+ * xchk_*_process_error.
+ */
+
/* Count free space btree blocks manually for pre-lazysbcount filesystems. */
static int
xchk_fscount_btreeblks(
@@ -225,8 +250,10 @@ retry:
}
if (pag)
xfs_perag_put(pag);
- if (error)
+ if (error) {
+ xchk_set_incomplete(sc);
return error;
+ }
/*
* The global incore space reservation is taken from the incore
@@ -267,6 +294,64 @@ retry:
return 0;
}
+#ifdef CONFIG_XFS_RT
+STATIC int
+xchk_fscount_add_frextent(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ const struct xfs_rtalloc_rec *rec,
+ void *priv)
+{
+ struct xchk_fscounters *fsc = priv;
+ int error = 0;
+
+ fsc->frextents += rec->ar_extcount;
+
+ xchk_should_terminate(fsc->sc, &error);
+ return error;
+}
+
+/* Calculate the number of free realtime extents from the realtime bitmap. */
+STATIC int
+xchk_fscount_count_frextents(
+ struct xfs_scrub *sc,
+ struct xchk_fscounters *fsc)
+{
+ struct xfs_mount *mp = sc->mp;
+ int error;
+
+ fsc->frextents = 0;
+ if (!xfs_has_realtime(mp))
+ return 0;
+
+ xfs_ilock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
+ error = xfs_rtalloc_query_all(sc->mp, sc->tp,
+ xchk_fscount_add_frextent, fsc);
+ if (error) {
+ xchk_set_incomplete(sc);
+ goto out_unlock;
+ }
+
+out_unlock:
+ xfs_iunlock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
+ return error;
+}
+#else
+STATIC int
+xchk_fscount_count_frextents(
+ struct xfs_scrub *sc,
+ struct xchk_fscounters *fsc)
+{
+ fsc->frextents = 0;
+ return 0;
+}
+#endif /* CONFIG_XFS_RT */
+
+/*
+ * Part 2: Comparing filesystem summary counters. All we have to do here is
+ * sum the percpu counters and compare them to what we've observed.
+ */
+
/*
* Is the @counter reasonably close to the @expected value?
*
@@ -333,16 +418,17 @@ xchk_fscounters(
{
struct xfs_mount *mp = sc->mp;
struct xchk_fscounters *fsc = sc->buf;
- int64_t icount, ifree, fdblocks;
+ int64_t icount, ifree, fdblocks, frextents;
int error;
/* Snapshot the percpu counters. */
icount = percpu_counter_sum(&mp->m_icount);
ifree = percpu_counter_sum(&mp->m_ifree);
fdblocks = percpu_counter_sum(&mp->m_fdblocks);
+ frextents = percpu_counter_sum(&mp->m_frextents);
/* No negative values, please! */
- if (icount < 0 || ifree < 0 || fdblocks < 0)
+ if (icount < 0 || ifree < 0 || fdblocks < 0 || frextents < 0)
xchk_set_corrupt(sc);
/* See if icount is obviously wrong. */
@@ -353,6 +439,10 @@ xchk_fscounters(
if (fdblocks > mp->m_sb.sb_dblocks)
xchk_set_corrupt(sc);
+ /* See if frextents is obviously wrong. */
+ if (frextents > mp->m_sb.sb_rextents)
+ xchk_set_corrupt(sc);
+
/*
* If ifree exceeds icount by more than the minimum variance then
* something's probably wrong with the counters.
@@ -367,6 +457,13 @@ xchk_fscounters(
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
return 0;
+ /* Count the free extents counter for rt volumes. */
+ error = xchk_fscount_count_frextents(sc, fsc);
+ if (!xchk_process_error(sc, 0, XFS_SB_BLOCK(mp), &error))
+ return error;
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
+ return 0;
+
/* Compare the in-core counters with whatever we counted. */
if (!xchk_fscount_within_range(sc, icount, &mp->m_icount, fsc->icount))
xchk_set_corrupt(sc);
@@ -378,5 +475,9 @@ xchk_fscounters(
fsc->fdblocks))
xchk_set_corrupt(sc);
+ if (!xchk_fscount_within_range(sc, frextents, &mp->m_frextents,
+ fsc->frextents))
+ xchk_set_corrupt(sc);
+
return 0;
}
diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c
index e1026e07bf94..e312be7cd375 100644
--- a/fs/xfs/scrub/ialloc.c
+++ b/fs/xfs/scrub/ialloc.c
@@ -108,9 +108,8 @@ xchk_iallocbt_chunk(
xfs_agblock_t bno;
bno = XFS_AGINO_TO_AGBNO(mp, agino);
- if (bno + len <= bno ||
- !xfs_verify_agbno(pag, bno) ||
- !xfs_verify_agbno(pag, bno + len - 1))
+
+ if (!xfs_verify_agbext(pag, bno, len))
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
xchk_iallocbt_chunk_xref(bs->sc, irec, agino, bno, len);
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index 51820b40ab1c..7a2f38e5202c 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -365,7 +365,7 @@ xchk_dinode(
* pagecache can't cache all the blocks in this file due to
* overly large offsets, flag the inode for admin review.
*/
- if (isize >= mp->m_super->s_maxbytes)
+ if (isize > mp->m_super->s_maxbytes)
xchk_ino_set_warning(sc, ino);
/* di_nblocks */
diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c
index 21b4c9006859..9eeac8565394 100644
--- a/fs/xfs/scrub/quota.c
+++ b/fs/xfs/scrub/quota.c
@@ -14,6 +14,7 @@
#include "xfs_inode.h"
#include "xfs_quota.h"
#include "xfs_qm.h"
+#include "xfs_bmap.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
@@ -84,7 +85,7 @@ xchk_quota_item(
int error = 0;
if (xchk_should_terminate(sc, &error))
- return -ECANCELED;
+ return error;
/*
* Except for the root dquot, the actual dquot we got must either have
@@ -189,11 +190,12 @@ xchk_quota_data_fork(
for_each_xfs_iext(ifp, &icur, &irec) {
if (xchk_should_terminate(sc, &error))
break;
+
/*
- * delalloc extents or blocks mapped above the highest
+ * delalloc/unwritten extents or blocks mapped above the highest
* quota id shouldn't happen.
*/
- if (isnullstartblock(irec.br_startblock) ||
+ if (!xfs_bmap_is_written_extent(&irec) ||
irec.br_startoff > max_dqid_off ||
irec.br_startoff + irec.br_blockcount - 1 > max_dqid_off) {
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK,
diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
index c68b767dc08f..d9c1b3cea4a5 100644
--- a/fs/xfs/scrub/refcount.c
+++ b/fs/xfs/scrub/refcount.c
@@ -127,8 +127,8 @@ xchk_refcountbt_rmap_check(
* is healthy each rmap_irec we see will be in agbno order
* so we don't need insertion sort here.
*/
- frag = kmem_alloc(sizeof(struct xchk_refcnt_frag),
- KM_MAYFAIL);
+ frag = kmalloc(sizeof(struct xchk_refcnt_frag),
+ XCHK_GFP_FLAGS);
if (!frag)
return -ENOMEM;
memcpy(&frag->rm, rec, sizeof(frag->rm));
@@ -215,7 +215,7 @@ xchk_refcountbt_process_rmap_fragments(
continue;
}
list_del(&frag->list);
- kmem_free(frag);
+ kfree(frag);
nr++;
}
@@ -257,11 +257,11 @@ done:
/* Delete fragments and work list. */
list_for_each_entry_safe(frag, n, &worklist, list) {
list_del(&frag->list);
- kmem_free(frag);
+ kfree(frag);
}
list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
list_del(&frag->list);
- kmem_free(frag);
+ kfree(frag);
}
}
@@ -269,15 +269,13 @@ done:
STATIC void
xchk_refcountbt_xref_rmap(
struct xfs_scrub *sc,
- xfs_agblock_t bno,
- xfs_extlen_t len,
- xfs_nlink_t refcount)
+ const struct xfs_refcount_irec *irec)
{
struct xchk_refcnt_check refchk = {
- .sc = sc,
- .bno = bno,
- .len = len,
- .refcount = refcount,
+ .sc = sc,
+ .bno = irec->rc_startblock,
+ .len = irec->rc_blockcount,
+ .refcount = irec->rc_refcount,
.seen = 0,
};
struct xfs_rmap_irec low;
@@ -291,9 +289,9 @@ xchk_refcountbt_xref_rmap(
/* Cross-reference with the rmapbt to confirm the refcount. */
memset(&low, 0, sizeof(low));
- low.rm_startblock = bno;
+ low.rm_startblock = irec->rc_startblock;
memset(&high, 0xFF, sizeof(high));
- high.rm_startblock = bno + len - 1;
+ high.rm_startblock = irec->rc_startblock + irec->rc_blockcount - 1;
INIT_LIST_HEAD(&refchk.fragments);
error = xfs_rmap_query_range(sc->sa.rmap_cur, &low, &high,
@@ -302,30 +300,29 @@ xchk_refcountbt_xref_rmap(
goto out_free;
xchk_refcountbt_process_rmap_fragments(&refchk);
- if (refcount != refchk.seen)
+ if (irec->rc_refcount != refchk.seen)
xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
out_free:
list_for_each_entry_safe(frag, n, &refchk.fragments, list) {
list_del(&frag->list);
- kmem_free(frag);
+ kfree(frag);
}
}
/* Cross-reference with the other btrees. */
STATIC void
xchk_refcountbt_xref(
- struct xfs_scrub *sc,
- xfs_agblock_t agbno,
- xfs_extlen_t len,
- xfs_nlink_t refcount)
+ struct xfs_scrub *sc,
+ const struct xfs_refcount_irec *irec)
{
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
return;
- xchk_xref_is_used_space(sc, agbno, len);
- xchk_xref_is_not_inode_chunk(sc, agbno, len);
- xchk_refcountbt_xref_rmap(sc, agbno, len, refcount);
+ xchk_xref_is_used_space(sc, irec->rc_startblock, irec->rc_blockcount);
+ xchk_xref_is_not_inode_chunk(sc, irec->rc_startblock,
+ irec->rc_blockcount);
+ xchk_refcountbt_xref_rmap(sc, irec);
}
/* Scrub a refcountbt record. */
@@ -334,35 +331,27 @@ xchk_refcountbt_rec(
struct xchk_btree *bs,
const union xfs_btree_rec *rec)
{
+ struct xfs_refcount_irec irec;
xfs_agblock_t *cow_blocks = bs->private;
struct xfs_perag *pag = bs->cur->bc_ag.pag;
- xfs_agblock_t bno;
- xfs_extlen_t len;
- xfs_nlink_t refcount;
- bool has_cowflag;
- bno = be32_to_cpu(rec->refc.rc_startblock);
- len = be32_to_cpu(rec->refc.rc_blockcount);
- refcount = be32_to_cpu(rec->refc.rc_refcount);
+ xfs_refcount_btrec_to_irec(rec, &irec);
- /* Only CoW records can have refcount == 1. */
- has_cowflag = (bno & XFS_REFC_COW_START);
- if ((refcount == 1 && !has_cowflag) || (refcount != 1 && has_cowflag))
+ /* Check the domain and refcount are not incompatible. */
+ if (!xfs_refcount_check_domain(&irec))
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
- if (has_cowflag)
- (*cow_blocks) += len;
+
+ if (irec.rc_domain == XFS_REFC_DOMAIN_COW)
+ (*cow_blocks) += irec.rc_blockcount;
/* Check the extent. */
- bno &= ~XFS_REFC_COW_START;
- if (bno + len <= bno ||
- !xfs_verify_agbno(pag, bno) ||
- !xfs_verify_agbno(pag, bno + len - 1))
+ if (!xfs_verify_agbext(pag, irec.rc_startblock, irec.rc_blockcount))
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
- if (refcount == 0)
+ if (irec.rc_refcount == 0)
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
- xchk_refcountbt_xref(bs->sc, bno, len, refcount);
+ xchk_refcountbt_xref(bs->sc, &irec);
return 0;
}
@@ -426,7 +415,6 @@ xchk_xref_is_cow_staging(
xfs_extlen_t len)
{
struct xfs_refcount_irec rc;
- bool has_cowflag;
int has_refcount;
int error;
@@ -434,8 +422,8 @@ xchk_xref_is_cow_staging(
return;
/* Find the CoW staging extent. */
- error = xfs_refcount_lookup_le(sc->sa.refc_cur,
- agbno + XFS_REFC_COW_START, &has_refcount);
+ error = xfs_refcount_lookup_le(sc->sa.refc_cur, XFS_REFC_DOMAIN_COW,
+ agbno, &has_refcount);
if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur))
return;
if (!has_refcount) {
@@ -451,9 +439,8 @@ xchk_xref_is_cow_staging(
return;
}
- /* CoW flag must be set, refcount must be 1. */
- has_cowflag = (rc.rc_startblock & XFS_REFC_COW_START);
- if (!has_cowflag || rc.rc_refcount != 1)
+ /* CoW lookup returned a shared extent record? */
+ if (rc.rc_domain != XFS_REFC_DOMAIN_COW)
xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
/* Must be at least as long as what was passed in */
@@ -477,7 +464,8 @@ xchk_xref_is_not_shared(
if (!sc->sa.refc_cur || xchk_skip_xref(sc->sm))
return;
- error = xfs_refcount_has_record(sc->sa.refc_cur, agbno, len, &shared);
+ error = xfs_refcount_has_record(sc->sa.refc_cur, XFS_REFC_DOMAIN_SHARED,
+ agbno, len, &shared);
if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur))
return;
if (shared)
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index c18bd039fce9..4b92f9253ccd 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -61,7 +61,6 @@ xrep_attempt(
sc->flags |= XREP_ALREADY_FIXED;
return -EAGAIN;
case -EDEADLOCK:
- case -EAGAIN:
/* Tell the caller to try again having grabbed all the locks. */
if (!(sc->flags & XCHK_TRY_HARDER)) {
sc->flags |= XCHK_TRY_HARDER;
@@ -70,10 +69,15 @@ xrep_attempt(
/*
* We tried harder but still couldn't grab all the resources
* we needed to fix it. The corruption has not been fixed,
- * so report back to userspace.
+ * so exit to userspace with the scan's output flags unchanged.
*/
- return -EFSCORRUPTED;
+ return 0;
default:
+ /*
+ * EAGAIN tells the caller to re-scrub, so we cannot return
+ * that here.
+ */
+ ASSERT(error != -EAGAIN);
return error;
}
}
@@ -121,32 +125,40 @@ xrep_roll_ag_trans(
{
int error;
- /* Keep the AG header buffers locked so we can keep going. */
- if (sc->sa.agi_bp)
+ /*
+ * Keep the AG header buffers locked while we roll the transaction.
+ * Ensure that both AG buffers are dirty and held when we roll the
+ * transaction so that they move forward in the log without losing the
+ * bli (and hence the bli type) when the transaction commits.
+ *
+ * Normal code would never hold clean buffers across a roll, but repair
+ * needs both buffers to maintain a total lock on the AG.
+ */
+ if (sc->sa.agi_bp) {
+ xfs_ialloc_log_agi(sc->tp, sc->sa.agi_bp, XFS_AGI_MAGICNUM);
xfs_trans_bhold(sc->tp, sc->sa.agi_bp);
- if (sc->sa.agf_bp)
+ }
+
+ if (sc->sa.agf_bp) {
+ xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, XFS_AGF_MAGICNUM);
xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
- if (sc->sa.agfl_bp)
- xfs_trans_bhold(sc->tp, sc->sa.agfl_bp);
+ }
/*
- * Roll the transaction. We still own the buffer and the buffer lock
- * regardless of whether or not the roll succeeds. If the roll fails,
- * the buffers will be released during teardown on our way out of the
- * kernel. If it succeeds, we join them to the new transaction and
- * move on.
+ * Roll the transaction. We still hold the AG header buffers locked
+ * regardless of whether or not that succeeds. On failure, the buffers
+ * will be released during teardown on our way out of the kernel. If
+ * successful, join the buffers to the new transaction and move on.
*/
error = xfs_trans_roll(&sc->tp);
if (error)
return error;
- /* Join AG headers to the new transaction. */
+ /* Join the AG headers to the new transaction. */
if (sc->sa.agi_bp)
xfs_trans_bjoin(sc->tp, sc->sa.agi_bp);
if (sc->sa.agf_bp)
xfs_trans_bjoin(sc->tp, sc->sa.agf_bp);
- if (sc->sa.agfl_bp)
- xfs_trans_bjoin(sc->tp, sc->sa.agfl_bp);
return 0;
}
@@ -498,6 +510,7 @@ xrep_put_freelist(
struct xfs_scrub *sc,
xfs_agblock_t agbno)
{
+ struct xfs_buf *agfl_bp;
int error;
/* Make sure there's space on the freelist. */
@@ -516,8 +529,12 @@ xrep_put_freelist(
return error;
/* Put the block on the AGFL. */
+ error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp);
+ if (error)
+ return error;
+
error = xfs_alloc_put_freelist(sc->sa.pag, sc->tp, sc->sa.agf_bp,
- sc->sa.agfl_bp, agbno, 0);
+ agfl_bp, agbno, 0);
if (error)
return error;
xfs_extent_busy_insert(sc->tp, sc->sa.pag, agbno, 1,
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 2e8e400f10a9..07a7a75f987f 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -174,7 +174,7 @@ xchk_teardown(
if (sc->flags & XCHK_REAPING_DISABLED)
xchk_start_reaping(sc);
if (sc->buf) {
- kmem_free(sc->buf);
+ kvfree(sc->buf);
sc->buf = NULL;
}
return error;
@@ -467,7 +467,7 @@ xfs_scrub_metadata(
xfs_warn_mount(mp, XFS_OPSTATE_WARNED_SCRUB,
"EXPERIMENTAL online scrub feature in use. Use at your own risk!");
- sc = kmem_zalloc(sizeof(struct xfs_scrub), KM_NOFS | KM_MAYFAIL);
+ sc = kzalloc(sizeof(struct xfs_scrub), XCHK_GFP_FLAGS);
if (!sc) {
error = -ENOMEM;
goto out;
@@ -557,7 +557,7 @@ out_nofix:
out_teardown:
error = xchk_teardown(sc, error);
out_sc:
- kmem_free(sc);
+ kfree(sc);
out:
trace_xchk_done(XFS_I(file_inode(file)), sm, error);
if (error == -EFSCORRUPTED || error == -EFSBADCRC) {
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index 3de5287e98d8..b4d391b4c938 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -8,6 +8,15 @@
struct xfs_scrub;
+/*
+ * Standard flags for allocating memory within scrub. NOFS context is
+ * configured by the process allocation scope. Scrub and repair must be able
+ * to back out gracefully if there isn't enough memory. Force-cast to avoid
+ * complaints from static checkers.
+ */
+#define XCHK_GFP_FLAGS ((__force gfp_t)(GFP_KERNEL | __GFP_NOWARN | \
+ __GFP_RETRY_MAYFAIL))
+
/* Type info and names for the scrub types. */
enum xchk_type {
ST_NONE = 1, /* disabled */
@@ -39,7 +48,6 @@ struct xchk_ag {
/* AG btree roots */
struct xfs_buf *agf_bp;
- struct xfs_buf *agfl_bp;
struct xfs_buf *agi_bp;
/* AG btrees */
@@ -161,12 +169,4 @@ void xchk_xref_is_used_rt_space(struct xfs_scrub *sc, xfs_rtblock_t rtbno,
# define xchk_xref_is_used_rt_space(sc, rtbno, len) do { } while (0)
#endif
-struct xchk_fscounters {
- uint64_t icount;
- uint64_t ifree;
- uint64_t fdblocks;
- unsigned long long icount_min;
- unsigned long long icount_max;
-};
-
#endif /* __XFS_SCRUB_SCRUB_H__ */
diff --git a/fs/xfs/scrub/symlink.c b/fs/xfs/scrub/symlink.c
index 75311f8daeeb..c1c99ffe7408 100644
--- a/fs/xfs/scrub/symlink.c
+++ b/fs/xfs/scrub/symlink.c
@@ -21,7 +21,7 @@ xchk_setup_symlink(
struct xfs_scrub *sc)
{
/* Allocate the buffer without the inode lock held. */
- sc->buf = kvzalloc(XFS_SYMLINK_MAXLEN + 1, GFP_KERNEL);
+ sc->buf = kvzalloc(XFS_SYMLINK_MAXLEN + 1, XCHK_GFP_FLAGS);
if (!sc->buf)
return -ENOMEM;
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index b744c62052b6..a05f44eb8178 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -242,12 +242,13 @@ xfs_acl_set_mode(
}
int
-xfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+xfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
struct posix_acl *acl, int type)
{
umode_t mode;
bool set_mode = false;
int error = 0;
+ struct inode *inode = d_inode(dentry);
if (!acl)
goto set_acl;
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 263404d0bfda..dcd176149c7a 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -11,7 +11,7 @@ struct posix_acl;
#ifdef CONFIG_XFS_POSIX_ACL
extern struct posix_acl *xfs_get_acl(struct inode *inode, int type, bool rcu);
-extern int xfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+extern int xfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
struct posix_acl *acl, int type);
extern int __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
void xfs_forget_acl(struct inode *inode, const char *name);
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 5d1a995b15f8..41734202796f 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -17,6 +17,8 @@
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_reflink.h"
+#include "xfs_errortag.h"
+#include "xfs_error.h"
struct xfs_writepage_ctx {
struct iomap_writepage_ctx ctx;
@@ -114,9 +116,8 @@ xfs_end_ioend(
if (unlikely(error)) {
if (ioend->io_flags & IOMAP_F_SHARED) {
xfs_reflink_cancel_cow_range(ip, offset, size, true);
- xfs_bmap_punch_delalloc_range(ip,
- XFS_B_TO_FSBT(mp, offset),
- XFS_B_TO_FSB(mp, size));
+ xfs_bmap_punch_delalloc_range(ip, offset,
+ offset + size);
}
goto done;
}
@@ -218,11 +219,17 @@ xfs_imap_valid(
* checked (and found nothing at this offset) could have added
* overlapping blocks.
*/
- if (XFS_WPC(wpc)->data_seq != READ_ONCE(ip->i_df.if_seq))
+ if (XFS_WPC(wpc)->data_seq != READ_ONCE(ip->i_df.if_seq)) {
+ trace_xfs_wb_data_iomap_invalid(ip, &wpc->iomap,
+ XFS_WPC(wpc)->data_seq, XFS_DATA_FORK);
return false;
+ }
if (xfs_inode_has_cow_data(ip) &&
- XFS_WPC(wpc)->cow_seq != READ_ONCE(ip->i_cowfp->if_seq))
+ XFS_WPC(wpc)->cow_seq != READ_ONCE(ip->i_cowfp->if_seq)) {
+ trace_xfs_wb_cow_iomap_invalid(ip, &wpc->iomap,
+ XFS_WPC(wpc)->cow_seq, XFS_COW_FORK);
return false;
+ }
return true;
}
@@ -286,6 +293,8 @@ xfs_map_blocks(
if (xfs_is_shutdown(mp))
return -EIO;
+ XFS_ERRORTAG_DELAY(mp, XFS_ERRTAG_WB_DELAY_MS);
+
/*
* COW fork blocks can overlap data fork blocks even if the blocks
* aren't shared. COW I/O always takes precedent, so we must always
@@ -373,7 +382,7 @@ retry:
isnullstartblock(imap.br_startblock))
goto allocate_blocks;
- xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0);
+ xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0, XFS_WPC(wpc)->data_seq);
trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap);
return 0;
allocate_blocks:
@@ -455,12 +464,8 @@ xfs_discard_folio(
struct folio *folio,
loff_t pos)
{
- struct inode *inode = folio->mapping->host;
- struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_inode *ip = XFS_I(folio->mapping->host);
struct xfs_mount *mp = ip->i_mount;
- size_t offset = offset_in_folio(folio, pos);
- xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, pos);
- xfs_fileoff_t pageoff_fsb = XFS_B_TO_FSBT(mp, offset);
int error;
if (xfs_is_shutdown(mp))
@@ -470,8 +475,9 @@ xfs_discard_folio(
"page discard on page "PTR_FMT", inode 0x%llx, pos %llu.",
folio, ip->i_ino, pos);
- error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
- i_blocks_per_folio(inode, folio) - pageoff_fsb);
+ error = xfs_bmap_punch_delalloc_range(ip, pos,
+ round_up(pos, folio_size(folio)));
+
if (error && !xfs_is_shutdown(mp))
xfs_alert(mp, "page discard unable to remove delalloc mapping.");
}
diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c
index cf5ce607dc05..2788a6f2edcd 100644
--- a/fs/xfs/xfs_attr_item.c
+++ b/fs/xfs/xfs_attr_item.c
@@ -245,28 +245,6 @@ xfs_attri_init(
return attrip;
}
-/*
- * Copy an attr format buffer from the given buf, and into the destination attr
- * format structure.
- */
-STATIC int
-xfs_attri_copy_format(
- struct xfs_log_iovec *buf,
- struct xfs_attri_log_format *dst_attr_fmt)
-{
- struct xfs_attri_log_format *src_attr_fmt = buf->i_addr;
- size_t len;
-
- len = sizeof(struct xfs_attri_log_format);
- if (buf->i_len != len) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
- return -EFSCORRUPTED;
- }
-
- memcpy((char *)dst_attr_fmt, (char *)src_attr_fmt, len);
- return 0;
-}
-
static inline struct xfs_attrd_log_item *ATTRD_ITEM(struct xfs_log_item *lip)
{
return container_of(lip, struct xfs_attrd_log_item, attrd_item);
@@ -731,24 +709,50 @@ xlog_recover_attri_commit_pass2(
struct xfs_attri_log_nameval *nv;
const void *attr_value = NULL;
const void *attr_name;
- int error;
+ size_t len;
attri_formatp = item->ri_buf[0].i_addr;
attr_name = item->ri_buf[1].i_addr;
/* Validate xfs_attri_log_format before the large memory allocation */
+ len = sizeof(struct xfs_attri_log_format);
+ if (item->ri_buf[0].i_len != len) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+ }
+
if (!xfs_attri_validate(mp, attri_formatp)) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+ }
+
+ /* Validate the attr name */
+ if (item->ri_buf[1].i_len !=
+ xlog_calc_iovec_len(attri_formatp->alfi_name_len)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
return -EFSCORRUPTED;
}
if (!xfs_attr_namecheck(attr_name, attri_formatp->alfi_name_len)) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[1].i_addr, item->ri_buf[1].i_len);
return -EFSCORRUPTED;
}
- if (attri_formatp->alfi_value_len)
+ /* Validate the attr value, if present */
+ if (attri_formatp->alfi_value_len != 0) {
+ if (item->ri_buf[2].i_len != xlog_calc_iovec_len(attri_formatp->alfi_value_len)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr,
+ item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+ }
+
attr_value = item->ri_buf[2].i_addr;
+ }
/*
* Memory alloc failure will cause replay to abort. We attach the
@@ -760,9 +764,7 @@ xlog_recover_attri_commit_pass2(
attri_formatp->alfi_value_len);
attrip = xfs_attri_init(mp, nv);
- error = xfs_attri_copy_format(&item->ri_buf[0], &attrip->attri_format);
- if (error)
- goto out;
+ memcpy(&attrip->attri_format, attri_formatp, len);
/*
* The ATTRI has two references. One for the ATTRD and one for ATTRI to
@@ -774,10 +776,6 @@ xlog_recover_attri_commit_pass2(
xfs_attri_release(attrip);
xfs_attri_log_nameval_put(nv);
return 0;
-out:
- xfs_attri_item_free(attrip);
- xfs_attri_log_nameval_put(nv);
- return error;
}
/*
@@ -842,7 +840,8 @@ xlog_recover_attrd_commit_pass2(
attrd_formatp = item->ri_buf[0].i_addr;
if (item->ri_buf[0].i_len != sizeof(struct xfs_attrd_log_format)) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
return -EFSCORRUPTED;
}
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 51f66e982484..41323da523d1 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -608,28 +608,18 @@ static const struct xfs_item_ops xfs_bui_item_ops = {
.iop_relog = xfs_bui_item_relog,
};
-/*
- * Copy an BUI format buffer from the given buf, and into the destination
- * BUI format structure. The BUI/BUD items were designed not to need any
- * special alignment handling.
- */
-static int
+static inline void
xfs_bui_copy_format(
- struct xfs_log_iovec *buf,
- struct xfs_bui_log_format *dst_bui_fmt)
+ struct xfs_bui_log_format *dst,
+ const struct xfs_bui_log_format *src)
{
- struct xfs_bui_log_format *src_bui_fmt;
- uint len;
+ unsigned int i;
- src_bui_fmt = buf->i_addr;
- len = xfs_bui_log_format_sizeof(src_bui_fmt->bui_nextents);
+ memcpy(dst, src, offsetof(struct xfs_bui_log_format, bui_extents));
- if (buf->i_len == len) {
- memcpy(dst_bui_fmt, src_bui_fmt, len);
- return 0;
- }
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
- return -EFSCORRUPTED;
+ for (i = 0; i < src->bui_nextents; i++)
+ memcpy(&dst->bui_extents[i], &src->bui_extents[i],
+ sizeof(struct xfs_map_extent));
}
/*
@@ -646,23 +636,34 @@ xlog_recover_bui_commit_pass2(
struct xlog_recover_item *item,
xfs_lsn_t lsn)
{
- int error;
struct xfs_mount *mp = log->l_mp;
struct xfs_bui_log_item *buip;
struct xfs_bui_log_format *bui_formatp;
+ size_t len;
bui_formatp = item->ri_buf[0].i_addr;
+ if (item->ri_buf[0].i_len < xfs_bui_log_format_sizeof(0)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+ }
+
if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
return -EFSCORRUPTED;
}
- buip = xfs_bui_init(mp);
- error = xfs_bui_copy_format(&item->ri_buf[0], &buip->bui_format);
- if (error) {
- xfs_bui_item_free(buip);
- return error;
+
+ len = xfs_bui_log_format_sizeof(bui_formatp->bui_nextents);
+ if (item->ri_buf[0].i_len != len) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
}
+
+ buip = xfs_bui_init(mp);
+ xfs_bui_copy_format(&buip->bui_format, bui_formatp);
atomic_set(&buip->bui_next_extent, bui_formatp->bui_nextents);
/*
* Insert the intent into the AIL directly and drop one reference so
@@ -696,7 +697,8 @@ xlog_recover_bud_commit_pass2(
bud_formatp = item->ri_buf[0].i_addr;
if (item->ri_buf[0].i_len != sizeof(struct xfs_bud_log_format)) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
return -EFSCORRUPTED;
}
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 04d0c2bff67c..867645b74d88 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -590,11 +590,13 @@ out_unlock_iolock:
int
xfs_bmap_punch_delalloc_range(
struct xfs_inode *ip,
- xfs_fileoff_t start_fsb,
- xfs_fileoff_t length)
+ xfs_off_t start_byte,
+ xfs_off_t end_byte)
{
+ struct xfs_mount *mp = ip->i_mount;
struct xfs_ifork *ifp = &ip->i_df;
- xfs_fileoff_t end_fsb = start_fsb + length;
+ xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, start_byte);
+ xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, end_byte);
struct xfs_bmbt_irec got, del;
struct xfs_iext_cursor icur;
int error = 0;
@@ -607,7 +609,7 @@ xfs_bmap_punch_delalloc_range(
while (got.br_startoff + got.br_blockcount > start_fsb) {
del = got;
- xfs_trim_extent(&del, start_fsb, length);
+ xfs_trim_extent(&del, start_fsb, end_fsb - start_fsb);
/*
* A delete can push the cursor forward. Step back to the
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 24b37d211f1d..6888078f5c31 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -31,7 +31,7 @@ xfs_bmap_rtalloc(struct xfs_bmalloca *ap)
#endif /* CONFIG_XFS_RT */
int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip,
- xfs_fileoff_t start_fsb, xfs_fileoff_t length);
+ xfs_off_t start_byte, xfs_off_t end_byte);
struct kgetbmap {
__s64 bmv_offset; /* file offset of segment in blocks */
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index dde346450952..54c774af6e1c 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1945,6 +1945,7 @@ xfs_free_buftarg(
list_lru_destroy(&btp->bt_lru);
blkdev_issue_flush(btp->bt_bdev);
+ invalidate_bdev(btp->bt_bdev);
fs_put_dax(btp->bt_daxdev, btp->bt_mount);
kmem_free(btp);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 522d450a94b1..df7322ed73fa 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -1018,6 +1018,8 @@ xfs_buf_item_relse(
trace_xfs_buf_item_relse(bp, _RET_IP_);
ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags));
+ if (atomic_read(&bip->bli_refcount))
+ return;
bp->b_log_item = NULL;
xfs_buf_rele(bp);
xfs_buf_item_free(bip);
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 7db588ed0be5..ae082808cfed 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -46,7 +46,7 @@ static unsigned int xfs_errortag_random_default[] = {
XFS_RANDOM_REFCOUNT_FINISH_ONE,
XFS_RANDOM_BMAP_FINISH_ONE,
XFS_RANDOM_AG_RESV_CRITICAL,
- XFS_RANDOM_DROP_WRITES,
+ 0, /* XFS_RANDOM_DROP_WRITES has been removed */
XFS_RANDOM_LOG_BAD_CRC,
XFS_RANDOM_LOG_ITEM_PIN,
XFS_RANDOM_BUF_LRU_REF,
@@ -60,6 +60,8 @@ static unsigned int xfs_errortag_random_default[] = {
XFS_RANDOM_LARP,
XFS_RANDOM_DA_LEAF_SPLIT,
XFS_RANDOM_ATTR_LEAF_TO_NODE,
+ XFS_RANDOM_WB_DELAY_MS,
+ XFS_RANDOM_WRITE_DELAY_MS,
};
struct xfs_errortag_attr {
@@ -162,7 +164,6 @@ XFS_ERRORTAG_ATTR_RW(refcount_continue_update, XFS_ERRTAG_REFCOUNT_CONTINUE_UPDA
XFS_ERRORTAG_ATTR_RW(refcount_finish_one, XFS_ERRTAG_REFCOUNT_FINISH_ONE);
XFS_ERRORTAG_ATTR_RW(bmap_finish_one, XFS_ERRTAG_BMAP_FINISH_ONE);
XFS_ERRORTAG_ATTR_RW(ag_resv_critical, XFS_ERRTAG_AG_RESV_CRITICAL);
-XFS_ERRORTAG_ATTR_RW(drop_writes, XFS_ERRTAG_DROP_WRITES);
XFS_ERRORTAG_ATTR_RW(log_bad_crc, XFS_ERRTAG_LOG_BAD_CRC);
XFS_ERRORTAG_ATTR_RW(log_item_pin, XFS_ERRTAG_LOG_ITEM_PIN);
XFS_ERRORTAG_ATTR_RW(buf_lru_ref, XFS_ERRTAG_BUF_LRU_REF);
@@ -176,6 +177,8 @@ XFS_ERRORTAG_ATTR_RW(ag_resv_fail, XFS_ERRTAG_AG_RESV_FAIL);
XFS_ERRORTAG_ATTR_RW(larp, XFS_ERRTAG_LARP);
XFS_ERRORTAG_ATTR_RW(da_leaf_split, XFS_ERRTAG_DA_LEAF_SPLIT);
XFS_ERRORTAG_ATTR_RW(attr_leaf_to_node, XFS_ERRTAG_ATTR_LEAF_TO_NODE);
+XFS_ERRORTAG_ATTR_RW(wb_delay_ms, XFS_ERRTAG_WB_DELAY_MS);
+XFS_ERRORTAG_ATTR_RW(write_delay_ms, XFS_ERRTAG_WRITE_DELAY_MS);
static struct attribute *xfs_errortag_attrs[] = {
XFS_ERRORTAG_ATTR_LIST(noerror),
@@ -206,7 +209,6 @@ static struct attribute *xfs_errortag_attrs[] = {
XFS_ERRORTAG_ATTR_LIST(refcount_finish_one),
XFS_ERRORTAG_ATTR_LIST(bmap_finish_one),
XFS_ERRORTAG_ATTR_LIST(ag_resv_critical),
- XFS_ERRORTAG_ATTR_LIST(drop_writes),
XFS_ERRORTAG_ATTR_LIST(log_bad_crc),
XFS_ERRORTAG_ATTR_LIST(log_item_pin),
XFS_ERRORTAG_ATTR_LIST(buf_lru_ref),
@@ -220,6 +222,8 @@ static struct attribute *xfs_errortag_attrs[] = {
XFS_ERRORTAG_ATTR_LIST(larp),
XFS_ERRORTAG_ATTR_LIST(da_leaf_split),
XFS_ERRORTAG_ATTR_LIST(attr_leaf_to_node),
+ XFS_ERRORTAG_ATTR_LIST(wb_delay_ms),
+ XFS_ERRORTAG_ATTR_LIST(write_delay_ms),
NULL,
};
ATTRIBUTE_GROUPS(xfs_errortag);
@@ -234,13 +238,18 @@ int
xfs_errortag_init(
struct xfs_mount *mp)
{
+ int ret;
+
mp->m_errortag = kmem_zalloc(sizeof(unsigned int) * XFS_ERRTAG_MAX,
KM_MAYFAIL);
if (!mp->m_errortag)
return -ENOMEM;
- return xfs_sysfs_init(&mp->m_errortag_kobj, &xfs_errortag_ktype,
- &mp->m_kobj, "errortag");
+ ret = xfs_sysfs_init(&mp->m_errortag_kobj, &xfs_errortag_ktype,
+ &mp->m_kobj, "errortag");
+ if (ret)
+ kmem_free(mp->m_errortag);
+ return ret;
}
void
@@ -251,6 +260,32 @@ xfs_errortag_del(
kmem_free(mp->m_errortag);
}
+static bool
+xfs_errortag_valid(
+ unsigned int error_tag)
+{
+ if (error_tag >= XFS_ERRTAG_MAX)
+ return false;
+
+ /* Error out removed injection types */
+ if (error_tag == XFS_ERRTAG_DROP_WRITES)
+ return false;
+ return true;
+}
+
+bool
+xfs_errortag_enabled(
+ struct xfs_mount *mp,
+ unsigned int tag)
+{
+ if (!mp->m_errortag)
+ return false;
+ if (!xfs_errortag_valid(tag))
+ return false;
+
+ return mp->m_errortag[tag] != 0;
+}
+
bool
xfs_errortag_test(
struct xfs_mount *mp,
@@ -272,9 +307,11 @@ xfs_errortag_test(
if (!mp->m_errortag)
return false;
- ASSERT(error_tag < XFS_ERRTAG_MAX);
+ if (!xfs_errortag_valid(error_tag))
+ return false;
+
randfactor = mp->m_errortag[error_tag];
- if (!randfactor || prandom_u32_max(randfactor))
+ if (!randfactor || get_random_u32_below(randfactor))
return false;
xfs_warn_ratelimited(mp,
@@ -288,7 +325,7 @@ xfs_errortag_get(
struct xfs_mount *mp,
unsigned int error_tag)
{
- if (error_tag >= XFS_ERRTAG_MAX)
+ if (!xfs_errortag_valid(error_tag))
return -EINVAL;
return mp->m_errortag[error_tag];
@@ -300,7 +337,7 @@ xfs_errortag_set(
unsigned int error_tag,
unsigned int tag_value)
{
- if (error_tag >= XFS_ERRTAG_MAX)
+ if (!xfs_errortag_valid(error_tag))
return -EINVAL;
mp->m_errortag[error_tag] = tag_value;
@@ -314,7 +351,7 @@ xfs_errortag_add(
{
BUILD_BUG_ON(ARRAY_SIZE(xfs_errortag_random_default) != XFS_ERRTAG_MAX);
- if (error_tag >= XFS_ERRTAG_MAX)
+ if (!xfs_errortag_valid(error_tag))
return -EINVAL;
return xfs_errortag_set(mp, error_tag,
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 5191e9145e55..dbe6c37dc697 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -45,6 +45,18 @@ extern bool xfs_errortag_test(struct xfs_mount *mp, const char *expression,
const char *file, int line, unsigned int error_tag);
#define XFS_TEST_ERROR(expr, mp, tag) \
((expr) || xfs_errortag_test((mp), #expr, __FILE__, __LINE__, (tag)))
+bool xfs_errortag_enabled(struct xfs_mount *mp, unsigned int tag);
+#define XFS_ERRORTAG_DELAY(mp, tag) \
+ do { \
+ might_sleep(); \
+ if (!xfs_errortag_enabled((mp), (tag))) \
+ break; \
+ xfs_warn_ratelimited((mp), \
+"Injecting %ums delay at file %s, line %d, on filesystem \"%s\"", \
+ (mp)->m_errortag[(tag)], __FILE__, __LINE__, \
+ (mp)->m_super->s_id); \
+ mdelay((mp)->m_errortag[(tag)]); \
+ } while (0)
extern int xfs_errortag_get(struct xfs_mount *mp, unsigned int error_tag);
extern int xfs_errortag_set(struct xfs_mount *mp, unsigned int error_tag,
@@ -55,6 +67,7 @@ extern int xfs_errortag_clearall(struct xfs_mount *mp);
#define xfs_errortag_init(mp) (0)
#define xfs_errortag_del(mp)
#define XFS_TEST_ERROR(expr, mp, tag) (expr)
+#define XFS_ERRORTAG_DELAY(mp, tag) ((void)0)
#define xfs_errortag_set(mp, tag, val) (ENOSYS)
#define xfs_errortag_add(mp, tag) (ENOSYS)
#define xfs_errortag_clearall(mp) (ENOSYS)
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 27ccfcd82f04..d5130d1fcfae 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -66,27 +66,16 @@ xfs_efi_release(
xfs_efi_item_free(efip);
}
-/*
- * This returns the number of iovecs needed to log the given efi item.
- * We only need 1 iovec for an efi item. It just logs the efi_log_format
- * structure.
- */
-static inline int
-xfs_efi_item_sizeof(
- struct xfs_efi_log_item *efip)
-{
- return sizeof(struct xfs_efi_log_format) +
- (efip->efi_format.efi_nextents - 1) * sizeof(xfs_extent_t);
-}
-
STATIC void
xfs_efi_item_size(
struct xfs_log_item *lip,
int *nvecs,
int *nbytes)
{
+ struct xfs_efi_log_item *efip = EFI_ITEM(lip);
+
*nvecs += 1;
- *nbytes += xfs_efi_item_sizeof(EFI_ITEM(lip));
+ *nbytes += xfs_efi_log_format_sizeof(efip->efi_format.efi_nextents);
}
/*
@@ -112,7 +101,7 @@ xfs_efi_item_format(
xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFI_FORMAT,
&efip->efi_format,
- xfs_efi_item_sizeof(efip));
+ xfs_efi_log_format_sizeof(efip->efi_format.efi_nextents));
}
@@ -155,13 +144,11 @@ xfs_efi_init(
{
struct xfs_efi_log_item *efip;
- uint size;
ASSERT(nextents > 0);
if (nextents > XFS_EFI_MAX_FAST_EXTENTS) {
- size = (uint)(sizeof(struct xfs_efi_log_item) +
- ((nextents - 1) * sizeof(xfs_extent_t)));
- efip = kmem_zalloc(size, 0);
+ efip = kzalloc(xfs_efi_log_item_sizeof(nextents),
+ GFP_KERNEL | __GFP_NOFAIL);
} else {
efip = kmem_cache_zalloc(xfs_efi_cache,
GFP_KERNEL | __GFP_NOFAIL);
@@ -188,15 +175,17 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
{
xfs_efi_log_format_t *src_efi_fmt = buf->i_addr;
uint i;
- uint len = sizeof(xfs_efi_log_format_t) +
- (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_t);
- uint len32 = sizeof(xfs_efi_log_format_32_t) +
- (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_32_t);
- uint len64 = sizeof(xfs_efi_log_format_64_t) +
- (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_64_t);
+ uint len = xfs_efi_log_format_sizeof(src_efi_fmt->efi_nextents);
+ uint len32 = xfs_efi_log_format32_sizeof(src_efi_fmt->efi_nextents);
+ uint len64 = xfs_efi_log_format64_sizeof(src_efi_fmt->efi_nextents);
if (buf->i_len == len) {
- memcpy((char *)dst_efi_fmt, (char*)src_efi_fmt, len);
+ memcpy(dst_efi_fmt, src_efi_fmt,
+ offsetof(struct xfs_efi_log_format, efi_extents));
+ for (i = 0; i < src_efi_fmt->efi_nextents; i++)
+ memcpy(&dst_efi_fmt->efi_extents[i],
+ &src_efi_fmt->efi_extents[i],
+ sizeof(struct xfs_extent));
return 0;
} else if (buf->i_len == len32) {
xfs_efi_log_format_32_t *src_efi_fmt_32 = buf->i_addr;
@@ -227,7 +216,8 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
}
return 0;
}
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, NULL, buf->i_addr,
+ buf->i_len);
return -EFSCORRUPTED;
}
@@ -246,27 +236,16 @@ xfs_efd_item_free(struct xfs_efd_log_item *efdp)
kmem_cache_free(xfs_efd_cache, efdp);
}
-/*
- * This returns the number of iovecs needed to log the given efd item.
- * We only need 1 iovec for an efd item. It just logs the efd_log_format
- * structure.
- */
-static inline int
-xfs_efd_item_sizeof(
- struct xfs_efd_log_item *efdp)
-{
- return sizeof(xfs_efd_log_format_t) +
- (efdp->efd_format.efd_nextents - 1) * sizeof(xfs_extent_t);
-}
-
STATIC void
xfs_efd_item_size(
struct xfs_log_item *lip,
int *nvecs,
int *nbytes)
{
+ struct xfs_efd_log_item *efdp = EFD_ITEM(lip);
+
*nvecs += 1;
- *nbytes += xfs_efd_item_sizeof(EFD_ITEM(lip));
+ *nbytes += xfs_efd_log_format_sizeof(efdp->efd_format.efd_nextents);
}
/*
@@ -291,7 +270,7 @@ xfs_efd_item_format(
xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFD_FORMAT,
&efdp->efd_format,
- xfs_efd_item_sizeof(efdp));
+ xfs_efd_log_format_sizeof(efdp->efd_format.efd_nextents));
}
/*
@@ -340,9 +319,8 @@ xfs_trans_get_efd(
ASSERT(nextents > 0);
if (nextents > XFS_EFD_MAX_FAST_EXTENTS) {
- efdp = kmem_zalloc(sizeof(struct xfs_efd_log_item) +
- (nextents - 1) * sizeof(struct xfs_extent),
- 0);
+ efdp = kzalloc(xfs_efd_log_item_sizeof(nextents),
+ GFP_KERNEL | __GFP_NOFAIL);
} else {
efdp = kmem_cache_zalloc(xfs_efd_cache,
GFP_KERNEL | __GFP_NOFAIL);
@@ -733,6 +711,12 @@ xlog_recover_efi_commit_pass2(
efi_formatp = item->ri_buf[0].i_addr;
+ if (item->ri_buf[0].i_len < xfs_efi_log_format_sizeof(0)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+ }
+
efip = xfs_efi_init(mp, efi_formatp->efi_nextents);
error = xfs_efi_copy_format(&item->ri_buf[0], &efip->efi_format);
if (error) {
@@ -769,12 +753,24 @@ xlog_recover_efd_commit_pass2(
xfs_lsn_t lsn)
{
struct xfs_efd_log_format *efd_formatp;
+ int buflen = item->ri_buf[0].i_len;
efd_formatp = item->ri_buf[0].i_addr;
- ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) +
- ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) ||
- (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) +
- ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t)))));
+
+ if (buflen < sizeof(struct xfs_efd_log_format)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
+ efd_formatp, buflen);
+ return -EFSCORRUPTED;
+ }
+
+ if (item->ri_buf[0].i_len != xfs_efd_log_format32_sizeof(
+ efd_formatp->efd_nextents) &&
+ item->ri_buf[0].i_len != xfs_efd_log_format64_sizeof(
+ efd_formatp->efd_nextents)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
+ efd_formatp, buflen);
+ return -EFSCORRUPTED;
+ }
xlog_recover_release_intent(log, XFS_LI_EFI, efd_formatp->efd_efi_id);
return 0;
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
index 186d0f2137f1..da6a5afa607c 100644
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -52,6 +52,14 @@ struct xfs_efi_log_item {
xfs_efi_log_format_t efi_format;
};
+static inline size_t
+xfs_efi_log_item_sizeof(
+ unsigned int nr)
+{
+ return offsetof(struct xfs_efi_log_item, efi_format) +
+ xfs_efi_log_format_sizeof(nr);
+}
+
/*
* This is the "extent free done" log item. It is used to log
* the fact that some extents earlier mentioned in an efi item
@@ -64,6 +72,14 @@ struct xfs_efd_log_item {
xfs_efd_log_format_t efd_format;
};
+static inline size_t
+xfs_efd_log_item_sizeof(
+ unsigned int nr)
+{
+ return offsetof(struct xfs_efd_log_item, efd_format) +
+ xfs_efd_log_format_sizeof(nr);
+}
+
/*
* Max number of extents in fast allocation path.
*/
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index c6c80265c0b2..595a5bcf46b9 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1261,7 +1261,7 @@ xfs_file_llseek(
}
#ifdef CONFIG_FS_DAX
-static int
+static inline vm_fault_t
xfs_dax_fault(
struct vm_fault *vmf,
enum page_entry_size pe_size,
@@ -1274,14 +1274,15 @@ xfs_dax_fault(
&xfs_read_iomap_ops);
}
#else
-static int
+static inline vm_fault_t
xfs_dax_fault(
struct vm_fault *vmf,
enum page_entry_size pe_size,
bool write_fault,
pfn_t *pfn)
{
- return 0;
+ ASSERT(0);
+ return VM_FAULT_SIGBUS;
}
#endif
@@ -1324,7 +1325,7 @@ __xfs_filemap_fault(
if (write_fault) {
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
ret = iomap_page_mkwrite(vmf,
- &xfs_buffered_write_iomap_ops);
+ &xfs_page_mkwrite_iomap_ops);
xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
} else {
ret = filemap_fault(vmf);
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index d8337274c74d..88a88506ffff 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -524,7 +524,7 @@ xfs_getfsmap_rtdev_rtbitmap_query(
struct xfs_mount *mp = tp->t_mountp;
int error;
- xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED);
+ xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
/*
* Set up query parameters to return free rtextents covering the range
@@ -551,7 +551,7 @@ xfs_getfsmap_rtdev_rtbitmap_query(
if (error)
goto err;
err:
- xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED);
+ xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
return error;
}
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index eae7427062cf..f35e2cee5265 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -342,6 +342,9 @@ xfs_iget_recycle(
trace_xfs_iget_recycle(ip);
+ if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
+ return -EAGAIN;
+
/*
* We need to make it look like the inode is being reclaimed to prevent
* the actual reclaim workers from stomping over us while we recycle
@@ -355,6 +358,7 @@ xfs_iget_recycle(
ASSERT(!rwsem_is_locked(&inode->i_rwsem));
error = xfs_reinit_inode(mp, inode);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
if (error) {
/*
* Re-initializing the inode failed, and we are in deep
@@ -518,6 +522,8 @@ xfs_iget_cache_hit(
if (ip->i_flags & XFS_IRECLAIMABLE) {
/* Drops i_flags_lock and RCU read lock. */
error = xfs_iget_recycle(pag, ip);
+ if (error == -EAGAIN)
+ goto out_skip;
if (error)
return error;
} else {
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index c000b74dd203..d354ea2b74f9 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2479,7 +2479,7 @@ xfs_remove(
error = xfs_dir_replace(tp, ip, &xfs_name_dotdot,
tp->t_mountp->m_sb.sb_rootino, 0);
if (error)
- return error;
+ goto out_trans_cancel;
}
} else {
/*
@@ -2818,7 +2818,7 @@ retry:
* Lock all the participating inodes. Depending upon whether
* the target_name exists in the target directory, and
* whether the target directory is the same as the source
- * directory, we can lock from 2 to 4 inodes.
+ * directory, we can lock from 2 to 5 inodes.
*/
xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 1f783e979629..13f1b2add390 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1138,10 +1138,6 @@ xfs_ioctl_setattr_xflags(
if ((fa->fsx_xflags & FS_XFLAG_REALTIME) && xfs_is_reflink_inode(ip))
ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
- /* Don't allow us to set DAX mode for a reflinked file for now. */
- if ((fa->fsx_xflags & FS_XFLAG_DAX) && xfs_is_reflink_inode(ip))
- return -EINVAL;
-
/* diflags2 only valid for v3 inodes. */
i_flags2 = xfs_flags2diflags2(ip, fa->fsx_xflags);
if (i_flags2 && !xfs_has_v3inodes(mp))
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 07da03976ec1..669c1bc5c3a7 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -48,13 +48,53 @@ xfs_alert_fsblock_zero(
return -EFSCORRUPTED;
}
+u64
+xfs_iomap_inode_sequence(
+ struct xfs_inode *ip,
+ u16 iomap_flags)
+{
+ u64 cookie = 0;
+
+ if (iomap_flags & IOMAP_F_XATTR)
+ return READ_ONCE(ip->i_af.if_seq);
+ if ((iomap_flags & IOMAP_F_SHARED) && ip->i_cowfp)
+ cookie = (u64)READ_ONCE(ip->i_cowfp->if_seq) << 32;
+ return cookie | READ_ONCE(ip->i_df.if_seq);
+}
+
+/*
+ * Check that the iomap passed to us is still valid for the given offset and
+ * length.
+ */
+static bool
+xfs_iomap_valid(
+ struct inode *inode,
+ const struct iomap *iomap)
+{
+ struct xfs_inode *ip = XFS_I(inode);
+
+ if (iomap->validity_cookie !=
+ xfs_iomap_inode_sequence(ip, iomap->flags)) {
+ trace_xfs_iomap_invalid(ip, iomap);
+ return false;
+ }
+
+ XFS_ERRORTAG_DELAY(ip->i_mount, XFS_ERRTAG_WRITE_DELAY_MS);
+ return true;
+}
+
+const struct iomap_page_ops xfs_iomap_page_ops = {
+ .iomap_valid = xfs_iomap_valid,
+};
+
int
xfs_bmbt_to_iomap(
struct xfs_inode *ip,
struct iomap *iomap,
struct xfs_bmbt_irec *imap,
unsigned int mapping_flags,
- u16 iomap_flags)
+ u16 iomap_flags,
+ u64 sequence_cookie)
{
struct xfs_mount *mp = ip->i_mount;
struct xfs_buftarg *target = xfs_inode_buftarg(ip);
@@ -91,6 +131,9 @@ xfs_bmbt_to_iomap(
if (xfs_ipincount(ip) &&
(ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
iomap->flags |= IOMAP_F_DIRTY;
+
+ iomap->validity_cookie = sequence_cookie;
+ iomap->page_ops = &xfs_iomap_page_ops;
return 0;
}
@@ -195,7 +238,8 @@ xfs_iomap_write_direct(
xfs_fileoff_t offset_fsb,
xfs_fileoff_t count_fsb,
unsigned int flags,
- struct xfs_bmbt_irec *imap)
+ struct xfs_bmbt_irec *imap,
+ u64 *seq)
{
struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp;
@@ -285,6 +329,7 @@ xfs_iomap_write_direct(
error = xfs_alert_fsblock_zero(ip, imap);
out_unlock:
+ *seq = xfs_iomap_inode_sequence(ip, 0);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
@@ -743,6 +788,7 @@ xfs_direct_write_iomap_begin(
bool shared = false;
u16 iomap_flags = 0;
unsigned int lockmode = XFS_ILOCK_SHARED;
+ u64 seq;
ASSERT(flags & (IOMAP_WRITE | IOMAP_ZERO));
@@ -811,9 +857,10 @@ xfs_direct_write_iomap_begin(
goto out_unlock;
}
+ seq = xfs_iomap_inode_sequence(ip, iomap_flags);
xfs_iunlock(ip, lockmode);
trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
- return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags);
+ return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags, seq);
allocate_blocks:
error = -EAGAIN;
@@ -839,24 +886,26 @@ allocate_blocks:
xfs_iunlock(ip, lockmode);
error = xfs_iomap_write_direct(ip, offset_fsb, end_fsb - offset_fsb,
- flags, &imap);
+ flags, &imap, &seq);
if (error)
return error;
trace_xfs_iomap_alloc(ip, offset, length, XFS_DATA_FORK, &imap);
return xfs_bmbt_to_iomap(ip, iomap, &imap, flags,
- iomap_flags | IOMAP_F_NEW);
+ iomap_flags | IOMAP_F_NEW, seq);
out_found_cow:
- xfs_iunlock(ip, lockmode);
length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount);
trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap);
if (imap.br_startblock != HOLESTARTBLOCK) {
- error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0);
+ seq = xfs_iomap_inode_sequence(ip, 0);
+ error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq);
if (error)
- return error;
+ goto out_unlock;
}
- return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED);
+ seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
+ xfs_iunlock(ip, lockmode);
+ return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED, seq);
out_unlock:
if (lockmode)
@@ -915,6 +964,7 @@ xfs_buffered_write_iomap_begin(
int allocfork = XFS_DATA_FORK;
int error = 0;
unsigned int lockmode = XFS_ILOCK_EXCL;
+ u64 seq;
if (xfs_is_shutdown(mp))
return -EIO;
@@ -926,6 +976,10 @@ xfs_buffered_write_iomap_begin(
ASSERT(!XFS_IS_REALTIME_INODE(ip));
+ error = xfs_qm_dqattach(ip);
+ if (error)
+ return error;
+
error = xfs_ilock_for_iomap(ip, flags, &lockmode);
if (error)
return error;
@@ -1029,10 +1083,6 @@ xfs_buffered_write_iomap_begin(
allocfork = XFS_COW_FORK;
}
- error = xfs_qm_dqattach_locked(ip, false);
- if (error)
- goto out_unlock;
-
if (eof && offset + count > XFS_ISIZE(ip)) {
/*
* Determine the initial size of the preallocation.
@@ -1094,26 +1144,31 @@ retry:
* Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
* them out if the write happens to fail.
*/
+ seq = xfs_iomap_inode_sequence(ip, IOMAP_F_NEW);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
trace_xfs_iomap_alloc(ip, offset, count, allocfork, &imap);
- return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW);
+ return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW, seq);
found_imap:
+ seq = xfs_iomap_inode_sequence(ip, 0);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
- return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0);
+ return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq);
found_cow:
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ seq = xfs_iomap_inode_sequence(ip, 0);
if (imap.br_startoff <= offset_fsb) {
- error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0);
+ error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq);
if (error)
- return error;
+ goto out_unlock;
+ seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags,
- IOMAP_F_SHARED);
+ IOMAP_F_SHARED, seq);
}
xfs_trim_extent(&cmap, offset_fsb, imap.br_startoff - offset_fsb);
- return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0, seq);
out_unlock:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -1121,6 +1176,16 @@ out_unlock:
}
static int
+xfs_buffered_write_delalloc_punch(
+ struct inode *inode,
+ loff_t offset,
+ loff_t length)
+{
+ return xfs_bmap_punch_delalloc_range(XFS_I(inode), offset,
+ offset + length);
+}
+
+static int
xfs_buffered_write_iomap_end(
struct inode *inode,
loff_t offset,
@@ -1129,56 +1194,17 @@ xfs_buffered_write_iomap_end(
unsigned flags,
struct iomap *iomap)
{
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
- xfs_fileoff_t start_fsb;
- xfs_fileoff_t end_fsb;
- int error = 0;
-
- if (iomap->type != IOMAP_DELALLOC)
- return 0;
-
- /*
- * Behave as if the write failed if drop writes is enabled. Set the NEW
- * flag to force delalloc cleanup.
- */
- if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_DROP_WRITES)) {
- iomap->flags |= IOMAP_F_NEW;
- written = 0;
- }
- /*
- * start_fsb refers to the first unused block after a short write. If
- * nothing was written, round offset down to point at the first block in
- * the range.
- */
- if (unlikely(!written))
- start_fsb = XFS_B_TO_FSBT(mp, offset);
- else
- start_fsb = XFS_B_TO_FSB(mp, offset + written);
- end_fsb = XFS_B_TO_FSB(mp, offset + length);
+ struct xfs_mount *mp = XFS_M(inode->i_sb);
+ int error;
- /*
- * Trim delalloc blocks if they were allocated by this write and we
- * didn't manage to write the whole range.
- *
- * We don't need to care about racing delalloc as we hold i_mutex
- * across the reserve/allocate/unreserve calls. If there are delalloc
- * blocks in the range, they are ours.
- */
- if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) {
- truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb),
- XFS_FSB_TO_B(mp, end_fsb) - 1);
-
- error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
- end_fsb - start_fsb);
- if (error && !xfs_is_shutdown(mp)) {
- xfs_alert(mp, "%s: unable to clean up ino %lld",
- __func__, ip->i_ino);
- return error;
- }
+ error = iomap_file_buffered_write_punch_delalloc(inode, iomap, offset,
+ length, written, &xfs_buffered_write_delalloc_punch);
+ if (error && !xfs_is_shutdown(mp)) {
+ xfs_alert(mp, "%s: unable to clean up ino 0x%llx",
+ __func__, XFS_I(inode)->i_ino);
+ return error;
}
-
return 0;
}
@@ -1187,6 +1213,15 @@ const struct iomap_ops xfs_buffered_write_iomap_ops = {
.iomap_end = xfs_buffered_write_iomap_end,
};
+/*
+ * iomap_page_mkwrite() will never fail in a way that requires delalloc extents
+ * that it allocated to be revoked. Hence we do not need an .iomap_end method
+ * for this operation.
+ */
+const struct iomap_ops xfs_page_mkwrite_iomap_ops = {
+ .iomap_begin = xfs_buffered_write_iomap_begin,
+};
+
static int
xfs_read_iomap_begin(
struct inode *inode,
@@ -1204,6 +1239,7 @@ xfs_read_iomap_begin(
int nimaps = 1, error = 0;
bool shared = false;
unsigned int lockmode = XFS_ILOCK_SHARED;
+ u64 seq;
ASSERT(!(flags & (IOMAP_WRITE | IOMAP_ZERO)));
@@ -1215,15 +1251,16 @@ xfs_read_iomap_begin(
return error;
error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
&nimaps, 0);
- if (!error && (flags & IOMAP_REPORT))
+ if (!error && ((flags & IOMAP_REPORT) || IS_DAX(inode)))
error = xfs_reflink_trim_around_shared(ip, &imap, &shared);
+ seq = xfs_iomap_inode_sequence(ip, shared ? IOMAP_F_SHARED : 0);
xfs_iunlock(ip, lockmode);
if (error)
return error;
trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
return xfs_bmbt_to_iomap(ip, iomap, &imap, flags,
- shared ? IOMAP_F_SHARED : 0);
+ shared ? IOMAP_F_SHARED : 0, seq);
}
const struct iomap_ops xfs_read_iomap_ops = {
@@ -1248,6 +1285,7 @@ xfs_seek_iomap_begin(
struct xfs_bmbt_irec imap, cmap;
int error = 0;
unsigned lockmode;
+ u64 seq;
if (xfs_is_shutdown(mp))
return -EIO;
@@ -1282,8 +1320,9 @@ xfs_seek_iomap_begin(
if (data_fsb < cow_fsb + cmap.br_blockcount)
end_fsb = min(end_fsb, data_fsb);
xfs_trim_extent(&cmap, offset_fsb, end_fsb);
+ seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
error = xfs_bmbt_to_iomap(ip, iomap, &cmap, flags,
- IOMAP_F_SHARED);
+ IOMAP_F_SHARED, seq);
/*
* This is a COW extent, so we must probe the page cache
* because there could be dirty page cache being backed
@@ -1304,8 +1343,9 @@ xfs_seek_iomap_begin(
imap.br_startblock = HOLESTARTBLOCK;
imap.br_state = XFS_EXT_NORM;
done:
+ seq = xfs_iomap_inode_sequence(ip, 0);
xfs_trim_extent(&imap, offset_fsb, end_fsb);
- error = xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0);
+ error = xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq);
out_unlock:
xfs_iunlock(ip, lockmode);
return error;
@@ -1331,6 +1371,7 @@ xfs_xattr_iomap_begin(
struct xfs_bmbt_irec imap;
int nimaps = 1, error = 0;
unsigned lockmode;
+ int seq;
if (xfs_is_shutdown(mp))
return -EIO;
@@ -1347,12 +1388,14 @@ xfs_xattr_iomap_begin(
error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
&nimaps, XFS_BMAPI_ATTRFORK);
out_unlock:
+
+ seq = xfs_iomap_inode_sequence(ip, IOMAP_F_XATTR);
xfs_iunlock(ip, lockmode);
if (error)
return error;
ASSERT(nimaps);
- return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0);
+ return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_XATTR, seq);
}
const struct iomap_ops xfs_xattr_iomap_ops = {
@@ -1370,7 +1413,7 @@ xfs_zero_range(
if (IS_DAX(inode))
return dax_zero_range(inode, pos, len, did_zero,
- &xfs_direct_write_iomap_ops);
+ &xfs_dax_write_iomap_ops);
return iomap_zero_range(inode, pos, len, did_zero,
&xfs_buffered_write_iomap_ops);
}
@@ -1385,7 +1428,7 @@ xfs_truncate_page(
if (IS_DAX(inode))
return dax_truncate_page(inode, pos, did_zero,
- &xfs_direct_write_iomap_ops);
+ &xfs_dax_write_iomap_ops);
return iomap_truncate_page(inode, pos, did_zero,
&xfs_buffered_write_iomap_ops);
}
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index c782e8c0479c..4da13440bae9 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -13,14 +13,15 @@ struct xfs_bmbt_irec;
int xfs_iomap_write_direct(struct xfs_inode *ip, xfs_fileoff_t offset_fsb,
xfs_fileoff_t count_fsb, unsigned int flags,
- struct xfs_bmbt_irec *imap);
+ struct xfs_bmbt_irec *imap, u64 *sequence);
int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
xfs_fileoff_t xfs_iomap_eof_align_last_fsb(struct xfs_inode *ip,
xfs_fileoff_t end_fsb);
+u64 xfs_iomap_inode_sequence(struct xfs_inode *ip, u16 iomap_flags);
int xfs_bmbt_to_iomap(struct xfs_inode *ip, struct iomap *iomap,
struct xfs_bmbt_irec *imap, unsigned int mapping_flags,
- u16 iomap_flags);
+ u16 iomap_flags, u64 sequence_cookie);
int xfs_zero_range(struct xfs_inode *ip, loff_t pos, loff_t len,
bool *did_zero);
@@ -47,6 +48,7 @@ xfs_aligned_fsb_count(
}
extern const struct iomap_ops xfs_buffered_write_iomap_ops;
+extern const struct iomap_ops xfs_page_mkwrite_iomap_ops;
extern const struct iomap_ops xfs_direct_write_iomap_ops;
extern const struct iomap_ops xfs_read_iomap_ops;
extern const struct iomap_ops xfs_seek_iomap_ops;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 2e10e1c66ad6..515318dfbc38 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -651,6 +651,7 @@ xfs_vn_change_ok(
static int
xfs_setattr_nonsize(
struct user_namespace *mnt_userns,
+ struct dentry *dentry,
struct xfs_inode *ip,
struct iattr *iattr)
{
@@ -757,7 +758,7 @@ xfs_setattr_nonsize(
* Posix ACL code seems to care about this issue either.
*/
if (mask & ATTR_MODE) {
- error = posix_acl_chmod(mnt_userns, inode, inode->i_mode);
+ error = posix_acl_chmod(mnt_userns, dentry, inode->i_mode);
if (error)
return error;
}
@@ -779,6 +780,7 @@ out_dqrele:
STATIC int
xfs_setattr_size(
struct user_namespace *mnt_userns,
+ struct dentry *dentry,
struct xfs_inode *ip,
struct iattr *iattr)
{
@@ -810,7 +812,7 @@ xfs_setattr_size(
* Use the regular setattr path to update the timestamps.
*/
iattr->ia_valid &= ~ATTR_SIZE;
- return xfs_setattr_nonsize(mnt_userns, ip, iattr);
+ return xfs_setattr_nonsize(mnt_userns, dentry, ip, iattr);
}
/*
@@ -987,7 +989,7 @@ xfs_vn_setattr_size(
error = xfs_vn_change_ok(mnt_userns, dentry, iattr);
if (error)
return error;
- return xfs_setattr_size(mnt_userns, ip, iattr);
+ return xfs_setattr_size(mnt_userns, dentry, ip, iattr);
}
STATIC int
@@ -1019,7 +1021,7 @@ xfs_vn_setattr(
error = xfs_vn_change_ok(mnt_userns, dentry, iattr);
if (!error)
- error = xfs_setattr_nonsize(mnt_userns, ip, iattr);
+ error = xfs_setattr_nonsize(mnt_userns, dentry, ip, iattr);
}
return error;
@@ -1101,7 +1103,7 @@ xfs_vn_tmpfile(
}
static const struct inode_operations xfs_inode_operations = {
- .get_acl = xfs_get_acl,
+ .get_inode_acl = xfs_get_acl,
.set_acl = xfs_set_acl,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
@@ -1128,7 +1130,7 @@ static const struct inode_operations xfs_dir_inode_operations = {
.rmdir = xfs_vn_unlink,
.mknod = xfs_vn_mknod,
.rename = xfs_vn_rename,
- .get_acl = xfs_get_acl,
+ .get_inode_acl = xfs_get_acl,
.set_acl = xfs_set_acl,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
@@ -1155,7 +1157,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = {
.rmdir = xfs_vn_unlink,
.mknod = xfs_vn_mknod,
.rename = xfs_vn_rename,
- .get_acl = xfs_get_acl,
+ .get_inode_acl = xfs_get_acl,
.set_acl = xfs_set_acl,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
@@ -1185,10 +1187,6 @@ xfs_inode_supports_dax(
if (!S_ISREG(VFS_I(ip)->i_mode))
return false;
- /* Only supported on non-reflinked files. */
- if (xfs_is_reflink_inode(ip))
- return false;
-
/* Block size must match page size */
if (mp->m_sb.sb_blocksize != PAGE_SIZE)
return false;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index f02a0dd522b3..fc61cc024023 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -644,12 +644,14 @@ xfs_log_mount(
int min_logfsbs;
if (!xfs_has_norecovery(mp)) {
- xfs_notice(mp, "Mounting V%d Filesystem",
- XFS_SB_VERSION_NUM(&mp->m_sb));
+ xfs_notice(mp, "Mounting V%d Filesystem %pU",
+ XFS_SB_VERSION_NUM(&mp->m_sb),
+ &mp->m_sb.sb_uuid);
} else {
xfs_notice(mp,
-"Mounting V%d filesystem in no-recovery mode. Filesystem will be inconsistent.",
- XFS_SB_VERSION_NUM(&mp->m_sb));
+"Mounting V%d filesystem %pU in no-recovery mode. Filesystem will be inconsistent.",
+ XFS_SB_VERSION_NUM(&mp->m_sb),
+ &mp->m_sb.sb_uuid);
ASSERT(xfs_is_readonly(mp));
}
@@ -887,6 +889,23 @@ xlog_force_iclog(
}
/*
+ * Cycle all the iclogbuf locks to make sure all log IO completion
+ * is done before we tear down these buffers.
+ */
+static void
+xlog_wait_iclog_completion(struct xlog *log)
+{
+ int i;
+ struct xlog_in_core *iclog = log->l_iclog;
+
+ for (i = 0; i < log->l_iclog_bufs; i++) {
+ down(&iclog->ic_sema);
+ up(&iclog->ic_sema);
+ iclog = iclog->ic_next;
+ }
+}
+
+/*
* Wait for the iclog and all prior iclogs to be written disk as required by the
* log force state machine. Waiting on ic_force_wait ensures iclog completions
* have been ordered and callbacks run before we are woken here, hence
@@ -1111,6 +1130,14 @@ xfs_log_unmount(
{
xfs_log_clean(mp);
+ /*
+ * If shutdown has come from iclog IO context, the log
+ * cleaning will have been skipped and so we need to wait
+ * for the iclog to complete shutdown processing before we
+ * tear anything down.
+ */
+ xlog_wait_iclog_completion(mp->m_log);
+
xfs_buftarg_drain(mp->m_ddev_targp);
xfs_trans_ail_destroy(mp);
@@ -2114,17 +2141,6 @@ xlog_dealloc_log(
int i;
/*
- * Cycle all the iclogbuf locks to make sure all log IO completion
- * is done before we tear down these buffers.
- */
- iclog = log->l_iclog;
- for (i = 0; i < log->l_iclog_bufs; i++) {
- down(&iclog->ic_sema);
- up(&iclog->ic_sema);
- iclog = iclog->ic_next;
- }
-
- /*
* Destroy the CIL after waiting for iclog IO completion because an
* iclog EIO error will try to shut down the log, which accesses the
* CIL to wake up the waiters.
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 17e923b9c5fa..322eb2ee6c55 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2552,6 +2552,8 @@ xlog_recover_process_intents(
for (lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
lip != NULL;
lip = xfs_trans_ail_cursor_next(ailp, &cur)) {
+ const struct xfs_item_ops *ops;
+
if (!xlog_item_is_intent(lip))
break;
@@ -2567,13 +2569,17 @@ xlog_recover_process_intents(
* deferred ops, you /must/ attach them to the capture list in
* the recover routine or else those subsequent intents will be
* replayed in the wrong order!
+ *
+ * The recovery function can free the log item, so we must not
+ * access lip after it returns.
*/
spin_unlock(&ailp->ail_lock);
- error = lip->li_ops->iop_recover(lip, &capture_list);
+ ops = lip->li_ops;
+ error = ops->iop_recover(lip, &capture_list);
spin_lock(&ailp->ail_lock);
if (error) {
trace_xlog_intent_recovery_failed(log->l_mp, error,
- lip->li_ops->iop_recover);
+ ops->iop_recover);
break;
}
}
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index e8bb3c2e847e..fb87ffb48f7f 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -538,6 +538,20 @@ xfs_check_summary_counts(
return 0;
}
+static void
+xfs_unmount_check(
+ struct xfs_mount *mp)
+{
+ if (xfs_is_shutdown(mp))
+ return;
+
+ if (percpu_counter_sum(&mp->m_ifree) >
+ percpu_counter_sum(&mp->m_icount)) {
+ xfs_alert(mp, "ifree/icount mismatch at unmount");
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS);
+ }
+}
+
/*
* Flush and reclaim dirty inodes in preparation for unmount. Inodes and
* internal inode structures can be sitting in the CIL and AIL at this point,
@@ -1077,6 +1091,7 @@ xfs_unmountfs(
if (error)
xfs_warn(mp, "Unable to free reserved block pool. "
"Freespace may not be correct on next mount.");
+ xfs_unmount_check(mp);
xfs_log_unmount(mp);
xfs_da_unmount(mp);
diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h
index 758702b9495f..9737b5a9f405 100644
--- a/fs/xfs/xfs_ondisk.h
+++ b/fs/xfs/xfs_ondisk.h
@@ -118,10 +118,10 @@ xfs_check_ondisk_structs(void)
/* log structures */
XFS_CHECK_STRUCT_SIZE(struct xfs_buf_log_format, 88);
XFS_CHECK_STRUCT_SIZE(struct xfs_dq_logformat, 24);
- XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_32, 28);
- XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_64, 32);
- XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_32, 28);
- XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_64, 32);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_32, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_64, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_32, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_64, 16);
XFS_CHECK_STRUCT_SIZE(struct xfs_extent_32, 12);
XFS_CHECK_STRUCT_SIZE(struct xfs_extent_64, 16);
XFS_CHECK_STRUCT_SIZE(struct xfs_log_dinode, 176);
@@ -134,6 +134,21 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_STRUCT_SIZE(struct xfs_trans_header, 16);
XFS_CHECK_STRUCT_SIZE(struct xfs_attri_log_format, 40);
XFS_CHECK_STRUCT_SIZE(struct xfs_attrd_log_format, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_bui_log_format, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_bud_log_format, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_cui_log_format, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_cud_log_format, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_rui_log_format, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_rud_log_format, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_map_extent, 32);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_phys_extent, 16);
+
+ XFS_CHECK_OFFSET(struct xfs_bui_log_format, bui_extents, 16);
+ XFS_CHECK_OFFSET(struct xfs_cui_log_format, cui_extents, 16);
+ XFS_CHECK_OFFSET(struct xfs_rui_log_format, rui_extents, 16);
+ XFS_CHECK_OFFSET(struct xfs_efi_log_format, efi_extents, 16);
+ XFS_CHECK_OFFSET(struct xfs_efi_log_format_32, efi_extents, 16);
+ XFS_CHECK_OFFSET(struct xfs_efi_log_format_64, efi_extents, 16);
/*
* The v5 superblock format extended several v4 header structures with
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index 37a24f0f7cd4..38d23f0e703a 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -125,6 +125,7 @@ xfs_fs_map_blocks(
int nimaps = 1;
uint lock_flags;
int error = 0;
+ u64 seq;
if (xfs_is_shutdown(mp))
return -EIO;
@@ -176,6 +177,7 @@ xfs_fs_map_blocks(
lock_flags = xfs_ilock_data_map_shared(ip);
error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
&imap, &nimaps, bmapi_flags);
+ seq = xfs_iomap_inode_sequence(ip, 0);
ASSERT(!nimaps || imap.br_startblock != DELAYSTARTBLOCK);
@@ -189,7 +191,7 @@ xfs_fs_map_blocks(
xfs_iunlock(ip, lock_flags);
error = xfs_iomap_write_direct(ip, offset_fsb,
- end_fsb - offset_fsb, 0, &imap);
+ end_fsb - offset_fsb, 0, &imap, &seq);
if (error)
goto out_unlock;
@@ -209,7 +211,7 @@ xfs_fs_map_blocks(
}
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0, 0);
+ error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0, 0, seq);
*device_generation = mp->m_generation;
return error;
out_unlock:
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 18bb4ec4d7c9..ff53d40a2dae 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -423,6 +423,14 @@ xfs_qm_dquot_isolate(
goto out_miss_busy;
/*
+ * If something else is freeing this dquot and hasn't yet removed it
+ * from the LRU, leave it for the freeing task to complete the freeing
+ * process rather than risk it being free from under us here.
+ */
+ if (dqp->q_flags & XFS_DQFLAG_FREEING)
+ goto out_miss_unlock;
+
+ /*
* This dquot has acquired a reference in the meantime remove it from
* the freelist and try again.
*/
@@ -441,10 +449,8 @@ xfs_qm_dquot_isolate(
* skip it so there is time for the IO to complete before we try to
* reclaim it again on the next LRU pass.
*/
- if (!xfs_dqflock_nowait(dqp)) {
- xfs_dqunlock(dqp);
- goto out_miss_busy;
- }
+ if (!xfs_dqflock_nowait(dqp))
+ goto out_miss_unlock;
if (XFS_DQ_IS_DIRTY(dqp)) {
struct xfs_buf *bp = NULL;
@@ -478,6 +484,8 @@ xfs_qm_dquot_isolate(
XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaims);
return LRU_REMOVED;
+out_miss_unlock:
+ xfs_dqunlock(dqp);
out_miss_busy:
trace_xfs_dqreclaim_busy(dqp);
XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses);
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 7e97bf19793d..858e3e9eb4a8 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -523,7 +523,9 @@ xfs_cui_item_recover(
type = refc_type;
break;
default:
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ &cuip->cui_format,
+ sizeof(cuip->cui_format));
error = -EFSCORRUPTED;
goto abort_error;
}
@@ -536,7 +538,8 @@ xfs_cui_item_recover(
&new_fsb, &new_len, &rcur);
if (error == -EFSCORRUPTED)
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- refc, sizeof(*refc));
+ &cuip->cui_format,
+ sizeof(cuip->cui_format));
if (error)
goto abort_error;
@@ -622,28 +625,18 @@ static const struct xfs_item_ops xfs_cui_item_ops = {
.iop_relog = xfs_cui_item_relog,
};
-/*
- * Copy an CUI format buffer from the given buf, and into the destination
- * CUI format structure. The CUI/CUD items were designed not to need any
- * special alignment handling.
- */
-static int
+static inline void
xfs_cui_copy_format(
- struct xfs_log_iovec *buf,
- struct xfs_cui_log_format *dst_cui_fmt)
+ struct xfs_cui_log_format *dst,
+ const struct xfs_cui_log_format *src)
{
- struct xfs_cui_log_format *src_cui_fmt;
- uint len;
+ unsigned int i;
- src_cui_fmt = buf->i_addr;
- len = xfs_cui_log_format_sizeof(src_cui_fmt->cui_nextents);
+ memcpy(dst, src, offsetof(struct xfs_cui_log_format, cui_extents));
- if (buf->i_len == len) {
- memcpy(dst_cui_fmt, src_cui_fmt, len);
- return 0;
- }
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
- return -EFSCORRUPTED;
+ for (i = 0; i < src->cui_nextents; i++)
+ memcpy(&dst->cui_extents[i], &src->cui_extents[i],
+ sizeof(struct xfs_phys_extent));
}
/*
@@ -660,19 +653,28 @@ xlog_recover_cui_commit_pass2(
struct xlog_recover_item *item,
xfs_lsn_t lsn)
{
- int error;
struct xfs_mount *mp = log->l_mp;
struct xfs_cui_log_item *cuip;
struct xfs_cui_log_format *cui_formatp;
+ size_t len;
cui_formatp = item->ri_buf[0].i_addr;
- cuip = xfs_cui_init(mp, cui_formatp->cui_nextents);
- error = xfs_cui_copy_format(&item->ri_buf[0], &cuip->cui_format);
- if (error) {
- xfs_cui_item_free(cuip);
- return error;
+ if (item->ri_buf[0].i_len < xfs_cui_log_format_sizeof(0)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
}
+
+ len = xfs_cui_log_format_sizeof(cui_formatp->cui_nextents);
+ if (item->ri_buf[0].i_len != len) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+ }
+
+ cuip = xfs_cui_init(mp, cui_formatp->cui_nextents);
+ xfs_cui_copy_format(&cuip->cui_format, cui_formatp);
atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents);
/*
* Insert the intent into the AIL directly and drop one reference so
@@ -706,7 +708,8 @@ xlog_recover_cud_commit_pass2(
cud_formatp = item->ri_buf[0].i_addr;
if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
return -EFSCORRUPTED;
}
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 93bdd25680bc..fe46bce8cae6 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1693,8 +1693,12 @@ xfs_reflink_unshare(
inode_dio_wait(inode);
- error = iomap_file_unshare(inode, offset, len,
- &xfs_buffered_write_iomap_ops);
+ if (IS_DAX(inode))
+ error = dax_file_unshare(inode, offset, len,
+ &xfs_dax_write_iomap_ops);
+ else
+ error = iomap_file_unshare(inode, offset, len,
+ &xfs_buffered_write_iomap_ops);
if (error)
goto out;
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index fef92e02f3bb..534504ede1a3 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -155,31 +155,6 @@ xfs_rui_init(
return ruip;
}
-/*
- * Copy an RUI format buffer from the given buf, and into the destination
- * RUI format structure. The RUI/RUD items were designed not to need any
- * special alignment handling.
- */
-STATIC int
-xfs_rui_copy_format(
- struct xfs_log_iovec *buf,
- struct xfs_rui_log_format *dst_rui_fmt)
-{
- struct xfs_rui_log_format *src_rui_fmt;
- uint len;
-
- src_rui_fmt = buf->i_addr;
- len = xfs_rui_log_format_sizeof(src_rui_fmt->rui_nextents);
-
- if (buf->i_len != len) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
- return -EFSCORRUPTED;
- }
-
- memcpy(dst_rui_fmt, src_rui_fmt, len);
- return 0;
-}
-
static inline struct xfs_rud_log_item *RUD_ITEM(struct xfs_log_item *lip)
{
return container_of(lip, struct xfs_rud_log_item, rud_item);
@@ -582,7 +557,9 @@ xfs_rui_item_recover(
type = XFS_RMAP_FREE;
break;
default:
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ &ruip->rui_format,
+ sizeof(ruip->rui_format));
error = -EFSCORRUPTED;
goto abort_error;
}
@@ -652,6 +629,20 @@ static const struct xfs_item_ops xfs_rui_item_ops = {
.iop_relog = xfs_rui_item_relog,
};
+static inline void
+xfs_rui_copy_format(
+ struct xfs_rui_log_format *dst,
+ const struct xfs_rui_log_format *src)
+{
+ unsigned int i;
+
+ memcpy(dst, src, offsetof(struct xfs_rui_log_format, rui_extents));
+
+ for (i = 0; i < src->rui_nextents; i++)
+ memcpy(&dst->rui_extents[i], &src->rui_extents[i],
+ sizeof(struct xfs_map_extent));
+}
+
/*
* This routine is called to create an in-core extent rmap update
* item from the rui format structure which was logged on disk.
@@ -666,19 +657,28 @@ xlog_recover_rui_commit_pass2(
struct xlog_recover_item *item,
xfs_lsn_t lsn)
{
- int error;
struct xfs_mount *mp = log->l_mp;
struct xfs_rui_log_item *ruip;
struct xfs_rui_log_format *rui_formatp;
+ size_t len;
rui_formatp = item->ri_buf[0].i_addr;
- ruip = xfs_rui_init(mp, rui_formatp->rui_nextents);
- error = xfs_rui_copy_format(&item->ri_buf[0], &ruip->rui_format);
- if (error) {
- xfs_rui_item_free(ruip);
- return error;
+ if (item->ri_buf[0].i_len < xfs_rui_log_format_sizeof(0)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+ }
+
+ len = xfs_rui_log_format_sizeof(rui_formatp->rui_nextents);
+ if (item->ri_buf[0].i_len != len) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
}
+
+ ruip = xfs_rui_init(mp, rui_formatp->rui_nextents);
+ xfs_rui_copy_format(&ruip->rui_format, rui_formatp);
atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents);
/*
* Insert the intent into the AIL directly and drop one reference so
@@ -711,7 +711,11 @@ xlog_recover_rud_commit_pass2(
struct xfs_rud_log_format *rud_formatp;
rud_formatp = item->ri_buf[0].i_addr;
- ASSERT(item->ri_buf[0].i_len == sizeof(struct xfs_rud_log_format));
+ if (item->ri_buf[0].i_len != sizeof(struct xfs_rud_log_format)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
+ rud_formatp, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+ }
xlog_recover_release_intent(log, XFS_LI_RUI, rud_formatp->rud_rui_id);
return 0;
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 292d5e54a92c..16534e9873f6 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1311,10 +1311,10 @@ xfs_rtalloc_reinit_frextents(
uint64_t val = 0;
int error;
- xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
+ xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
error = xfs_rtalloc_query_all(mp, NULL, xfs_rtalloc_count_frextent,
&val);
- xfs_iunlock(mp->m_rbmip, XFS_ILOCK_EXCL);
+ xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
if (error)
return error;
@@ -1326,6 +1326,41 @@ xfs_rtalloc_reinit_frextents(
}
/*
+ * Read in the bmbt of an rt metadata inode so that we never have to load them
+ * at runtime. This enables the use of shared ILOCKs for rtbitmap scans. Use
+ * an empty transaction to avoid deadlocking on loops in the bmbt.
+ */
+static inline int
+xfs_rtmount_iread_extents(
+ struct xfs_inode *ip,
+ unsigned int lock_class)
+{
+ struct xfs_trans *tp;
+ int error;
+
+ error = xfs_trans_alloc_empty(ip->i_mount, &tp);
+ if (error)
+ return error;
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL | lock_class);
+
+ error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
+ if (error)
+ goto out_unlock;
+
+ if (xfs_inode_has_attr_fork(ip)) {
+ error = xfs_iread_extents(tp, ip, XFS_ATTR_FORK);
+ if (error)
+ goto out_unlock;
+ }
+
+out_unlock:
+ xfs_iunlock(ip, XFS_ILOCK_EXCL | lock_class);
+ xfs_trans_cancel(tp);
+ return error;
+}
+
+/*
* Get the bitmap and summary inodes and the summary cache into the mount
* structure at mount time.
*/
@@ -1342,14 +1377,27 @@ xfs_rtmount_inodes(
return error;
ASSERT(mp->m_rbmip != NULL);
+ error = xfs_rtmount_iread_extents(mp->m_rbmip, XFS_ILOCK_RTBITMAP);
+ if (error)
+ goto out_rele_bitmap;
+
error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip);
- if (error) {
- xfs_irele(mp->m_rbmip);
- return error;
- }
+ if (error)
+ goto out_rele_bitmap;
ASSERT(mp->m_rsumip != NULL);
+
+ error = xfs_rtmount_iread_extents(mp->m_rsumip, XFS_ILOCK_RTSUM);
+ if (error)
+ goto out_rele_summary;
+
xfs_alloc_rsum_cache(mp, sbp->sb_rbmblocks);
return 0;
+
+out_rele_summary:
+ xfs_irele(mp->m_rsumip);
+out_rele_bitmap:
+ xfs_irele(mp->m_rbmip);
+ return error;
}
void
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index f029c6702dda..0c4b73e9b29d 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1110,7 +1110,7 @@ xfs_fs_put_super(
if (!sb->s_fs_info)
return;
- xfs_notice(mp, "Unmounting Filesystem");
+ xfs_notice(mp, "Unmounting Filesystem %pU", &mp->m_sb.sb_uuid);
xfs_filestream_unmount(mp);
xfs_unmountfs(mp);
@@ -2028,18 +2028,14 @@ xfs_init_caches(void)
goto out_destroy_trans_cache;
xfs_efd_cache = kmem_cache_create("xfs_efd_item",
- (sizeof(struct xfs_efd_log_item) +
- (XFS_EFD_MAX_FAST_EXTENTS - 1) *
- sizeof(struct xfs_extent)),
- 0, 0, NULL);
+ xfs_efd_log_item_sizeof(XFS_EFD_MAX_FAST_EXTENTS),
+ 0, 0, NULL);
if (!xfs_efd_cache)
goto out_destroy_buf_item_cache;
xfs_efi_cache = kmem_cache_create("xfs_efi_item",
- (sizeof(struct xfs_efi_log_item) +
- (XFS_EFI_MAX_FAST_EXTENTS - 1) *
- sizeof(struct xfs_extent)),
- 0, 0, NULL);
+ xfs_efi_log_item_sizeof(XFS_EFI_MAX_FAST_EXTENTS),
+ 0, 0, NULL);
if (!xfs_efi_cache)
goto out_destroy_efd_cache;
diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h
index 43585850f154..513095e353a5 100644
--- a/fs/xfs/xfs_sysfs.h
+++ b/fs/xfs/xfs_sysfs.h
@@ -33,10 +33,15 @@ xfs_sysfs_init(
const char *name)
{
struct kobject *parent;
+ int err;
parent = parent_kobj ? &parent_kobj->kobject : NULL;
init_completion(&kobj->complete);
- return kobject_init_and_add(&kobj->kobject, ktype, parent, "%s", name);
+ err = kobject_init_and_add(&kobj->kobject, ktype, parent, "%s", name);
+ if (err)
+ kobject_put(&kobj->kobject);
+
+ return err;
}
static inline void
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
index d269ef57ff01..8a5dc1538aa8 100644
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -34,6 +34,8 @@
#include "xfs_ag.h"
#include "xfs_ag_resv.h"
#include "xfs_error.h"
+#include <linux/iomap.h>
+#include "xfs_iomap.h"
/*
* We include this last to have the helpers above available for the trace
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index cb7c81ba7fa3..421d1e504ac4 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -799,6 +799,9 @@ TRACE_DEFINE_ENUM(PE_SIZE_PTE);
TRACE_DEFINE_ENUM(PE_SIZE_PMD);
TRACE_DEFINE_ENUM(PE_SIZE_PUD);
+TRACE_DEFINE_ENUM(XFS_REFC_DOMAIN_SHARED);
+TRACE_DEFINE_ENUM(XFS_REFC_DOMAIN_COW);
+
TRACE_EVENT(xfs_filemap_fault,
TP_PROTO(struct xfs_inode *ip, enum page_entry_size pe_size,
bool write_fault),
@@ -2925,6 +2928,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_class,
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
+ __field(enum xfs_refc_domain, domain)
__field(xfs_agblock_t, startblock)
__field(xfs_extlen_t, blockcount)
__field(xfs_nlink_t, refcount)
@@ -2932,13 +2936,15 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_class,
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->agno = agno;
+ __entry->domain = irec->rc_domain;
__entry->startblock = irec->rc_startblock;
__entry->blockcount = irec->rc_blockcount;
__entry->refcount = irec->rc_refcount;
),
- TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u",
+ TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
+ __print_symbolic(__entry->domain, XFS_REFC_DOMAIN_STRINGS),
__entry->startblock,
__entry->blockcount,
__entry->refcount)
@@ -2958,6 +2964,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_at_class,
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
+ __field(enum xfs_refc_domain, domain)
__field(xfs_agblock_t, startblock)
__field(xfs_extlen_t, blockcount)
__field(xfs_nlink_t, refcount)
@@ -2966,14 +2973,16 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_at_class,
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->agno = agno;
+ __entry->domain = irec->rc_domain;
__entry->startblock = irec->rc_startblock;
__entry->blockcount = irec->rc_blockcount;
__entry->refcount = irec->rc_refcount;
__entry->agbno = agbno;
),
- TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x",
+ TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
+ __print_symbolic(__entry->domain, XFS_REFC_DOMAIN_STRINGS),
__entry->startblock,
__entry->blockcount,
__entry->refcount,
@@ -2994,9 +3003,11 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_class,
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
+ __field(enum xfs_refc_domain, i1_domain)
__field(xfs_agblock_t, i1_startblock)
__field(xfs_extlen_t, i1_blockcount)
__field(xfs_nlink_t, i1_refcount)
+ __field(enum xfs_refc_domain, i2_domain)
__field(xfs_agblock_t, i2_startblock)
__field(xfs_extlen_t, i2_blockcount)
__field(xfs_nlink_t, i2_refcount)
@@ -3004,20 +3015,24 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_class,
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->agno = agno;
+ __entry->i1_domain = i1->rc_domain;
__entry->i1_startblock = i1->rc_startblock;
__entry->i1_blockcount = i1->rc_blockcount;
__entry->i1_refcount = i1->rc_refcount;
+ __entry->i2_domain = i2->rc_domain;
__entry->i2_startblock = i2->rc_startblock;
__entry->i2_blockcount = i2->rc_blockcount;
__entry->i2_refcount = i2->rc_refcount;
),
- TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u -- "
- "agbno 0x%x fsbcount 0x%x refcount %u",
+ TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u -- "
+ "dom %s agbno 0x%x fsbcount 0x%x refcount %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
+ __print_symbolic(__entry->i1_domain, XFS_REFC_DOMAIN_STRINGS),
__entry->i1_startblock,
__entry->i1_blockcount,
__entry->i1_refcount,
+ __print_symbolic(__entry->i2_domain, XFS_REFC_DOMAIN_STRINGS),
__entry->i2_startblock,
__entry->i2_blockcount,
__entry->i2_refcount)
@@ -3038,9 +3053,11 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_at_class,
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
+ __field(enum xfs_refc_domain, i1_domain)
__field(xfs_agblock_t, i1_startblock)
__field(xfs_extlen_t, i1_blockcount)
__field(xfs_nlink_t, i1_refcount)
+ __field(enum xfs_refc_domain, i2_domain)
__field(xfs_agblock_t, i2_startblock)
__field(xfs_extlen_t, i2_blockcount)
__field(xfs_nlink_t, i2_refcount)
@@ -3049,21 +3066,25 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_at_class,
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->agno = agno;
+ __entry->i1_domain = i1->rc_domain;
__entry->i1_startblock = i1->rc_startblock;
__entry->i1_blockcount = i1->rc_blockcount;
__entry->i1_refcount = i1->rc_refcount;
+ __entry->i2_domain = i2->rc_domain;
__entry->i2_startblock = i2->rc_startblock;
__entry->i2_blockcount = i2->rc_blockcount;
__entry->i2_refcount = i2->rc_refcount;
__entry->agbno = agbno;
),
- TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u -- "
- "agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x",
+ TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u -- "
+ "dom %s agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
+ __print_symbolic(__entry->i1_domain, XFS_REFC_DOMAIN_STRINGS),
__entry->i1_startblock,
__entry->i1_blockcount,
__entry->i1_refcount,
+ __print_symbolic(__entry->i2_domain, XFS_REFC_DOMAIN_STRINGS),
__entry->i2_startblock,
__entry->i2_blockcount,
__entry->i2_refcount,
@@ -3086,12 +3107,15 @@ DECLARE_EVENT_CLASS(xfs_refcount_triple_extent_class,
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
+ __field(enum xfs_refc_domain, i1_domain)
__field(xfs_agblock_t, i1_startblock)
__field(xfs_extlen_t, i1_blockcount)
__field(xfs_nlink_t, i1_refcount)
+ __field(enum xfs_refc_domain, i2_domain)
__field(xfs_agblock_t, i2_startblock)
__field(xfs_extlen_t, i2_blockcount)
__field(xfs_nlink_t, i2_refcount)
+ __field(enum xfs_refc_domain, i3_domain)
__field(xfs_agblock_t, i3_startblock)
__field(xfs_extlen_t, i3_blockcount)
__field(xfs_nlink_t, i3_refcount)
@@ -3099,27 +3123,33 @@ DECLARE_EVENT_CLASS(xfs_refcount_triple_extent_class,
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->agno = agno;
+ __entry->i1_domain = i1->rc_domain;
__entry->i1_startblock = i1->rc_startblock;
__entry->i1_blockcount = i1->rc_blockcount;
__entry->i1_refcount = i1->rc_refcount;
+ __entry->i2_domain = i2->rc_domain;
__entry->i2_startblock = i2->rc_startblock;
__entry->i2_blockcount = i2->rc_blockcount;
__entry->i2_refcount = i2->rc_refcount;
+ __entry->i3_domain = i3->rc_domain;
__entry->i3_startblock = i3->rc_startblock;
__entry->i3_blockcount = i3->rc_blockcount;
__entry->i3_refcount = i3->rc_refcount;
),
- TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u -- "
- "agbno 0x%x fsbcount 0x%x refcount %u -- "
- "agbno 0x%x fsbcount 0x%x refcount %u",
+ TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u -- "
+ "dom %s agbno 0x%x fsbcount 0x%x refcount %u -- "
+ "dom %s agbno 0x%x fsbcount 0x%x refcount %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
+ __print_symbolic(__entry->i1_domain, XFS_REFC_DOMAIN_STRINGS),
__entry->i1_startblock,
__entry->i1_blockcount,
__entry->i1_refcount,
+ __print_symbolic(__entry->i2_domain, XFS_REFC_DOMAIN_STRINGS),
__entry->i2_startblock,
__entry->i2_blockcount,
__entry->i2_refcount,
+ __print_symbolic(__entry->i3_domain, XFS_REFC_DOMAIN_STRINGS),
__entry->i3_startblock,
__entry->i3_blockcount,
__entry->i3_refcount)
@@ -3322,6 +3352,92 @@ DEFINE_EVENT(xfs_inode_irec_class, name, \
TP_PROTO(struct xfs_inode *ip, struct xfs_bmbt_irec *irec), \
TP_ARGS(ip, irec))
+/* inode iomap invalidation events */
+DECLARE_EVENT_CLASS(xfs_wb_invalid_class,
+ TP_PROTO(struct xfs_inode *ip, const struct iomap *iomap, unsigned int wpcseq, int whichfork),
+ TP_ARGS(ip, iomap, wpcseq, whichfork),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(u64, addr)
+ __field(loff_t, pos)
+ __field(u64, len)
+ __field(u16, type)
+ __field(u16, flags)
+ __field(u32, wpcseq)
+ __field(u32, forkseq)
+ ),
+ TP_fast_assign(
+ __entry->dev = VFS_I(ip)->i_sb->s_dev;
+ __entry->ino = ip->i_ino;
+ __entry->addr = iomap->addr;
+ __entry->pos = iomap->offset;
+ __entry->len = iomap->length;
+ __entry->type = iomap->type;
+ __entry->flags = iomap->flags;
+ __entry->wpcseq = wpcseq;
+ __entry->forkseq = READ_ONCE(xfs_ifork_ptr(ip, whichfork)->if_seq);
+ ),
+ TP_printk("dev %d:%d ino 0x%llx pos 0x%llx addr 0x%llx bytecount 0x%llx type 0x%x flags 0x%x wpcseq 0x%x forkseq 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->pos,
+ __entry->addr,
+ __entry->len,
+ __entry->type,
+ __entry->flags,
+ __entry->wpcseq,
+ __entry->forkseq)
+);
+#define DEFINE_WB_INVALID_EVENT(name) \
+DEFINE_EVENT(xfs_wb_invalid_class, name, \
+ TP_PROTO(struct xfs_inode *ip, const struct iomap *iomap, unsigned int wpcseq, int whichfork), \
+ TP_ARGS(ip, iomap, wpcseq, whichfork))
+DEFINE_WB_INVALID_EVENT(xfs_wb_cow_iomap_invalid);
+DEFINE_WB_INVALID_EVENT(xfs_wb_data_iomap_invalid);
+
+DECLARE_EVENT_CLASS(xfs_iomap_invalid_class,
+ TP_PROTO(struct xfs_inode *ip, const struct iomap *iomap),
+ TP_ARGS(ip, iomap),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(u64, addr)
+ __field(loff_t, pos)
+ __field(u64, len)
+ __field(u64, validity_cookie)
+ __field(u64, inodeseq)
+ __field(u16, type)
+ __field(u16, flags)
+ ),
+ TP_fast_assign(
+ __entry->dev = VFS_I(ip)->i_sb->s_dev;
+ __entry->ino = ip->i_ino;
+ __entry->addr = iomap->addr;
+ __entry->pos = iomap->offset;
+ __entry->len = iomap->length;
+ __entry->validity_cookie = iomap->validity_cookie;
+ __entry->type = iomap->type;
+ __entry->flags = iomap->flags;
+ __entry->inodeseq = xfs_iomap_inode_sequence(ip, iomap->flags);
+ ),
+ TP_printk("dev %d:%d ino 0x%llx pos 0x%llx addr 0x%llx bytecount 0x%llx type 0x%x flags 0x%x validity_cookie 0x%llx inodeseq 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->pos,
+ __entry->addr,
+ __entry->len,
+ __entry->type,
+ __entry->flags,
+ __entry->validity_cookie,
+ __entry->inodeseq)
+);
+#define DEFINE_IOMAP_INVALID_EVENT(name) \
+DEFINE_EVENT(xfs_iomap_invalid_class, name, \
+ TP_PROTO(struct xfs_inode *ip, const struct iomap *iomap), \
+ TP_ARGS(ip, iomap))
+DEFINE_IOMAP_INVALID_EVENT(xfs_iomap_invalid);
+
/* refcount/reflink tracepoint definitions */
/* reflink tracepoints */
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 16fbf2a1144c..7d4109af193e 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -422,7 +422,7 @@ xfsaild_push(
struct xfs_ail_cursor cur;
struct xfs_log_item *lip;
xfs_lsn_t lsn;
- xfs_lsn_t target;
+ xfs_lsn_t target = NULLCOMMITLSN;
long tout;
int stuck = 0;
int flushing = 0;
@@ -472,6 +472,8 @@ xfsaild_push(
XFS_STATS_INC(mp, xs_push_ail);
+ ASSERT(target != NULLCOMMITLSN);
+
lsn = lip->li_lsn;
while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) {
int lock_result;
@@ -730,11 +732,10 @@ void
xfs_ail_push_all_sync(
struct xfs_ail *ailp)
{
- struct xfs_log_item *lip;
DEFINE_WAIT(wait);
spin_lock(&ailp->ail_lock);
- while ((lip = xfs_ail_max(ailp)) != NULL) {
+ while (xfs_ail_max(ailp) != NULL) {
prepare_to_wait(&ailp->ail_empty, &wait, TASK_UNINTERRUPTIBLE);
wake_up_process(ailp->ail_task);
spin_unlock(&ailp->ail_lock);
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index c325a28b89a8..10aa1fd39d2b 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -210,7 +210,7 @@ __xfs_xattr_put_listent(
return;
}
offset = context->buffer + context->count;
- strncpy(offset, prefix, prefix_len);
+ memcpy(offset, prefix, prefix_len);
offset += prefix_len;
strncpy(offset, (char *)name, namelen); /* real name */
offset += namelen;