diff options
Diffstat (limited to 'fs/xfs')
73 files changed, 1701 insertions, 714 deletions
diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index 517a138faa66..191b22b9a35b 100644 --- a/fs/xfs/libxfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h @@ -133,6 +133,21 @@ xfs_verify_agbno(struct xfs_perag *pag, xfs_agblock_t agbno) return true; } +static inline bool +xfs_verify_agbext( + struct xfs_perag *pag, + xfs_agblock_t agbno, + xfs_agblock_t len) +{ + if (agbno + len <= agbno) + return false; + + if (!xfs_verify_agbno(pag, agbno)) + return false; + + return xfs_verify_agbno(pag, agbno + len - 1); +} + /* * Verify that an AG inode number pointer neither points outside the AG * nor points at static metadata. diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 6261599bb389..989cf341779b 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -263,11 +263,7 @@ xfs_alloc_get_rec( goto out_bad_rec; /* check for valid extent range, including overflow */ - if (!xfs_verify_agbno(pag, *bno)) - goto out_bad_rec; - if (*bno > *bno + *len) - goto out_bad_rec; - if (!xfs_verify_agbno(pag, *bno + *len - 1)) + if (!xfs_verify_agbext(pag, *bno, *len)) goto out_bad_rec; return 0; @@ -1520,7 +1516,7 @@ xfs_alloc_ag_vextent_lastblock( #ifdef DEBUG /* Randomly don't execute the first algorithm. */ - if (prandom_u32_max(2)) + if (get_random_u32_below(2)) return 0; #endif diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 49d0d4ea63fc..0d56a8d862e8 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4058,7 +4058,7 @@ xfs_bmap_alloc_userdata( * the busy list. */ bma->datatype = XFS_ALLOC_NOBUSY; - if (whichfork == XFS_DATA_FORK) { + if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) { bma->datatype |= XFS_ALLOC_USERDATA; if (bma->offset == 0) bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA; @@ -4551,7 +4551,8 @@ xfs_bmapi_convert_delalloc( * the extent. Just return the real extent at this offset. */ if (!isnullstartblock(bma.got.br_startblock)) { - xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags); + xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags, + xfs_iomap_inode_sequence(ip, flags)); *seq = READ_ONCE(ifp->if_seq); goto out_trans_cancel; } @@ -4599,7 +4600,8 @@ xfs_bmapi_convert_delalloc( XFS_STATS_INC(mp, xs_xstrat_quick); ASSERT(!isnullstartblock(bma.got.br_startblock)); - xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags); + xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags, + xfs_iomap_inode_sequence(ip, flags)); *seq = READ_ONCE(ifp->if_seq); if (whichfork == XFS_COW_FORK) diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index eef27858a013..29c4b4ccb909 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -556,7 +556,6 @@ xfs_btree_islastblock( struct xfs_buf *bp; block = xfs_btree_get_block(cur, level, &bp); - ASSERT(block && xfs_btree_check_block(cur, block, level, bp) == 0); if (cur->bc_flags & XFS_BTREE_LONG_PTRS) return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK); diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c index d9b66306a9a7..cb9e950a911d 100644 --- a/fs/xfs/libxfs/xfs_dir2_leaf.c +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c @@ -146,6 +146,8 @@ xfs_dir3_leaf_check_int( xfs_dir2_leaf_tail_t *ltp; int stale; int i; + bool isleaf1 = (hdr->magic == XFS_DIR2_LEAF1_MAGIC || + hdr->magic == XFS_DIR3_LEAF1_MAGIC); ltp = xfs_dir2_leaf_tail_p(geo, leaf); @@ -158,8 +160,7 @@ xfs_dir3_leaf_check_int( return __this_address; /* Leaves and bests don't overlap in leaf format. */ - if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC || - hdr->magic == XFS_DIR3_LEAF1_MAGIC) && + if (isleaf1 && (char *)&hdr->ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp)) return __this_address; @@ -175,6 +176,10 @@ xfs_dir3_leaf_check_int( } if (hdr->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) stale++; + if (isleaf1 && xfs_dir2_dataptr_to_db(geo, + be32_to_cpu(hdr->ents[i].address)) >= + be32_to_cpu(ltp->bestcount)) + return __this_address; } if (hdr->stale != stale) return __this_address; diff --git a/fs/xfs/libxfs/xfs_errortag.h b/fs/xfs/libxfs/xfs_errortag.h index 5362908164b0..01a9e86b3037 100644 --- a/fs/xfs/libxfs/xfs_errortag.h +++ b/fs/xfs/libxfs/xfs_errortag.h @@ -40,13 +40,12 @@ #define XFS_ERRTAG_REFCOUNT_FINISH_ONE 25 #define XFS_ERRTAG_BMAP_FINISH_ONE 26 #define XFS_ERRTAG_AG_RESV_CRITICAL 27 + /* - * DEBUG mode instrumentation to test and/or trigger delayed allocation - * block killing in the event of failed writes. When enabled, all - * buffered writes are silenty dropped and handled as if they failed. - * All delalloc blocks in the range of the write (including pre-existing - * delalloc blocks!) are tossed as part of the write failure error - * handling sequence. + * Drop-writes support removed because write error handling cannot trash + * pre-existing delalloc extents in any useful way anymore. We retain the + * definition so that we can reject it as an invalid value in + * xfs_errortag_valid(). */ #define XFS_ERRTAG_DROP_WRITES 28 #define XFS_ERRTAG_LOG_BAD_CRC 29 @@ -62,7 +61,9 @@ #define XFS_ERRTAG_LARP 39 #define XFS_ERRTAG_DA_LEAF_SPLIT 40 #define XFS_ERRTAG_ATTR_LEAF_TO_NODE 41 -#define XFS_ERRTAG_MAX 42 +#define XFS_ERRTAG_WB_DELAY_MS 42 +#define XFS_ERRTAG_WRITE_DELAY_MS 43 +#define XFS_ERRTAG_MAX 44 /* * Random factors for above tags, 1 means always, 2 means 1/2 time, etc. @@ -95,7 +96,6 @@ #define XFS_RANDOM_REFCOUNT_FINISH_ONE 1 #define XFS_RANDOM_BMAP_FINISH_ONE 1 #define XFS_RANDOM_AG_RESV_CRITICAL 4 -#define XFS_RANDOM_DROP_WRITES 1 #define XFS_RANDOM_LOG_BAD_CRC 1 #define XFS_RANDOM_LOG_ITEM_PIN 1 #define XFS_RANDOM_BUF_LRU_REF 2 @@ -109,5 +109,7 @@ #define XFS_RANDOM_LARP 1 #define XFS_RANDOM_DA_LEAF_SPLIT 1 #define XFS_RANDOM_ATTR_LEAF_TO_NODE 1 +#define XFS_RANDOM_WB_DELAY_MS 3000 +#define XFS_RANDOM_WRITE_DELAY_MS 3000 #endif /* __XFS_ERRORTAG_H_ */ diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index b55bdfa9c8a8..371dc07233e0 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -1564,20 +1564,6 @@ struct xfs_rmap_rec { #define RMAPBT_UNUSED_OFFSET_BITLEN 7 #define RMAPBT_OFFSET_BITLEN 54 -#define XFS_RMAP_ATTR_FORK (1 << 0) -#define XFS_RMAP_BMBT_BLOCK (1 << 1) -#define XFS_RMAP_UNWRITTEN (1 << 2) -#define XFS_RMAP_KEY_FLAGS (XFS_RMAP_ATTR_FORK | \ - XFS_RMAP_BMBT_BLOCK) -#define XFS_RMAP_REC_FLAGS (XFS_RMAP_UNWRITTEN) -struct xfs_rmap_irec { - xfs_agblock_t rm_startblock; /* extent start block */ - xfs_extlen_t rm_blockcount; /* extent length */ - uint64_t rm_owner; /* extent owner */ - uint64_t rm_offset; /* offset within the owner */ - unsigned int rm_flags; /* state flags */ -}; - /* * Key structure * @@ -1626,7 +1612,7 @@ unsigned int xfs_refc_block(struct xfs_mount *mp); * on the startblock. This speeds up mount time deletion of stale * staging extents because they're all at the right side of the tree. */ -#define XFS_REFC_COW_START ((xfs_agblock_t)(1U << 31)) +#define XFS_REFC_COWFLAG (1U << 31) #define REFCNTBT_COWFLAG_BITLEN 1 #define REFCNTBT_AGBLOCK_BITLEN 31 @@ -1640,12 +1626,6 @@ struct xfs_refcount_key { __be32 rc_startblock; /* starting block number */ }; -struct xfs_refcount_irec { - xfs_agblock_t rc_startblock; /* starting block number */ - xfs_extlen_t rc_blockcount; /* count of free blocks */ - xfs_nlink_t rc_refcount; /* number of inodes linked here */ -}; - #define MAXREFCOUNT ((xfs_nlink_t)~0U) #define MAXREFCEXTLEN ((xfs_extlen_t)~0U) diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 94db50eb706a..5118dedf9267 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -636,7 +636,7 @@ xfs_ialloc_ag_alloc( /* randomly do sparse inode allocations */ if (xfs_has_sparseinodes(tp->t_mountp) && igeo->ialloc_min_blks < igeo->ialloc_blks) - do_sparse = prandom_u32_max(2); + do_sparse = get_random_u32_below(2); #endif /* diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index b351b9dc6561..f13e0809dc63 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -613,25 +613,49 @@ typedef struct xfs_efi_log_format { uint16_t efi_size; /* size of this item */ uint32_t efi_nextents; /* # extents to free */ uint64_t efi_id; /* efi identifier */ - xfs_extent_t efi_extents[1]; /* array of extents to free */ + xfs_extent_t efi_extents[]; /* array of extents to free */ } xfs_efi_log_format_t; +static inline size_t +xfs_efi_log_format_sizeof( + unsigned int nr) +{ + return sizeof(struct xfs_efi_log_format) + + nr * sizeof(struct xfs_extent); +} + typedef struct xfs_efi_log_format_32 { uint16_t efi_type; /* efi log item type */ uint16_t efi_size; /* size of this item */ uint32_t efi_nextents; /* # extents to free */ uint64_t efi_id; /* efi identifier */ - xfs_extent_32_t efi_extents[1]; /* array of extents to free */ + xfs_extent_32_t efi_extents[]; /* array of extents to free */ } __attribute__((packed)) xfs_efi_log_format_32_t; +static inline size_t +xfs_efi_log_format32_sizeof( + unsigned int nr) +{ + return sizeof(struct xfs_efi_log_format_32) + + nr * sizeof(struct xfs_extent_32); +} + typedef struct xfs_efi_log_format_64 { uint16_t efi_type; /* efi log item type */ uint16_t efi_size; /* size of this item */ uint32_t efi_nextents; /* # extents to free */ uint64_t efi_id; /* efi identifier */ - xfs_extent_64_t efi_extents[1]; /* array of extents to free */ + xfs_extent_64_t efi_extents[]; /* array of extents to free */ } xfs_efi_log_format_64_t; +static inline size_t +xfs_efi_log_format64_sizeof( + unsigned int nr) +{ + return sizeof(struct xfs_efi_log_format_64) + + nr * sizeof(struct xfs_extent_64); +} + /* * This is the structure used to lay out an efd log item in the * log. The efd_extents array is a variable size array whose @@ -642,25 +666,49 @@ typedef struct xfs_efd_log_format { uint16_t efd_size; /* size of this item */ uint32_t efd_nextents; /* # of extents freed */ uint64_t efd_efi_id; /* id of corresponding efi */ - xfs_extent_t efd_extents[1]; /* array of extents freed */ + xfs_extent_t efd_extents[]; /* array of extents freed */ } xfs_efd_log_format_t; +static inline size_t +xfs_efd_log_format_sizeof( + unsigned int nr) +{ + return sizeof(struct xfs_efd_log_format) + + nr * sizeof(struct xfs_extent); +} + typedef struct xfs_efd_log_format_32 { uint16_t efd_type; /* efd log item type */ uint16_t efd_size; /* size of this item */ uint32_t efd_nextents; /* # of extents freed */ uint64_t efd_efi_id; /* id of corresponding efi */ - xfs_extent_32_t efd_extents[1]; /* array of extents freed */ + xfs_extent_32_t efd_extents[]; /* array of extents freed */ } __attribute__((packed)) xfs_efd_log_format_32_t; +static inline size_t +xfs_efd_log_format32_sizeof( + unsigned int nr) +{ + return sizeof(struct xfs_efd_log_format_32) + + nr * sizeof(struct xfs_extent_32); +} + typedef struct xfs_efd_log_format_64 { uint16_t efd_type; /* efd log item type */ uint16_t efd_size; /* size of this item */ uint32_t efd_nextents; /* # of extents freed */ uint64_t efd_efi_id; /* id of corresponding efi */ - xfs_extent_64_t efd_extents[1]; /* array of extents freed */ + xfs_extent_64_t efd_extents[]; /* array of extents freed */ } xfs_efd_log_format_64_t; +static inline size_t +xfs_efd_log_format64_sizeof( + unsigned int nr) +{ + return sizeof(struct xfs_efd_log_format_64) + + nr * sizeof(struct xfs_extent_64); +} + /* * RUI/RUD (reverse mapping) log format definitions */ diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 64b910caafaa..6f7ed9288fe4 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -46,13 +46,16 @@ STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur, int xfs_refcount_lookup_le( struct xfs_btree_cur *cur, + enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat) { - trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno, + trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, + xfs_refcount_encode_startblock(bno, domain), XFS_LOOKUP_LE); cur->bc_rec.rc.rc_startblock = bno; cur->bc_rec.rc.rc_blockcount = 0; + cur->bc_rec.rc.rc_domain = domain; return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); } @@ -63,13 +66,16 @@ xfs_refcount_lookup_le( int xfs_refcount_lookup_ge( struct xfs_btree_cur *cur, + enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat) { - trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno, + trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, + xfs_refcount_encode_startblock(bno, domain), XFS_LOOKUP_GE); cur->bc_rec.rc.rc_startblock = bno; cur->bc_rec.rc.rc_blockcount = 0; + cur->bc_rec.rc.rc_domain = domain; return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); } @@ -80,13 +86,16 @@ xfs_refcount_lookup_ge( int xfs_refcount_lookup_eq( struct xfs_btree_cur *cur, + enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat) { - trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno, + trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, + xfs_refcount_encode_startblock(bno, domain), XFS_LOOKUP_LE); cur->bc_rec.rc.rc_startblock = bno; cur->bc_rec.rc.rc_blockcount = 0; + cur->bc_rec.rc.rc_domain = domain; return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); } @@ -96,7 +105,17 @@ xfs_refcount_btrec_to_irec( const union xfs_btree_rec *rec, struct xfs_refcount_irec *irec) { - irec->rc_startblock = be32_to_cpu(rec->refc.rc_startblock); + uint32_t start; + + start = be32_to_cpu(rec->refc.rc_startblock); + if (start & XFS_REFC_COWFLAG) { + start &= ~XFS_REFC_COWFLAG; + irec->rc_domain = XFS_REFC_DOMAIN_COW; + } else { + irec->rc_domain = XFS_REFC_DOMAIN_SHARED; + } + + irec->rc_startblock = start; irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount); irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount); } @@ -114,7 +133,6 @@ xfs_refcount_get_rec( struct xfs_perag *pag = cur->bc_ag.pag; union xfs_btree_rec *rec; int error; - xfs_agblock_t realstart; error = xfs_btree_get_rec(cur, &rec, stat); if (error || !*stat) @@ -124,22 +142,11 @@ xfs_refcount_get_rec( if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN) goto out_bad_rec; - /* handle special COW-staging state */ - realstart = irec->rc_startblock; - if (realstart & XFS_REFC_COW_START) { - if (irec->rc_refcount != 1) - goto out_bad_rec; - realstart &= ~XFS_REFC_COW_START; - } else if (irec->rc_refcount < 2) { + if (!xfs_refcount_check_domain(irec)) goto out_bad_rec; - } /* check for valid extent range, including overflow */ - if (!xfs_verify_agbno(pag, realstart)) - goto out_bad_rec; - if (realstart > realstart + irec->rc_blockcount) - goto out_bad_rec; - if (!xfs_verify_agbno(pag, realstart + irec->rc_blockcount - 1)) + if (!xfs_verify_agbext(pag, irec->rc_startblock, irec->rc_blockcount)) goto out_bad_rec; if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT) @@ -169,12 +176,17 @@ xfs_refcount_update( struct xfs_refcount_irec *irec) { union xfs_btree_rec rec; + uint32_t start; int error; trace_xfs_refcount_update(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec); - rec.refc.rc_startblock = cpu_to_be32(irec->rc_startblock); + + start = xfs_refcount_encode_startblock(irec->rc_startblock, + irec->rc_domain); + rec.refc.rc_startblock = cpu_to_be32(start); rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount); rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount); + error = xfs_btree_update(cur, &rec); if (error) trace_xfs_refcount_update_error(cur->bc_mp, @@ -196,9 +208,12 @@ xfs_refcount_insert( int error; trace_xfs_refcount_insert(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec); + cur->bc_rec.rc.rc_startblock = irec->rc_startblock; cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount; cur->bc_rec.rc.rc_refcount = irec->rc_refcount; + cur->bc_rec.rc.rc_domain = irec->rc_domain; + error = xfs_btree_insert(cur, i); if (error) goto out_error; @@ -244,7 +259,8 @@ xfs_refcount_delete( } if (error) goto out_error; - error = xfs_refcount_lookup_ge(cur, irec.rc_startblock, &found_rec); + error = xfs_refcount_lookup_ge(cur, irec.rc_domain, irec.rc_startblock, + &found_rec); out_error: if (error) trace_xfs_refcount_delete_error(cur->bc_mp, @@ -343,6 +359,7 @@ xfs_refc_next( STATIC int xfs_refcount_split_extent( struct xfs_btree_cur *cur, + enum xfs_refc_domain domain, xfs_agblock_t agbno, bool *shape_changed) { @@ -351,7 +368,7 @@ xfs_refcount_split_extent( int error; *shape_changed = false; - error = xfs_refcount_lookup_le(cur, agbno, &found_rec); + error = xfs_refcount_lookup_le(cur, domain, agbno, &found_rec); if (error) goto out_error; if (!found_rec) @@ -364,6 +381,8 @@ xfs_refcount_split_extent( error = -EFSCORRUPTED; goto out_error; } + if (rcext.rc_domain != domain) + return 0; if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno) return 0; @@ -415,6 +434,9 @@ xfs_refcount_merge_center_extents( trace_xfs_refcount_merge_center_extents(cur->bc_mp, cur->bc_ag.pag->pag_agno, left, center, right); + ASSERT(left->rc_domain == center->rc_domain); + ASSERT(right->rc_domain == center->rc_domain); + /* * Make sure the center and right extents are not in the btree. * If the center extent was synthesized, the first delete call @@ -423,8 +445,8 @@ xfs_refcount_merge_center_extents( * call removes the center and the second one removes the right * extent. */ - error = xfs_refcount_lookup_ge(cur, center->rc_startblock, - &found_rec); + error = xfs_refcount_lookup_ge(cur, center->rc_domain, + center->rc_startblock, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { @@ -451,8 +473,8 @@ xfs_refcount_merge_center_extents( } /* Enlarge the left extent. */ - error = xfs_refcount_lookup_le(cur, left->rc_startblock, - &found_rec); + error = xfs_refcount_lookup_le(cur, left->rc_domain, + left->rc_startblock, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { @@ -491,10 +513,12 @@ xfs_refcount_merge_left_extent( trace_xfs_refcount_merge_left_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, left, cleft); + ASSERT(left->rc_domain == cleft->rc_domain); + /* If the extent at agbno (cleft) wasn't synthesized, remove it. */ if (cleft->rc_refcount > 1) { - error = xfs_refcount_lookup_le(cur, cleft->rc_startblock, - &found_rec); + error = xfs_refcount_lookup_le(cur, cleft->rc_domain, + cleft->rc_startblock, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { @@ -512,8 +536,8 @@ xfs_refcount_merge_left_extent( } /* Enlarge the left extent. */ - error = xfs_refcount_lookup_le(cur, left->rc_startblock, - &found_rec); + error = xfs_refcount_lookup_le(cur, left->rc_domain, + left->rc_startblock, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { @@ -552,13 +576,15 @@ xfs_refcount_merge_right_extent( trace_xfs_refcount_merge_right_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, cright, right); + ASSERT(right->rc_domain == cright->rc_domain); + /* * If the extent ending at agbno+aglen (cright) wasn't synthesized, * remove it. */ if (cright->rc_refcount > 1) { - error = xfs_refcount_lookup_le(cur, cright->rc_startblock, - &found_rec); + error = xfs_refcount_lookup_le(cur, cright->rc_domain, + cright->rc_startblock, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { @@ -576,8 +602,8 @@ xfs_refcount_merge_right_extent( } /* Enlarge the right extent. */ - error = xfs_refcount_lookup_le(cur, right->rc_startblock, - &found_rec); + error = xfs_refcount_lookup_le(cur, right->rc_domain, + right->rc_startblock, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { @@ -600,8 +626,6 @@ out_error: return error; } -#define XFS_FIND_RCEXT_SHARED 1 -#define XFS_FIND_RCEXT_COW 2 /* * Find the left extent and the one after it (cleft). This function assumes * that we've already split any extent crossing agbno. @@ -611,16 +635,16 @@ xfs_refcount_find_left_extents( struct xfs_btree_cur *cur, struct xfs_refcount_irec *left, struct xfs_refcount_irec *cleft, + enum xfs_refc_domain domain, xfs_agblock_t agbno, - xfs_extlen_t aglen, - int flags) + xfs_extlen_t aglen) { struct xfs_refcount_irec tmp; int error; int found_rec; left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK; - error = xfs_refcount_lookup_le(cur, agbno - 1, &found_rec); + error = xfs_refcount_lookup_le(cur, domain, agbno - 1, &found_rec); if (error) goto out_error; if (!found_rec) @@ -634,11 +658,9 @@ xfs_refcount_find_left_extents( goto out_error; } - if (xfs_refc_next(&tmp) != agbno) - return 0; - if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2) + if (tmp.rc_domain != domain) return 0; - if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1) + if (xfs_refc_next(&tmp) != agbno) return 0; /* We have a left extent; retrieve (or invent) the next right one */ *left = tmp; @@ -655,6 +677,9 @@ xfs_refcount_find_left_extents( goto out_error; } + if (tmp.rc_domain != domain) + goto not_found; + /* if tmp starts at the end of our range, just use that */ if (tmp.rc_startblock == agbno) *cleft = tmp; @@ -671,8 +696,10 @@ xfs_refcount_find_left_extents( cleft->rc_blockcount = min(aglen, tmp.rc_startblock - agbno); cleft->rc_refcount = 1; + cleft->rc_domain = domain; } } else { +not_found: /* * No extents, so pretend that there's one covering the whole * range. @@ -680,6 +707,7 @@ xfs_refcount_find_left_extents( cleft->rc_startblock = agbno; cleft->rc_blockcount = aglen; cleft->rc_refcount = 1; + cleft->rc_domain = domain; } trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, left, cleft, agbno); @@ -700,16 +728,16 @@ xfs_refcount_find_right_extents( struct xfs_btree_cur *cur, struct xfs_refcount_irec *right, struct xfs_refcount_irec *cright, + enum xfs_refc_domain domain, xfs_agblock_t agbno, - xfs_extlen_t aglen, - int flags) + xfs_extlen_t aglen) { struct xfs_refcount_irec tmp; int error; int found_rec; right->rc_startblock = cright->rc_startblock = NULLAGBLOCK; - error = xfs_refcount_lookup_ge(cur, agbno + aglen, &found_rec); + error = xfs_refcount_lookup_ge(cur, domain, agbno + aglen, &found_rec); if (error) goto out_error; if (!found_rec) @@ -723,11 +751,9 @@ xfs_refcount_find_right_extents( goto out_error; } - if (tmp.rc_startblock != agbno + aglen) + if (tmp.rc_domain != domain) return 0; - if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2) - return 0; - if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1) + if (tmp.rc_startblock != agbno + aglen) return 0; /* We have a right extent; retrieve (or invent) the next left one */ *right = tmp; @@ -744,6 +770,9 @@ xfs_refcount_find_right_extents( goto out_error; } + if (tmp.rc_domain != domain) + goto not_found; + /* if tmp ends at the end of our range, just use that */ if (xfs_refc_next(&tmp) == agbno + aglen) *cright = tmp; @@ -760,8 +789,10 @@ xfs_refcount_find_right_extents( cright->rc_blockcount = right->rc_startblock - cright->rc_startblock; cright->rc_refcount = 1; + cright->rc_domain = domain; } } else { +not_found: /* * No extents, so pretend that there's one covering the whole * range. @@ -769,6 +800,7 @@ xfs_refcount_find_right_extents( cright->rc_startblock = agbno; cright->rc_blockcount = aglen; cright->rc_refcount = 1; + cright->rc_domain = domain; } trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, cright, right, agbno + aglen); @@ -783,21 +815,146 @@ out_error: /* Is this extent valid? */ static inline bool xfs_refc_valid( - struct xfs_refcount_irec *rc) + const struct xfs_refcount_irec *rc) { return rc->rc_startblock != NULLAGBLOCK; } +static inline xfs_nlink_t +xfs_refc_merge_refcount( + const struct xfs_refcount_irec *irec, + enum xfs_refc_adjust_op adjust) +{ + /* Once a record hits MAXREFCOUNT, it is pinned there forever */ + if (irec->rc_refcount == MAXREFCOUNT) + return MAXREFCOUNT; + return irec->rc_refcount + adjust; +} + +static inline bool +xfs_refc_want_merge_center( + const struct xfs_refcount_irec *left, + const struct xfs_refcount_irec *cleft, + const struct xfs_refcount_irec *cright, + const struct xfs_refcount_irec *right, + bool cleft_is_cright, + enum xfs_refc_adjust_op adjust, + unsigned long long *ulenp) +{ + unsigned long long ulen = left->rc_blockcount; + xfs_nlink_t new_refcount; + + /* + * To merge with a center record, both shoulder records must be + * adjacent to the record we want to adjust. This is only true if + * find_left and find_right made all four records valid. + */ + if (!xfs_refc_valid(left) || !xfs_refc_valid(right) || + !xfs_refc_valid(cleft) || !xfs_refc_valid(cright)) + return false; + + /* There must only be one record for the entire range. */ + if (!cleft_is_cright) + return false; + + /* The shoulder record refcounts must match the new refcount. */ + new_refcount = xfs_refc_merge_refcount(cleft, adjust); + if (left->rc_refcount != new_refcount) + return false; + if (right->rc_refcount != new_refcount) + return false; + + /* + * The new record cannot exceed the max length. ulen is a ULL as the + * individual record block counts can be up to (u32 - 1) in length + * hence we need to catch u32 addition overflows here. + */ + ulen += cleft->rc_blockcount + right->rc_blockcount; + if (ulen >= MAXREFCEXTLEN) + return false; + + *ulenp = ulen; + return true; +} + +static inline bool +xfs_refc_want_merge_left( + const struct xfs_refcount_irec *left, + const struct xfs_refcount_irec *cleft, + enum xfs_refc_adjust_op adjust) +{ + unsigned long long ulen = left->rc_blockcount; + xfs_nlink_t new_refcount; + + /* + * For a left merge, the left shoulder record must be adjacent to the + * start of the range. If this is true, find_left made left and cleft + * contain valid contents. + */ + if (!xfs_refc_valid(left) || !xfs_refc_valid(cleft)) + return false; + + /* Left shoulder record refcount must match the new refcount. */ + new_refcount = xfs_refc_merge_refcount(cleft, adjust); + if (left->rc_refcount != new_refcount) + return false; + + /* + * The new record cannot exceed the max length. ulen is a ULL as the + * individual record block counts can be up to (u32 - 1) in length + * hence we need to catch u32 addition overflows here. + */ + ulen += cleft->rc_blockcount; + if (ulen >= MAXREFCEXTLEN) + return false; + + return true; +} + +static inline bool +xfs_refc_want_merge_right( + const struct xfs_refcount_irec *cright, + const struct xfs_refcount_irec *right, + enum xfs_refc_adjust_op adjust) +{ + unsigned long long ulen = right->rc_blockcount; + xfs_nlink_t new_refcount; + + /* + * For a right merge, the right shoulder record must be adjacent to the + * end of the range. If this is true, find_right made cright and right + * contain valid contents. + */ + if (!xfs_refc_valid(right) || !xfs_refc_valid(cright)) + return false; + + /* Right shoulder record refcount must match the new refcount. */ + new_refcount = xfs_refc_merge_refcount(cright, adjust); + if (right->rc_refcount != new_refcount) + return false; + + /* + * The new record cannot exceed the max length. ulen is a ULL as the + * individual record block counts can be up to (u32 - 1) in length + * hence we need to catch u32 addition overflows here. + */ + ulen += cright->rc_blockcount; + if (ulen >= MAXREFCEXTLEN) + return false; + + return true; +} + /* * Try to merge with any extents on the boundaries of the adjustment range. */ STATIC int xfs_refcount_merge_extents( struct xfs_btree_cur *cur, + enum xfs_refc_domain domain, xfs_agblock_t *agbno, xfs_extlen_t *aglen, enum xfs_refc_adjust_op adjust, - int flags, bool *shape_changed) { struct xfs_refcount_irec left = {0}, cleft = {0}; @@ -812,12 +969,12 @@ xfs_refcount_merge_extents( * just below (agbno + aglen) [cright], and just above (agbno + aglen) * [right]. */ - error = xfs_refcount_find_left_extents(cur, &left, &cleft, *agbno, - *aglen, flags); + error = xfs_refcount_find_left_extents(cur, &left, &cleft, domain, + *agbno, *aglen); if (error) return error; - error = xfs_refcount_find_right_extents(cur, &right, &cright, *agbno, - *aglen, flags); + error = xfs_refcount_find_right_extents(cur, &right, &cright, domain, + *agbno, *aglen); if (error) return error; @@ -829,23 +986,15 @@ xfs_refcount_merge_extents( (cleft.rc_blockcount == cright.rc_blockcount); /* Try to merge left, cleft, and right. cleft must == cright. */ - ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount + - right.rc_blockcount; - if (xfs_refc_valid(&left) && xfs_refc_valid(&right) && - xfs_refc_valid(&cleft) && xfs_refc_valid(&cright) && cequal && - left.rc_refcount == cleft.rc_refcount + adjust && - right.rc_refcount == cleft.rc_refcount + adjust && - ulen < MAXREFCEXTLEN) { + if (xfs_refc_want_merge_center(&left, &cleft, &cright, &right, cequal, + adjust, &ulen)) { *shape_changed = true; return xfs_refcount_merge_center_extents(cur, &left, &cleft, &right, ulen, aglen); } /* Try to merge left and cleft. */ - ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount; - if (xfs_refc_valid(&left) && xfs_refc_valid(&cleft) && - left.rc_refcount == cleft.rc_refcount + adjust && - ulen < MAXREFCEXTLEN) { + if (xfs_refc_want_merge_left(&left, &cleft, adjust)) { *shape_changed = true; error = xfs_refcount_merge_left_extent(cur, &left, &cleft, agbno, aglen); @@ -861,16 +1010,13 @@ xfs_refcount_merge_extents( } /* Try to merge cright and right. */ - ulen = (unsigned long long)right.rc_blockcount + cright.rc_blockcount; - if (xfs_refc_valid(&right) && xfs_refc_valid(&cright) && - right.rc_refcount == cright.rc_refcount + adjust && - ulen < MAXREFCEXTLEN) { + if (xfs_refc_want_merge_right(&cright, &right, adjust)) { *shape_changed = true; return xfs_refcount_merge_right_extent(cur, &right, &cright, aglen); } - return error; + return 0; } /* @@ -933,7 +1079,8 @@ xfs_refcount_adjust_extents( if (*aglen == 0) return 0; - error = xfs_refcount_lookup_ge(cur, *agbno, &found_rec); + error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_SHARED, *agbno, + &found_rec); if (error) goto out_error; @@ -941,10 +1088,11 @@ xfs_refcount_adjust_extents( error = xfs_refcount_get_rec(cur, &ext, &found_rec); if (error) goto out_error; - if (!found_rec) { + if (!found_rec || ext.rc_domain != XFS_REFC_DOMAIN_SHARED) { ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks; ext.rc_blockcount = 0; ext.rc_refcount = 0; + ext.rc_domain = XFS_REFC_DOMAIN_SHARED; } /* @@ -957,6 +1105,8 @@ xfs_refcount_adjust_extents( tmp.rc_blockcount = min(*aglen, ext.rc_startblock - *agbno); tmp.rc_refcount = 1 + adj; + tmp.rc_domain = XFS_REFC_DOMAIN_SHARED; + trace_xfs_refcount_modify_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, &tmp); @@ -986,15 +1136,30 @@ xfs_refcount_adjust_extents( (*agbno) += tmp.rc_blockcount; (*aglen) -= tmp.rc_blockcount; - error = xfs_refcount_lookup_ge(cur, *agbno, + /* Stop if there's nothing left to modify */ + if (*aglen == 0 || !xfs_refcount_still_have_space(cur)) + break; + + /* Move the cursor to the start of ext. */ + error = xfs_refcount_lookup_ge(cur, + XFS_REFC_DOMAIN_SHARED, *agbno, &found_rec); if (error) goto out_error; } - /* Stop if there's nothing left to modify */ - if (*aglen == 0 || !xfs_refcount_still_have_space(cur)) - break; + /* + * A previous step trimmed agbno/aglen such that the end of the + * range would not be in the middle of the record. If this is + * no longer the case, something is seriously wrong with the + * btree. Make sure we never feed the synthesized record into + * the processing loop below. + */ + if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount == 0) || + XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount > *aglen)) { + error = -EFSCORRUPTED; + goto out_error; + } /* * Adjust the reference count and either update the tree @@ -1070,13 +1235,15 @@ xfs_refcount_adjust( /* * Ensure that no rcextents cross the boundary of the adjustment range. */ - error = xfs_refcount_split_extent(cur, agbno, &shape_changed); + error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED, + agbno, &shape_changed); if (error) goto out_error; if (shape_changed) shape_changes++; - error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed); + error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED, + agbno + aglen, &shape_changed); if (error) goto out_error; if (shape_changed) @@ -1085,8 +1252,8 @@ xfs_refcount_adjust( /* * Try to merge with the left or right extents of the range. */ - error = xfs_refcount_merge_extents(cur, new_agbno, new_aglen, adj, - XFS_FIND_RCEXT_SHARED, &shape_changed); + error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_SHARED, + new_agbno, new_aglen, adj, &shape_changed); if (error) goto out_error; if (shape_changed) @@ -1125,6 +1292,32 @@ xfs_refcount_finish_one_cleanup( } /* + * Set up a continuation a deferred refcount operation by updating the intent. + * Checks to make sure we're not going to run off the end of the AG. + */ +static inline int +xfs_refcount_continue_op( + struct xfs_btree_cur *cur, + xfs_fsblock_t startblock, + xfs_agblock_t new_agbno, + xfs_extlen_t new_len, + xfs_fsblock_t *new_fsbno) +{ + struct xfs_mount *mp = cur->bc_mp; + struct xfs_perag *pag = cur->bc_ag.pag; + + if (XFS_IS_CORRUPT(mp, !xfs_verify_agbext(pag, new_agbno, new_len))) + return -EFSCORRUPTED; + + *new_fsbno = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); + + ASSERT(xfs_verify_fsbext(mp, *new_fsbno, new_len)); + ASSERT(pag->pag_agno == XFS_FSB_TO_AGNO(mp, *new_fsbno)); + + return 0; +} + +/* * Process one of the deferred refcount operations. We pass back the * btree cursor to maintain our lock on the btree between calls. * This saves time and eliminates a buffer deadlock between the @@ -1191,12 +1384,20 @@ xfs_refcount_finish_one( case XFS_REFCOUNT_INCREASE: error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, new_len, XFS_REFCOUNT_ADJUST_INCREASE); - *new_fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); + if (error) + goto out_drop; + if (*new_len > 0) + error = xfs_refcount_continue_op(rcur, startblock, + new_agbno, *new_len, new_fsb); break; case XFS_REFCOUNT_DECREASE: error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, new_len, XFS_REFCOUNT_ADJUST_DECREASE); - *new_fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); + if (error) + goto out_drop; + if (*new_len > 0) + error = xfs_refcount_continue_op(rcur, startblock, + new_agbno, *new_len, new_fsb); break; case XFS_REFCOUNT_ALLOC_COW: *new_fsb = startblock + blockcount; @@ -1307,7 +1508,8 @@ xfs_refcount_find_shared( *flen = 0; /* Try to find a refcount extent that crosses the start */ - error = xfs_refcount_lookup_le(cur, agbno, &have); + error = xfs_refcount_lookup_le(cur, XFS_REFC_DOMAIN_SHARED, agbno, + &have); if (error) goto out_error; if (!have) { @@ -1325,6 +1527,8 @@ xfs_refcount_find_shared( error = -EFSCORRUPTED; goto out_error; } + if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED) + goto done; /* If the extent ends before the start, look at the next one */ if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) { @@ -1340,6 +1544,8 @@ xfs_refcount_find_shared( error = -EFSCORRUPTED; goto out_error; } + if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED) + goto done; } /* If the extent starts after the range we want, bail out */ @@ -1371,7 +1577,8 @@ xfs_refcount_find_shared( error = -EFSCORRUPTED; goto out_error; } - if (tmp.rc_startblock >= agbno + aglen || + if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED || + tmp.rc_startblock >= agbno + aglen || tmp.rc_startblock != *fbno + *flen) break; *flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno); @@ -1455,17 +1662,23 @@ xfs_refcount_adjust_cow_extents( return 0; /* Find any overlapping refcount records */ - error = xfs_refcount_lookup_ge(cur, agbno, &found_rec); + error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_COW, agbno, + &found_rec); if (error) goto out_error; error = xfs_refcount_get_rec(cur, &ext, &found_rec); if (error) goto out_error; + if (XFS_IS_CORRUPT(cur->bc_mp, found_rec && + ext.rc_domain != XFS_REFC_DOMAIN_COW)) { + error = -EFSCORRUPTED; + goto out_error; + } if (!found_rec) { - ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks + - XFS_REFC_COW_START; + ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks; ext.rc_blockcount = 0; ext.rc_refcount = 0; + ext.rc_domain = XFS_REFC_DOMAIN_COW; } switch (adj) { @@ -1480,6 +1693,8 @@ xfs_refcount_adjust_cow_extents( tmp.rc_startblock = agbno; tmp.rc_blockcount = aglen; tmp.rc_refcount = 1; + tmp.rc_domain = XFS_REFC_DOMAIN_COW; + trace_xfs_refcount_modify_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, &tmp); @@ -1542,24 +1757,24 @@ xfs_refcount_adjust_cow( bool shape_changed; int error; - agbno += XFS_REFC_COW_START; - /* * Ensure that no rcextents cross the boundary of the adjustment range. */ - error = xfs_refcount_split_extent(cur, agbno, &shape_changed); + error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW, + agbno, &shape_changed); if (error) goto out_error; - error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed); + error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW, + agbno + aglen, &shape_changed); if (error) goto out_error; /* * Try to merge with the left or right extents of the range. */ - error = xfs_refcount_merge_extents(cur, &agbno, &aglen, adj, - XFS_FIND_RCEXT_COW, &shape_changed); + error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_COW, &agbno, + &aglen, adj, &shape_changed); if (error) goto out_error; @@ -1666,10 +1881,18 @@ xfs_refcount_recover_extent( be32_to_cpu(rec->refc.rc_refcount) != 1)) return -EFSCORRUPTED; - rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), 0); + rr = kmalloc(sizeof(struct xfs_refcount_recovery), + GFP_KERNEL | __GFP_NOFAIL); + INIT_LIST_HEAD(&rr->rr_list); xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec); - list_add_tail(&rr->rr_list, debris); + if (XFS_IS_CORRUPT(cur->bc_mp, + rr->rr_rrec.rc_domain != XFS_REFC_DOMAIN_COW)) { + kfree(rr); + return -EFSCORRUPTED; + } + + list_add_tail(&rr->rr_list, debris); return 0; } @@ -1687,10 +1910,11 @@ xfs_refcount_recover_cow_leftovers( union xfs_btree_irec low; union xfs_btree_irec high; xfs_fsblock_t fsb; - xfs_agblock_t agbno; int error; - if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START) + /* reflink filesystems mustn't have AGs larger than 2^31-1 blocks */ + BUILD_BUG_ON(XFS_MAX_CRC_AG_BLOCKS >= XFS_REFC_COWFLAG); + if (mp->m_sb.sb_agblocks > XFS_MAX_CRC_AG_BLOCKS) return -EOPNOTSUPP; INIT_LIST_HEAD(&debris); @@ -1717,7 +1941,7 @@ xfs_refcount_recover_cow_leftovers( /* Find all the leftover CoW staging extents. */ memset(&low, 0, sizeof(low)); memset(&high, 0, sizeof(high)); - low.rc.rc_startblock = XFS_REFC_COW_START; + low.rc.rc_domain = high.rc.rc_domain = XFS_REFC_DOMAIN_COW; high.rc.rc_startblock = -1U; error = xfs_btree_query_range(cur, &low, &high, xfs_refcount_recover_extent, &debris); @@ -1738,8 +1962,8 @@ xfs_refcount_recover_cow_leftovers( &rr->rr_rrec); /* Free the orphan record */ - agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START; - fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, agbno); + fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, + rr->rr_rrec.rc_startblock); xfs_refcount_free_cow_extent(tp, fsb, rr->rr_rrec.rc_blockcount); @@ -1751,7 +1975,7 @@ xfs_refcount_recover_cow_leftovers( goto out_free; list_del(&rr->rr_list); - kmem_free(rr); + kfree(rr); } return error; @@ -1761,7 +1985,7 @@ out_free: /* Free the leftover list */ list_for_each_entry_safe(rr, n, &debris, rr_list) { list_del(&rr->rr_list); - kmem_free(rr); + kfree(rr); } return error; } @@ -1770,6 +1994,7 @@ out_free: int xfs_refcount_has_record( struct xfs_btree_cur *cur, + enum xfs_refc_domain domain, xfs_agblock_t bno, xfs_extlen_t len, bool *exists) @@ -1781,6 +2006,7 @@ xfs_refcount_has_record( low.rc.rc_startblock = bno; memset(&high, 0xFF, sizeof(high)); high.rc.rc_startblock = bno + len - 1; + low.rc.rc_domain = high.rc.rc_domain = domain; return xfs_btree_has_record(cur, &low, &high, exists); } diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h index e8b322de7f3d..452f30556f5a 100644 --- a/fs/xfs/libxfs/xfs_refcount.h +++ b/fs/xfs/libxfs/xfs_refcount.h @@ -14,14 +14,33 @@ struct xfs_bmbt_irec; struct xfs_refcount_irec; extern int xfs_refcount_lookup_le(struct xfs_btree_cur *cur, - xfs_agblock_t bno, int *stat); + enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat); extern int xfs_refcount_lookup_ge(struct xfs_btree_cur *cur, - xfs_agblock_t bno, int *stat); + enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat); extern int xfs_refcount_lookup_eq(struct xfs_btree_cur *cur, - xfs_agblock_t bno, int *stat); + enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat); extern int xfs_refcount_get_rec(struct xfs_btree_cur *cur, struct xfs_refcount_irec *irec, int *stat); +static inline uint32_t +xfs_refcount_encode_startblock( + xfs_agblock_t startblock, + enum xfs_refc_domain domain) +{ + uint32_t start; + + /* + * low level btree operations need to handle the generic btree range + * query functions (which set rc_domain == -1U), so we check that the + * domain is /not/ shared. + */ + start = startblock & ~XFS_REFC_COWFLAG; + if (domain != XFS_REFC_DOMAIN_SHARED) + start |= XFS_REFC_COWFLAG; + + return start; +} + enum xfs_refcount_intent_type { XFS_REFCOUNT_INCREASE = 1, XFS_REFCOUNT_DECREASE, @@ -36,6 +55,18 @@ struct xfs_refcount_intent { xfs_fsblock_t ri_startblock; }; +/* Check that the refcount is appropriate for the record domain. */ +static inline bool +xfs_refcount_check_domain( + const struct xfs_refcount_irec *irec) +{ + if (irec->rc_domain == XFS_REFC_DOMAIN_COW && irec->rc_refcount != 1) + return false; + if (irec->rc_domain == XFS_REFC_DOMAIN_SHARED && irec->rc_refcount < 2) + return false; + return true; +} + void xfs_refcount_increase_extent(struct xfs_trans *tp, struct xfs_bmbt_irec *irec); void xfs_refcount_decrease_extent(struct xfs_trans *tp, @@ -79,7 +110,8 @@ extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp, #define XFS_REFCOUNT_ITEM_OVERHEAD 32 extern int xfs_refcount_has_record(struct xfs_btree_cur *cur, - xfs_agblock_t bno, xfs_extlen_t len, bool *exists); + enum xfs_refc_domain domain, xfs_agblock_t bno, + xfs_extlen_t len, bool *exists); union xfs_btree_rec; extern void xfs_refcount_btrec_to_irec(const union xfs_btree_rec *rec, struct xfs_refcount_irec *irec); diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 316c1ec0c3c2..e1f789866683 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -13,6 +13,7 @@ #include "xfs_btree.h" #include "xfs_btree_staging.h" #include "xfs_refcount_btree.h" +#include "xfs_refcount.h" #include "xfs_alloc.h" #include "xfs_error.h" #include "xfs_trace.h" @@ -160,7 +161,12 @@ xfs_refcountbt_init_rec_from_cur( struct xfs_btree_cur *cur, union xfs_btree_rec *rec) { - rec->refc.rc_startblock = cpu_to_be32(cur->bc_rec.rc.rc_startblock); + const struct xfs_refcount_irec *irec = &cur->bc_rec.rc; + uint32_t start; + + start = xfs_refcount_encode_startblock(irec->rc_startblock, + irec->rc_domain); + rec->refc.rc_startblock = cpu_to_be32(start); rec->refc.rc_blockcount = cpu_to_be32(cur->bc_rec.rc.rc_blockcount); rec->refc.rc_refcount = cpu_to_be32(cur->bc_rec.rc.rc_refcount); } @@ -182,10 +188,13 @@ xfs_refcountbt_key_diff( struct xfs_btree_cur *cur, const union xfs_btree_key *key) { - struct xfs_refcount_irec *rec = &cur->bc_rec.rc; const struct xfs_refcount_key *kp = &key->refc; + const struct xfs_refcount_irec *irec = &cur->bc_rec.rc; + uint32_t start; - return (int64_t)be32_to_cpu(kp->rc_startblock) - rec->rc_startblock; + start = xfs_refcount_encode_startblock(irec->rc_startblock, + irec->rc_domain); + return (int64_t)be32_to_cpu(kp->rc_startblock) - start; } STATIC int64_t diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index 094dfc897ebc..b56aca1e7c66 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -235,13 +235,8 @@ xfs_rmap_get_rec( goto out_bad_rec; } else { /* check for valid extent range, including overflow */ - if (!xfs_verify_agbno(pag, irec->rm_startblock)) - goto out_bad_rec; - if (irec->rm_startblock > - irec->rm_startblock + irec->rm_blockcount) - goto out_bad_rec; - if (!xfs_verify_agbno(pag, - irec->rm_startblock + irec->rm_blockcount - 1)) + if (!xfs_verify_agbext(pag, irec->rm_startblock, + irec->rm_blockcount)) goto out_bad_rec; } diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index a20cade590e9..1eeecf2eb2a7 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -972,7 +972,9 @@ xfs_log_sb( */ if (xfs_has_lazysbcount(mp)) { mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount); - mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree); + mp->m_sb.sb_ifree = min_t(uint64_t, + percpu_counter_sum(&mp->m_ifree), + mp->m_sb.sb_icount); mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks); } diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 2c4ad6e4bb14..5b2f27cbdb80 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -422,7 +422,7 @@ xfs_calc_itruncate_reservation_minlogsize( /* * In renaming a files we can modify: - * the four inodes involved: 4 * inode size + * the five inodes involved: 5 * inode size * the two directory btrees: 2 * (max depth + v2) * dir block size * the two directory bmap btrees: 2 * max depth * block size * And the bmap_finish transaction can free dir and bmap blocks (two sets @@ -437,7 +437,7 @@ xfs_calc_rename_reservation( struct xfs_mount *mp) { return XFS_DQUOT_LOGRES(mp) + - max((xfs_calc_inode_res(mp, 4) + + max((xfs_calc_inode_res(mp, 5) + xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1))), (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) + diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index a6b7d98cf68f..5ebdda7e1078 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -166,6 +166,36 @@ typedef struct xfs_bmbt_irec xfs_exntst_t br_state; /* extent state */ } xfs_bmbt_irec_t; +enum xfs_refc_domain { + XFS_REFC_DOMAIN_SHARED = 0, + XFS_REFC_DOMAIN_COW, +}; + +#define XFS_REFC_DOMAIN_STRINGS \ + { XFS_REFC_DOMAIN_SHARED, "shared" }, \ + { XFS_REFC_DOMAIN_COW, "cow" } + +struct xfs_refcount_irec { + xfs_agblock_t rc_startblock; /* starting block number */ + xfs_extlen_t rc_blockcount; /* count of free blocks */ + xfs_nlink_t rc_refcount; /* number of inodes linked here */ + enum xfs_refc_domain rc_domain; /* shared or cow staging extent? */ +}; + +#define XFS_RMAP_ATTR_FORK (1 << 0) +#define XFS_RMAP_BMBT_BLOCK (1 << 1) +#define XFS_RMAP_UNWRITTEN (1 << 2) +#define XFS_RMAP_KEY_FLAGS (XFS_RMAP_ATTR_FORK | \ + XFS_RMAP_BMBT_BLOCK) +#define XFS_RMAP_REC_FLAGS (XFS_RMAP_UNWRITTEN) +struct xfs_rmap_irec { + xfs_agblock_t rm_startblock; /* extent start block */ + xfs_extlen_t rm_blockcount; /* extent length */ + uint64_t rm_owner; /* extent owner */ + uint64_t rm_offset; /* offset within the owner */ + unsigned int rm_flags; /* state flags */ +}; + /* per-AG block reservation types */ enum xfs_ag_resv_type { XFS_AG_RESV_NONE = 0, diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index b7b838bd4ba4..4dd52b15f09c 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c @@ -609,9 +609,16 @@ out: /* AGFL */ struct xchk_agfl_info { - unsigned int sz_entries; + /* Number of AGFL entries that the AGF claims are in use. */ + unsigned int agflcount; + + /* Number of AGFL entries that we found. */ unsigned int nr_entries; + + /* Buffer to hold AGFL entries for extent checking. */ xfs_agblock_t *entries; + + struct xfs_buf *agfl_bp; struct xfs_scrub *sc; }; @@ -641,10 +648,10 @@ xchk_agfl_block( struct xfs_scrub *sc = sai->sc; if (xfs_verify_agbno(sc->sa.pag, agbno) && - sai->nr_entries < sai->sz_entries) + sai->nr_entries < sai->agflcount) sai->entries[sai->nr_entries++] = agbno; else - xchk_block_set_corrupt(sc, sc->sa.agfl_bp); + xchk_block_set_corrupt(sc, sai->agfl_bp); xchk_agfl_block_xref(sc, agbno); @@ -696,19 +703,26 @@ int xchk_agfl( struct xfs_scrub *sc) { - struct xchk_agfl_info sai; + struct xchk_agfl_info sai = { + .sc = sc, + }; struct xfs_agf *agf; xfs_agnumber_t agno = sc->sm->sm_agno; - unsigned int agflcount; unsigned int i; int error; + /* Lock the AGF and AGI so that nobody can touch this AG. */ error = xchk_ag_read_headers(sc, agno, &sc->sa); if (!xchk_process_error(sc, agno, XFS_AGFL_BLOCK(sc->mp), &error)) - goto out; + return error; if (!sc->sa.agf_bp) return -EFSCORRUPTED; - xchk_buffer_recheck(sc, sc->sa.agfl_bp); + + /* Try to read the AGFL, and verify its structure if we get it. */ + error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &sai.agfl_bp); + if (!xchk_process_error(sc, agno, XFS_AGFL_BLOCK(sc->mp), &error)) + return error; + xchk_buffer_recheck(sc, sai.agfl_bp); xchk_agfl_xref(sc); @@ -717,24 +731,21 @@ xchk_agfl( /* Allocate buffer to ensure uniqueness of AGFL entries. */ agf = sc->sa.agf_bp->b_addr; - agflcount = be32_to_cpu(agf->agf_flcount); - if (agflcount > xfs_agfl_size(sc->mp)) { + sai.agflcount = be32_to_cpu(agf->agf_flcount); + if (sai.agflcount > xfs_agfl_size(sc->mp)) { xchk_block_set_corrupt(sc, sc->sa.agf_bp); goto out; } - memset(&sai, 0, sizeof(sai)); - sai.sc = sc; - sai.sz_entries = agflcount; - sai.entries = kmem_zalloc(sizeof(xfs_agblock_t) * agflcount, - KM_MAYFAIL); + sai.entries = kvcalloc(sai.agflcount, sizeof(xfs_agblock_t), + XCHK_GFP_FLAGS); if (!sai.entries) { error = -ENOMEM; goto out; } /* Check the blocks in the AGFL. */ - error = xfs_agfl_walk(sc->mp, sc->sa.agf_bp->b_addr, - sc->sa.agfl_bp, xchk_agfl_block, &sai); + error = xfs_agfl_walk(sc->mp, sc->sa.agf_bp->b_addr, sai.agfl_bp, + xchk_agfl_block, &sai); if (error == -ECANCELED) { error = 0; goto out_free; @@ -742,7 +753,7 @@ xchk_agfl( if (error) goto out_free; - if (agflcount != sai.nr_entries) { + if (sai.agflcount != sai.nr_entries) { xchk_block_set_corrupt(sc, sc->sa.agf_bp); goto out_free; } @@ -758,7 +769,7 @@ xchk_agfl( } out_free: - kmem_free(sai.entries); + kvfree(sai.entries); out: return error; } diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index 1b0b4e243f77..d75d82151eeb 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -442,12 +442,18 @@ out_revert: /* AGFL */ struct xrep_agfl { + /* Bitmap of alleged AGFL blocks that we're not going to add. */ + struct xbitmap crossed; + /* Bitmap of other OWN_AG metadata blocks. */ struct xbitmap agmetablocks; /* Bitmap of free space. */ struct xbitmap *freesp; + /* rmapbt cursor for finding crosslinked blocks */ + struct xfs_btree_cur *rmap_cur; + struct xfs_scrub *sc; }; @@ -477,6 +483,41 @@ xrep_agfl_walk_rmap( return xbitmap_set_btcur_path(&ra->agmetablocks, cur); } +/* Strike out the blocks that are cross-linked according to the rmapbt. */ +STATIC int +xrep_agfl_check_extent( + struct xrep_agfl *ra, + uint64_t start, + uint64_t len) +{ + xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(ra->sc->mp, start); + xfs_agblock_t last_agbno = agbno + len - 1; + int error; + + ASSERT(XFS_FSB_TO_AGNO(ra->sc->mp, start) == ra->sc->sa.pag->pag_agno); + + while (agbno <= last_agbno) { + bool other_owners; + + error = xfs_rmap_has_other_keys(ra->rmap_cur, agbno, 1, + &XFS_RMAP_OINFO_AG, &other_owners); + if (error) + return error; + + if (other_owners) { + error = xbitmap_set(&ra->crossed, agbno, 1); + if (error) + return error; + } + + if (xchk_should_terminate(ra->sc, &error)) + return error; + agbno++; + } + + return 0; +} + /* * Map out all the non-AGFL OWN_AG space in this AG so that we can deduce * which blocks belong to the AGFL. @@ -496,44 +537,58 @@ xrep_agfl_collect_blocks( struct xrep_agfl ra; struct xfs_mount *mp = sc->mp; struct xfs_btree_cur *cur; + struct xbitmap_range *br, *n; int error; ra.sc = sc; ra.freesp = agfl_extents; xbitmap_init(&ra.agmetablocks); + xbitmap_init(&ra.crossed); /* Find all space used by the free space btrees & rmapbt. */ cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag); error = xfs_rmap_query_all(cur, xrep_agfl_walk_rmap, &ra); - if (error) - goto err; xfs_btree_del_cursor(cur, error); + if (error) + goto out_bmp; /* Find all blocks currently being used by the bnobt. */ cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag, XFS_BTNUM_BNO); error = xbitmap_set_btblocks(&ra.agmetablocks, cur); - if (error) - goto err; xfs_btree_del_cursor(cur, error); + if (error) + goto out_bmp; /* Find all blocks currently being used by the cntbt. */ cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag, XFS_BTNUM_CNT); error = xbitmap_set_btblocks(&ra.agmetablocks, cur); - if (error) - goto err; - xfs_btree_del_cursor(cur, error); + if (error) + goto out_bmp; /* * Drop the freesp meta blocks that are in use by btrees. * The remaining blocks /should/ be AGFL blocks. */ error = xbitmap_disunion(agfl_extents, &ra.agmetablocks); - xbitmap_destroy(&ra.agmetablocks); if (error) - return error; + goto out_bmp; + + /* Strike out the blocks that are cross-linked. */ + ra.rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag); + for_each_xbitmap_extent(br, n, agfl_extents) { + error = xrep_agfl_check_extent(&ra, br->start, br->len); + if (error) + break; + } + xfs_btree_del_cursor(ra.rmap_cur, error); + if (error) + goto out_bmp; + error = xbitmap_disunion(agfl_extents, &ra.crossed); + if (error) + goto out_bmp; /* * Calculate the new AGFL size. If we found more blocks than fit in @@ -541,11 +596,10 @@ xrep_agfl_collect_blocks( */ *flcount = min_t(uint64_t, xbitmap_hweight(agfl_extents), xfs_agfl_size(mp)); - return 0; -err: +out_bmp: + xbitmap_destroy(&ra.crossed); xbitmap_destroy(&ra.agmetablocks); - xfs_btree_del_cursor(cur, error); return error; } @@ -631,7 +685,7 @@ xrep_agfl_init_header( if (br->len) break; list_del(&br->list); - kmem_free(br); + kfree(br); } /* Write new AGFL to disk. */ @@ -697,7 +751,6 @@ xrep_agfl( * freespace overflow to the freespace btrees. */ sc->sa.agf_bp = agf_bp; - sc->sa.agfl_bp = agfl_bp; error = xrep_roll_ag_trans(sc); if (error) goto err; diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c index ab427b4d7fe0..3b38f4e2a537 100644 --- a/fs/xfs/scrub/alloc.c +++ b/fs/xfs/scrub/alloc.c @@ -100,9 +100,7 @@ xchk_allocbt_rec( bno = be32_to_cpu(rec->alloc.ar_startblock); len = be32_to_cpu(rec->alloc.ar_blockcount); - if (bno + len <= bno || - !xfs_verify_agbno(pag, bno) || - !xfs_verify_agbno(pag, bno + len - 1)) + if (!xfs_verify_agbext(pag, bno, len)) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); xchk_allocbt_xref(bs->sc, bno, len); diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c index b6f0c9f3f124..31529b9bf389 100644 --- a/fs/xfs/scrub/attr.c +++ b/fs/xfs/scrub/attr.c @@ -49,7 +49,7 @@ xchk_setup_xattr_buf( if (ab) { if (sz <= ab->sz) return 0; - kmem_free(ab); + kvfree(ab); sc->buf = NULL; } @@ -79,7 +79,8 @@ xchk_setup_xattr( * without the inode lock held, which means we can sleep. */ if (sc->flags & XCHK_TRY_HARDER) { - error = xchk_setup_xattr_buf(sc, XATTR_SIZE_MAX, GFP_KERNEL); + error = xchk_setup_xattr_buf(sc, XATTR_SIZE_MAX, + XCHK_GFP_FLAGS); if (error) return error; } @@ -138,8 +139,7 @@ xchk_xattr_listent( * doesn't work, we overload the seen_enough variable to convey * the error message back to the main scrub function. */ - error = xchk_setup_xattr_buf(sx->sc, valuelen, - GFP_KERNEL | __GFP_RETRY_MAYFAIL); + error = xchk_setup_xattr_buf(sx->sc, valuelen, XCHK_GFP_FLAGS); if (error == -ENOMEM) error = -EDEADLOCK; if (error) { @@ -324,8 +324,7 @@ xchk_xattr_block( return 0; /* Allocate memory for block usage checking. */ - error = xchk_setup_xattr_buf(ds->sc, 0, - GFP_KERNEL | __GFP_RETRY_MAYFAIL); + error = xchk_setup_xattr_buf(ds->sc, 0, XCHK_GFP_FLAGS); if (error == -ENOMEM) return -EDEADLOCK; if (error) diff --git a/fs/xfs/scrub/bitmap.c b/fs/xfs/scrub/bitmap.c index b89bf9de9b1c..a255f09e9f0a 100644 --- a/fs/xfs/scrub/bitmap.c +++ b/fs/xfs/scrub/bitmap.c @@ -10,6 +10,7 @@ #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_btree.h" +#include "scrub/scrub.h" #include "scrub/bitmap.h" /* @@ -25,7 +26,7 @@ xbitmap_set( { struct xbitmap_range *bmr; - bmr = kmem_alloc(sizeof(struct xbitmap_range), KM_MAYFAIL); + bmr = kmalloc(sizeof(struct xbitmap_range), XCHK_GFP_FLAGS); if (!bmr) return -ENOMEM; @@ -47,7 +48,7 @@ xbitmap_destroy( for_each_xbitmap_extent(bmr, n, bitmap) { list_del(&bmr->list); - kmem_free(bmr); + kfree(bmr); } } @@ -174,15 +175,15 @@ xbitmap_disunion( /* Total overlap, just delete ex. */ lp = lp->next; list_del(&br->list); - kmem_free(br); + kfree(br); break; case 0: /* * Deleting from the middle: add the new right extent * and then shrink the left extent. */ - new_br = kmem_alloc(sizeof(struct xbitmap_range), - KM_MAYFAIL); + new_br = kmalloc(sizeof(struct xbitmap_range), + XCHK_GFP_FLAGS); if (!new_br) { error = -ENOMEM; goto out; diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index f0b9cb6506fd..d50d0eab196a 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -90,6 +90,7 @@ out: struct xchk_bmap_info { struct xfs_scrub *sc; + struct xfs_iext_cursor icur; xfs_fileoff_t lastoff; bool is_rt; bool is_shared; @@ -146,6 +147,48 @@ xchk_bmap_get_rmap( return has_rmap; } +static inline bool +xchk_bmap_has_prev( + struct xchk_bmap_info *info, + struct xfs_bmbt_irec *irec) +{ + struct xfs_bmbt_irec got; + struct xfs_ifork *ifp; + + ifp = xfs_ifork_ptr(info->sc->ip, info->whichfork); + + if (!xfs_iext_peek_prev_extent(ifp, &info->icur, &got)) + return false; + if (got.br_startoff + got.br_blockcount != irec->br_startoff) + return false; + if (got.br_startblock + got.br_blockcount != irec->br_startblock) + return false; + if (got.br_state != irec->br_state) + return false; + return true; +} + +static inline bool +xchk_bmap_has_next( + struct xchk_bmap_info *info, + struct xfs_bmbt_irec *irec) +{ + struct xfs_bmbt_irec got; + struct xfs_ifork *ifp; + + ifp = xfs_ifork_ptr(info->sc->ip, info->whichfork); + + if (!xfs_iext_peek_next_extent(ifp, &info->icur, &got)) + return false; + if (irec->br_startoff + irec->br_blockcount != got.br_startoff) + return false; + if (irec->br_startblock + irec->br_blockcount != got.br_startblock) + return false; + if (got.br_state != irec->br_state) + return false; + return true; +} + /* Make sure that we have rmapbt records for this extent. */ STATIC void xchk_bmap_xref_rmap( @@ -214,6 +257,34 @@ xchk_bmap_xref_rmap( if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK) xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, irec->br_startoff); + + /* + * If the rmap starts before this bmbt record, make sure there's a bmbt + * record for the previous offset that is contiguous with this mapping. + * Skip this for CoW fork extents because the refcount btree (and not + * the inode) is the ondisk owner for those extents. + */ + if (info->whichfork != XFS_COW_FORK && rmap.rm_startblock < agbno && + !xchk_bmap_has_prev(info, irec)) { + xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + return; + } + + /* + * If the rmap ends after this bmbt record, make sure there's a bmbt + * record for the next offset that is contiguous with this mapping. + * Skip this for CoW fork extents because the refcount btree (and not + * the inode) is the ondisk owner for those extents. + */ + rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount; + if (info->whichfork != XFS_COW_FORK && + rmap_end > agbno + irec->br_blockcount && + !xchk_bmap_has_next(info, irec)) { + xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + return; + } } /* Cross-reference a single rtdev extent record. */ @@ -264,6 +335,8 @@ xchk_bmap_iextent_xref( case XFS_COW_FORK: xchk_xref_is_cow_staging(info->sc, agbno, irec->br_blockcount); + xchk_xref_is_not_shared(info->sc, agbno, + irec->br_blockcount); break; } @@ -297,14 +370,13 @@ xchk_bmap_dirattr_extent( } /* Scrub a single extent record. */ -STATIC int +STATIC void xchk_bmap_iextent( struct xfs_inode *ip, struct xchk_bmap_info *info, struct xfs_bmbt_irec *irec) { struct xfs_mount *mp = info->sc->mp; - int error = 0; /* * Check for out-of-order extents. This record could have come @@ -325,14 +397,6 @@ xchk_bmap_iextent( xchk_fblock_set_corrupt(info->sc, info->whichfork, irec->br_startoff); - /* - * Check for delalloc extents. We never iterate the ones in the - * in-core extent scan, and we should never see these in the bmbt. - */ - if (isnullstartblock(irec->br_startblock)) - xchk_fblock_set_corrupt(info->sc, info->whichfork, - irec->br_startoff); - /* Make sure the extent points to a valid place. */ if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN) xchk_fblock_set_corrupt(info->sc, info->whichfork, @@ -353,15 +417,12 @@ xchk_bmap_iextent( irec->br_startoff); if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) - return 0; + return; if (info->is_rt) xchk_bmap_rt_iextent_xref(ip, info, irec); else xchk_bmap_iextent_xref(ip, info, irec); - - info->lastoff = irec->br_startoff + irec->br_blockcount; - return error; } /* Scrub a bmbt record. */ @@ -599,14 +660,41 @@ xchk_bmap_check_rmaps( for_each_perag(sc->mp, agno, pag) { error = xchk_bmap_check_ag_rmaps(sc, whichfork, pag); - if (error) - break; - if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) - break; + if (error || + (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) { + xfs_perag_put(pag); + return error; + } } - if (pag) - xfs_perag_put(pag); - return error; + + return 0; +} + +/* Scrub a delalloc reservation from the incore extent map tree. */ +STATIC void +xchk_bmap_iextent_delalloc( + struct xfs_inode *ip, + struct xchk_bmap_info *info, + struct xfs_bmbt_irec *irec) +{ + struct xfs_mount *mp = info->sc->mp; + + /* + * Check for out-of-order extents. This record could have come + * from the incore list, for which there is no ordering check. + */ + if (irec->br_startoff < info->lastoff) + xchk_fblock_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + + if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount)) + xchk_fblock_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + + /* Make sure the extent points to a valid place. */ + if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN) + xchk_fblock_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); } /* @@ -626,7 +714,6 @@ xchk_bmap( struct xfs_inode *ip = sc->ip; struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); xfs_fileoff_t endoff; - struct xfs_iext_cursor icur; int error = 0; /* Non-existent forks can be ignored. */ @@ -661,6 +748,8 @@ xchk_bmap( case XFS_DINODE_FMT_DEV: case XFS_DINODE_FMT_LOCAL: /* No mappings to check. */ + if (whichfork == XFS_COW_FORK) + xchk_fblock_set_corrupt(sc, whichfork, 0); goto out; case XFS_DINODE_FMT_EXTENTS: break; @@ -690,20 +779,22 @@ xchk_bmap( /* Scrub extent records. */ info.lastoff = 0; ifp = xfs_ifork_ptr(ip, whichfork); - for_each_xfs_iext(ifp, &icur, &irec) { + for_each_xfs_iext(ifp, &info.icur, &irec) { if (xchk_should_terminate(sc, &error) || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) goto out; - if (isnullstartblock(irec.br_startblock)) - continue; + if (irec.br_startoff >= endoff) { xchk_fblock_set_corrupt(sc, whichfork, irec.br_startoff); goto out; } - error = xchk_bmap_iextent(ip, &info, &irec); - if (error) - goto out; + + if (isnullstartblock(irec.br_startblock)) + xchk_bmap_iextent_delalloc(ip, &info, &irec); + else + xchk_bmap_iextent(ip, &info, &irec); + info.lastoff = irec.br_startoff + irec.br_blockcount; } error = xchk_bmap_check_rmaps(sc, whichfork); diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c index 2f4519590dc1..0fd36d5b4646 100644 --- a/fs/xfs/scrub/btree.c +++ b/fs/xfs/scrub/btree.c @@ -408,7 +408,6 @@ xchk_btree_check_owner( struct xfs_buf *bp) { struct xfs_btree_cur *cur = bs->cur; - struct check_owner *co; /* * In theory, xfs_btree_get_block should only give us a null buffer @@ -431,10 +430,13 @@ xchk_btree_check_owner( * later scanning. */ if (cur->bc_btnum == XFS_BTNUM_BNO || cur->bc_btnum == XFS_BTNUM_RMAP) { - co = kmem_alloc(sizeof(struct check_owner), - KM_MAYFAIL); + struct check_owner *co; + + co = kmalloc(sizeof(struct check_owner), XCHK_GFP_FLAGS); if (!co) return -ENOMEM; + + INIT_LIST_HEAD(&co->list); co->level = level; co->daddr = xfs_buf_daddr(bp); list_add_tail(&co->list, &bs->to_check); @@ -649,7 +651,7 @@ xchk_btree( xchk_btree_set_corrupt(sc, cur, 0); return 0; } - bs = kmem_zalloc(cur_sz, KM_NOFS | KM_MAYFAIL); + bs = kzalloc(cur_sz, XCHK_GFP_FLAGS); if (!bs) return -ENOMEM; bs->cur = cur; @@ -740,9 +742,9 @@ out: error = xchk_btree_check_block_owner(bs, co->level, co->daddr); list_del(&co->list); - kmem_free(co); + kfree(co); } - kmem_free(bs); + kfree(bs); return error; } diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 9bbbf20f401b..613260b04a3d 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -424,10 +424,6 @@ xchk_ag_read_headers( if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF)) return error; - error = xfs_alloc_read_agfl(sa->pag, sc->tp, &sa->agfl_bp); - if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL)) - return error; - return 0; } @@ -515,10 +511,6 @@ xchk_ag_free( struct xchk_ag *sa) { xchk_ag_btcur_free(sa); - if (sa->agfl_bp) { - xfs_trans_brelse(sc->tp, sa->agfl_bp); - sa->agfl_bp = NULL; - } if (sa->agf_bp) { xfs_trans_brelse(sc->tp, sa->agf_bp); sa->agf_bp = NULL; @@ -789,6 +781,33 @@ xchk_buffer_recheck( trace_xchk_block_error(sc, xfs_buf_daddr(bp), fa); } +static inline int +xchk_metadata_inode_subtype( + struct xfs_scrub *sc, + unsigned int scrub_type) +{ + __u32 smtype = sc->sm->sm_type; + int error; + + sc->sm->sm_type = scrub_type; + + switch (scrub_type) { + case XFS_SCRUB_TYPE_INODE: + error = xchk_inode(sc); + break; + case XFS_SCRUB_TYPE_BMBTD: + error = xchk_bmap_data(sc); + break; + default: + ASSERT(0); + error = -EFSCORRUPTED; + break; + } + + sc->sm->sm_type = smtype; + return error; +} + /* * Scrub the attr/data forks of a metadata inode. The metadata inode must be * pointed to by sc->ip and the ILOCK must be held. @@ -797,13 +816,17 @@ int xchk_metadata_inode_forks( struct xfs_scrub *sc) { - __u32 smtype; bool shared; int error; if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return 0; + /* Check the inode record. */ + error = xchk_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_INODE); + if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) + return error; + /* Metadata inodes don't live on the rt device. */ if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME) { xchk_ino_set_corrupt(sc, sc->ip->i_ino); @@ -823,10 +846,7 @@ xchk_metadata_inode_forks( } /* Invoke the data fork scrubber. */ - smtype = sc->sm->sm_type; - sc->sm->sm_type = XFS_SCRUB_TYPE_BMBTD; - error = xchk_bmap_data(sc); - sc->sm->sm_type = smtype; + error = xchk_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTD); if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) return error; @@ -841,7 +861,7 @@ xchk_metadata_inode_forks( xchk_ino_set_corrupt(sc, sc->ip->i_ino); } - return error; + return 0; } /* diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index 454145db10e7..b73648d81d23 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -25,7 +25,7 @@ xchk_should_terminate( if (fatal_signal_pending(current)) { if (*error == 0) - *error = -EAGAIN; + *error = -EINTR; return true; } return false; diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c index 84fe3d33d699..d17cee177085 100644 --- a/fs/xfs/scrub/dabtree.c +++ b/fs/xfs/scrub/dabtree.c @@ -486,7 +486,7 @@ xchk_da_btree( return 0; /* Set up initial da state. */ - ds = kmem_zalloc(sizeof(struct xchk_da_btree), KM_NOFS | KM_MAYFAIL); + ds = kzalloc(sizeof(struct xchk_da_btree), XCHK_GFP_FLAGS); if (!ds) return -ENOMEM; ds->dargs.dp = sc->ip; @@ -591,6 +591,6 @@ out: out_state: xfs_da_state_free(ds->state); - kmem_free(ds); + kfree(ds); return error; } diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index 5c87800ab223..d1b0f23c2c59 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -666,7 +666,12 @@ xchk_directory_blocks( struct xfs_scrub *sc) { struct xfs_bmbt_irec got; - struct xfs_da_args args; + struct xfs_da_args args = { + .dp = sc ->ip, + .whichfork = XFS_DATA_FORK, + .geo = sc->mp->m_dir_geo, + .trans = sc->tp, + }; struct xfs_ifork *ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK); struct xfs_mount *mp = sc->mp; xfs_fileoff_t leaf_lblk; @@ -689,9 +694,6 @@ xchk_directory_blocks( free_lblk = XFS_B_TO_FSB(mp, XFS_DIR2_FREE_OFFSET); /* Is this a block dir? */ - args.dp = sc->ip; - args.geo = mp->m_dir_geo; - args.trans = sc->tp; error = xfs_dir2_isblock(&args, &is_block); if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) goto out; diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c index 6a6f8fe7f87c..4777e7b89fdc 100644 --- a/fs/xfs/scrub/fscounters.c +++ b/fs/xfs/scrub/fscounters.c @@ -14,6 +14,8 @@ #include "xfs_health.h" #include "xfs_btree.h" #include "xfs_ag.h" +#include "xfs_rtalloc.h" +#include "xfs_inode.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" @@ -43,6 +45,16 @@ * our tolerance for mismatch between expected and actual counter values. */ +struct xchk_fscounters { + struct xfs_scrub *sc; + uint64_t icount; + uint64_t ifree; + uint64_t fdblocks; + uint64_t frextents; + unsigned long long icount_min; + unsigned long long icount_max; +}; + /* * Since the expected value computation is lockless but only browses incore * values, the percpu counters should be fairly close to each other. However, @@ -116,10 +128,11 @@ xchk_setup_fscounters( struct xchk_fscounters *fsc; int error; - sc->buf = kmem_zalloc(sizeof(struct xchk_fscounters), 0); + sc->buf = kzalloc(sizeof(struct xchk_fscounters), XCHK_GFP_FLAGS); if (!sc->buf) return -ENOMEM; fsc = sc->buf; + fsc->sc = sc; xfs_icount_range(sc->mp, &fsc->icount_min, &fsc->icount_max); @@ -138,6 +151,18 @@ xchk_setup_fscounters( return xchk_trans_alloc(sc, 0); } +/* + * Part 1: Collecting filesystem summary counts. For each AG, we add its + * summary counts (total inodes, free inodes, free data blocks) to an incore + * copy of the overall filesystem summary counts. + * + * To avoid false corruption reports in part 2, any failure in this part must + * set the INCOMPLETE flag even when a negative errno is returned. This care + * must be taken with certain errno values (i.e. EFSBADCRC, EFSCORRUPTED, + * ECANCELED) that are absorbed into a scrub state flag update by + * xchk_*_process_error. + */ + /* Count free space btree blocks manually for pre-lazysbcount filesystems. */ static int xchk_fscount_btreeblks( @@ -225,8 +250,10 @@ retry: } if (pag) xfs_perag_put(pag); - if (error) + if (error) { + xchk_set_incomplete(sc); return error; + } /* * The global incore space reservation is taken from the incore @@ -267,6 +294,64 @@ retry: return 0; } +#ifdef CONFIG_XFS_RT +STATIC int +xchk_fscount_add_frextent( + struct xfs_mount *mp, + struct xfs_trans *tp, + const struct xfs_rtalloc_rec *rec, + void *priv) +{ + struct xchk_fscounters *fsc = priv; + int error = 0; + + fsc->frextents += rec->ar_extcount; + + xchk_should_terminate(fsc->sc, &error); + return error; +} + +/* Calculate the number of free realtime extents from the realtime bitmap. */ +STATIC int +xchk_fscount_count_frextents( + struct xfs_scrub *sc, + struct xchk_fscounters *fsc) +{ + struct xfs_mount *mp = sc->mp; + int error; + + fsc->frextents = 0; + if (!xfs_has_realtime(mp)) + return 0; + + xfs_ilock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); + error = xfs_rtalloc_query_all(sc->mp, sc->tp, + xchk_fscount_add_frextent, fsc); + if (error) { + xchk_set_incomplete(sc); + goto out_unlock; + } + +out_unlock: + xfs_iunlock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); + return error; +} +#else +STATIC int +xchk_fscount_count_frextents( + struct xfs_scrub *sc, + struct xchk_fscounters *fsc) +{ + fsc->frextents = 0; + return 0; +} +#endif /* CONFIG_XFS_RT */ + +/* + * Part 2: Comparing filesystem summary counters. All we have to do here is + * sum the percpu counters and compare them to what we've observed. + */ + /* * Is the @counter reasonably close to the @expected value? * @@ -333,16 +418,17 @@ xchk_fscounters( { struct xfs_mount *mp = sc->mp; struct xchk_fscounters *fsc = sc->buf; - int64_t icount, ifree, fdblocks; + int64_t icount, ifree, fdblocks, frextents; int error; /* Snapshot the percpu counters. */ icount = percpu_counter_sum(&mp->m_icount); ifree = percpu_counter_sum(&mp->m_ifree); fdblocks = percpu_counter_sum(&mp->m_fdblocks); + frextents = percpu_counter_sum(&mp->m_frextents); /* No negative values, please! */ - if (icount < 0 || ifree < 0 || fdblocks < 0) + if (icount < 0 || ifree < 0 || fdblocks < 0 || frextents < 0) xchk_set_corrupt(sc); /* See if icount is obviously wrong. */ @@ -353,6 +439,10 @@ xchk_fscounters( if (fdblocks > mp->m_sb.sb_dblocks) xchk_set_corrupt(sc); + /* See if frextents is obviously wrong. */ + if (frextents > mp->m_sb.sb_rextents) + xchk_set_corrupt(sc); + /* * If ifree exceeds icount by more than the minimum variance then * something's probably wrong with the counters. @@ -367,6 +457,13 @@ xchk_fscounters( if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE) return 0; + /* Count the free extents counter for rt volumes. */ + error = xchk_fscount_count_frextents(sc, fsc); + if (!xchk_process_error(sc, 0, XFS_SB_BLOCK(mp), &error)) + return error; + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE) + return 0; + /* Compare the in-core counters with whatever we counted. */ if (!xchk_fscount_within_range(sc, icount, &mp->m_icount, fsc->icount)) xchk_set_corrupt(sc); @@ -378,5 +475,9 @@ xchk_fscounters( fsc->fdblocks)) xchk_set_corrupt(sc); + if (!xchk_fscount_within_range(sc, frextents, &mp->m_frextents, + fsc->frextents)) + xchk_set_corrupt(sc); + return 0; } diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c index e1026e07bf94..e312be7cd375 100644 --- a/fs/xfs/scrub/ialloc.c +++ b/fs/xfs/scrub/ialloc.c @@ -108,9 +108,8 @@ xchk_iallocbt_chunk( xfs_agblock_t bno; bno = XFS_AGINO_TO_AGBNO(mp, agino); - if (bno + len <= bno || - !xfs_verify_agbno(pag, bno) || - !xfs_verify_agbno(pag, bno + len - 1)) + + if (!xfs_verify_agbext(pag, bno, len)) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); xchk_iallocbt_chunk_xref(bs->sc, irec, agino, bno, len); diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index 51820b40ab1c..7a2f38e5202c 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -365,7 +365,7 @@ xchk_dinode( * pagecache can't cache all the blocks in this file due to * overly large offsets, flag the inode for admin review. */ - if (isize >= mp->m_super->s_maxbytes) + if (isize > mp->m_super->s_maxbytes) xchk_ino_set_warning(sc, ino); /* di_nblocks */ diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c index 21b4c9006859..9eeac8565394 100644 --- a/fs/xfs/scrub/quota.c +++ b/fs/xfs/scrub/quota.c @@ -14,6 +14,7 @@ #include "xfs_inode.h" #include "xfs_quota.h" #include "xfs_qm.h" +#include "xfs_bmap.h" #include "scrub/scrub.h" #include "scrub/common.h" @@ -84,7 +85,7 @@ xchk_quota_item( int error = 0; if (xchk_should_terminate(sc, &error)) - return -ECANCELED; + return error; /* * Except for the root dquot, the actual dquot we got must either have @@ -189,11 +190,12 @@ xchk_quota_data_fork( for_each_xfs_iext(ifp, &icur, &irec) { if (xchk_should_terminate(sc, &error)) break; + /* - * delalloc extents or blocks mapped above the highest + * delalloc/unwritten extents or blocks mapped above the highest * quota id shouldn't happen. */ - if (isnullstartblock(irec.br_startblock) || + if (!xfs_bmap_is_written_extent(&irec) || irec.br_startoff > max_dqid_off || irec.br_startoff + irec.br_blockcount - 1 > max_dqid_off) { xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index c68b767dc08f..d9c1b3cea4a5 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -127,8 +127,8 @@ xchk_refcountbt_rmap_check( * is healthy each rmap_irec we see will be in agbno order * so we don't need insertion sort here. */ - frag = kmem_alloc(sizeof(struct xchk_refcnt_frag), - KM_MAYFAIL); + frag = kmalloc(sizeof(struct xchk_refcnt_frag), + XCHK_GFP_FLAGS); if (!frag) return -ENOMEM; memcpy(&frag->rm, rec, sizeof(frag->rm)); @@ -215,7 +215,7 @@ xchk_refcountbt_process_rmap_fragments( continue; } list_del(&frag->list); - kmem_free(frag); + kfree(frag); nr++; } @@ -257,11 +257,11 @@ done: /* Delete fragments and work list. */ list_for_each_entry_safe(frag, n, &worklist, list) { list_del(&frag->list); - kmem_free(frag); + kfree(frag); } list_for_each_entry_safe(frag, n, &refchk->fragments, list) { list_del(&frag->list); - kmem_free(frag); + kfree(frag); } } @@ -269,15 +269,13 @@ done: STATIC void xchk_refcountbt_xref_rmap( struct xfs_scrub *sc, - xfs_agblock_t bno, - xfs_extlen_t len, - xfs_nlink_t refcount) + const struct xfs_refcount_irec *irec) { struct xchk_refcnt_check refchk = { - .sc = sc, - .bno = bno, - .len = len, - .refcount = refcount, + .sc = sc, + .bno = irec->rc_startblock, + .len = irec->rc_blockcount, + .refcount = irec->rc_refcount, .seen = 0, }; struct xfs_rmap_irec low; @@ -291,9 +289,9 @@ xchk_refcountbt_xref_rmap( /* Cross-reference with the rmapbt to confirm the refcount. */ memset(&low, 0, sizeof(low)); - low.rm_startblock = bno; + low.rm_startblock = irec->rc_startblock; memset(&high, 0xFF, sizeof(high)); - high.rm_startblock = bno + len - 1; + high.rm_startblock = irec->rc_startblock + irec->rc_blockcount - 1; INIT_LIST_HEAD(&refchk.fragments); error = xfs_rmap_query_range(sc->sa.rmap_cur, &low, &high, @@ -302,30 +300,29 @@ xchk_refcountbt_xref_rmap( goto out_free; xchk_refcountbt_process_rmap_fragments(&refchk); - if (refcount != refchk.seen) + if (irec->rc_refcount != refchk.seen) xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); out_free: list_for_each_entry_safe(frag, n, &refchk.fragments, list) { list_del(&frag->list); - kmem_free(frag); + kfree(frag); } } /* Cross-reference with the other btrees. */ STATIC void xchk_refcountbt_xref( - struct xfs_scrub *sc, - xfs_agblock_t agbno, - xfs_extlen_t len, - xfs_nlink_t refcount) + struct xfs_scrub *sc, + const struct xfs_refcount_irec *irec) { if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return; - xchk_xref_is_used_space(sc, agbno, len); - xchk_xref_is_not_inode_chunk(sc, agbno, len); - xchk_refcountbt_xref_rmap(sc, agbno, len, refcount); + xchk_xref_is_used_space(sc, irec->rc_startblock, irec->rc_blockcount); + xchk_xref_is_not_inode_chunk(sc, irec->rc_startblock, + irec->rc_blockcount); + xchk_refcountbt_xref_rmap(sc, irec); } /* Scrub a refcountbt record. */ @@ -334,35 +331,27 @@ xchk_refcountbt_rec( struct xchk_btree *bs, const union xfs_btree_rec *rec) { + struct xfs_refcount_irec irec; xfs_agblock_t *cow_blocks = bs->private; struct xfs_perag *pag = bs->cur->bc_ag.pag; - xfs_agblock_t bno; - xfs_extlen_t len; - xfs_nlink_t refcount; - bool has_cowflag; - bno = be32_to_cpu(rec->refc.rc_startblock); - len = be32_to_cpu(rec->refc.rc_blockcount); - refcount = be32_to_cpu(rec->refc.rc_refcount); + xfs_refcount_btrec_to_irec(rec, &irec); - /* Only CoW records can have refcount == 1. */ - has_cowflag = (bno & XFS_REFC_COW_START); - if ((refcount == 1 && !has_cowflag) || (refcount != 1 && has_cowflag)) + /* Check the domain and refcount are not incompatible. */ + if (!xfs_refcount_check_domain(&irec)) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); - if (has_cowflag) - (*cow_blocks) += len; + + if (irec.rc_domain == XFS_REFC_DOMAIN_COW) + (*cow_blocks) += irec.rc_blockcount; /* Check the extent. */ - bno &= ~XFS_REFC_COW_START; - if (bno + len <= bno || - !xfs_verify_agbno(pag, bno) || - !xfs_verify_agbno(pag, bno + len - 1)) + if (!xfs_verify_agbext(pag, irec.rc_startblock, irec.rc_blockcount)) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); - if (refcount == 0) + if (irec.rc_refcount == 0) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); - xchk_refcountbt_xref(bs->sc, bno, len, refcount); + xchk_refcountbt_xref(bs->sc, &irec); return 0; } @@ -426,7 +415,6 @@ xchk_xref_is_cow_staging( xfs_extlen_t len) { struct xfs_refcount_irec rc; - bool has_cowflag; int has_refcount; int error; @@ -434,8 +422,8 @@ xchk_xref_is_cow_staging( return; /* Find the CoW staging extent. */ - error = xfs_refcount_lookup_le(sc->sa.refc_cur, - agbno + XFS_REFC_COW_START, &has_refcount); + error = xfs_refcount_lookup_le(sc->sa.refc_cur, XFS_REFC_DOMAIN_COW, + agbno, &has_refcount); if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur)) return; if (!has_refcount) { @@ -451,9 +439,8 @@ xchk_xref_is_cow_staging( return; } - /* CoW flag must be set, refcount must be 1. */ - has_cowflag = (rc.rc_startblock & XFS_REFC_COW_START); - if (!has_cowflag || rc.rc_refcount != 1) + /* CoW lookup returned a shared extent record? */ + if (rc.rc_domain != XFS_REFC_DOMAIN_COW) xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); /* Must be at least as long as what was passed in */ @@ -477,7 +464,8 @@ xchk_xref_is_not_shared( if (!sc->sa.refc_cur || xchk_skip_xref(sc->sm)) return; - error = xfs_refcount_has_record(sc->sa.refc_cur, agbno, len, &shared); + error = xfs_refcount_has_record(sc->sa.refc_cur, XFS_REFC_DOMAIN_SHARED, + agbno, len, &shared); if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur)) return; if (shared) diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index c18bd039fce9..4b92f9253ccd 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -61,7 +61,6 @@ xrep_attempt( sc->flags |= XREP_ALREADY_FIXED; return -EAGAIN; case -EDEADLOCK: - case -EAGAIN: /* Tell the caller to try again having grabbed all the locks. */ if (!(sc->flags & XCHK_TRY_HARDER)) { sc->flags |= XCHK_TRY_HARDER; @@ -70,10 +69,15 @@ xrep_attempt( /* * We tried harder but still couldn't grab all the resources * we needed to fix it. The corruption has not been fixed, - * so report back to userspace. + * so exit to userspace with the scan's output flags unchanged. */ - return -EFSCORRUPTED; + return 0; default: + /* + * EAGAIN tells the caller to re-scrub, so we cannot return + * that here. + */ + ASSERT(error != -EAGAIN); return error; } } @@ -121,32 +125,40 @@ xrep_roll_ag_trans( { int error; - /* Keep the AG header buffers locked so we can keep going. */ - if (sc->sa.agi_bp) + /* + * Keep the AG header buffers locked while we roll the transaction. + * Ensure that both AG buffers are dirty and held when we roll the + * transaction so that they move forward in the log without losing the + * bli (and hence the bli type) when the transaction commits. + * + * Normal code would never hold clean buffers across a roll, but repair + * needs both buffers to maintain a total lock on the AG. + */ + if (sc->sa.agi_bp) { + xfs_ialloc_log_agi(sc->tp, sc->sa.agi_bp, XFS_AGI_MAGICNUM); xfs_trans_bhold(sc->tp, sc->sa.agi_bp); - if (sc->sa.agf_bp) + } + + if (sc->sa.agf_bp) { + xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, XFS_AGF_MAGICNUM); xfs_trans_bhold(sc->tp, sc->sa.agf_bp); - if (sc->sa.agfl_bp) - xfs_trans_bhold(sc->tp, sc->sa.agfl_bp); + } /* - * Roll the transaction. We still own the buffer and the buffer lock - * regardless of whether or not the roll succeeds. If the roll fails, - * the buffers will be released during teardown on our way out of the - * kernel. If it succeeds, we join them to the new transaction and - * move on. + * Roll the transaction. We still hold the AG header buffers locked + * regardless of whether or not that succeeds. On failure, the buffers + * will be released during teardown on our way out of the kernel. If + * successful, join the buffers to the new transaction and move on. */ error = xfs_trans_roll(&sc->tp); if (error) return error; - /* Join AG headers to the new transaction. */ + /* Join the AG headers to the new transaction. */ if (sc->sa.agi_bp) xfs_trans_bjoin(sc->tp, sc->sa.agi_bp); if (sc->sa.agf_bp) xfs_trans_bjoin(sc->tp, sc->sa.agf_bp); - if (sc->sa.agfl_bp) - xfs_trans_bjoin(sc->tp, sc->sa.agfl_bp); return 0; } @@ -498,6 +510,7 @@ xrep_put_freelist( struct xfs_scrub *sc, xfs_agblock_t agbno) { + struct xfs_buf *agfl_bp; int error; /* Make sure there's space on the freelist. */ @@ -516,8 +529,12 @@ xrep_put_freelist( return error; /* Put the block on the AGFL. */ + error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp); + if (error) + return error; + error = xfs_alloc_put_freelist(sc->sa.pag, sc->tp, sc->sa.agf_bp, - sc->sa.agfl_bp, agbno, 0); + agfl_bp, agbno, 0); if (error) return error; xfs_extent_busy_insert(sc->tp, sc->sa.pag, agbno, 1, diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 2e8e400f10a9..07a7a75f987f 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -174,7 +174,7 @@ xchk_teardown( if (sc->flags & XCHK_REAPING_DISABLED) xchk_start_reaping(sc); if (sc->buf) { - kmem_free(sc->buf); + kvfree(sc->buf); sc->buf = NULL; } return error; @@ -467,7 +467,7 @@ xfs_scrub_metadata( xfs_warn_mount(mp, XFS_OPSTATE_WARNED_SCRUB, "EXPERIMENTAL online scrub feature in use. Use at your own risk!"); - sc = kmem_zalloc(sizeof(struct xfs_scrub), KM_NOFS | KM_MAYFAIL); + sc = kzalloc(sizeof(struct xfs_scrub), XCHK_GFP_FLAGS); if (!sc) { error = -ENOMEM; goto out; @@ -557,7 +557,7 @@ out_nofix: out_teardown: error = xchk_teardown(sc, error); out_sc: - kmem_free(sc); + kfree(sc); out: trace_xchk_done(XFS_I(file_inode(file)), sm, error); if (error == -EFSCORRUPTED || error == -EFSBADCRC) { diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index 3de5287e98d8..b4d391b4c938 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -8,6 +8,15 @@ struct xfs_scrub; +/* + * Standard flags for allocating memory within scrub. NOFS context is + * configured by the process allocation scope. Scrub and repair must be able + * to back out gracefully if there isn't enough memory. Force-cast to avoid + * complaints from static checkers. + */ +#define XCHK_GFP_FLAGS ((__force gfp_t)(GFP_KERNEL | __GFP_NOWARN | \ + __GFP_RETRY_MAYFAIL)) + /* Type info and names for the scrub types. */ enum xchk_type { ST_NONE = 1, /* disabled */ @@ -39,7 +48,6 @@ struct xchk_ag { /* AG btree roots */ struct xfs_buf *agf_bp; - struct xfs_buf *agfl_bp; struct xfs_buf *agi_bp; /* AG btrees */ @@ -161,12 +169,4 @@ void xchk_xref_is_used_rt_space(struct xfs_scrub *sc, xfs_rtblock_t rtbno, # define xchk_xref_is_used_rt_space(sc, rtbno, len) do { } while (0) #endif -struct xchk_fscounters { - uint64_t icount; - uint64_t ifree; - uint64_t fdblocks; - unsigned long long icount_min; - unsigned long long icount_max; -}; - #endif /* __XFS_SCRUB_SCRUB_H__ */ diff --git a/fs/xfs/scrub/symlink.c b/fs/xfs/scrub/symlink.c index 75311f8daeeb..c1c99ffe7408 100644 --- a/fs/xfs/scrub/symlink.c +++ b/fs/xfs/scrub/symlink.c @@ -21,7 +21,7 @@ xchk_setup_symlink( struct xfs_scrub *sc) { /* Allocate the buffer without the inode lock held. */ - sc->buf = kvzalloc(XFS_SYMLINK_MAXLEN + 1, GFP_KERNEL); + sc->buf = kvzalloc(XFS_SYMLINK_MAXLEN + 1, XCHK_GFP_FLAGS); if (!sc->buf) return -ENOMEM; diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index b744c62052b6..a05f44eb8178 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -242,12 +242,13 @@ xfs_acl_set_mode( } int -xfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, +xfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, struct posix_acl *acl, int type) { umode_t mode; bool set_mode = false; int error = 0; + struct inode *inode = d_inode(dentry); if (!acl) goto set_acl; diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 263404d0bfda..dcd176149c7a 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -11,7 +11,7 @@ struct posix_acl; #ifdef CONFIG_XFS_POSIX_ACL extern struct posix_acl *xfs_get_acl(struct inode *inode, int type, bool rcu); -extern int xfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, +extern int xfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, struct posix_acl *acl, int type); extern int __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type); void xfs_forget_acl(struct inode *inode, const char *name); diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 5d1a995b15f8..41734202796f 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -17,6 +17,8 @@ #include "xfs_bmap.h" #include "xfs_bmap_util.h" #include "xfs_reflink.h" +#include "xfs_errortag.h" +#include "xfs_error.h" struct xfs_writepage_ctx { struct iomap_writepage_ctx ctx; @@ -114,9 +116,8 @@ xfs_end_ioend( if (unlikely(error)) { if (ioend->io_flags & IOMAP_F_SHARED) { xfs_reflink_cancel_cow_range(ip, offset, size, true); - xfs_bmap_punch_delalloc_range(ip, - XFS_B_TO_FSBT(mp, offset), - XFS_B_TO_FSB(mp, size)); + xfs_bmap_punch_delalloc_range(ip, offset, + offset + size); } goto done; } @@ -218,11 +219,17 @@ xfs_imap_valid( * checked (and found nothing at this offset) could have added * overlapping blocks. */ - if (XFS_WPC(wpc)->data_seq != READ_ONCE(ip->i_df.if_seq)) + if (XFS_WPC(wpc)->data_seq != READ_ONCE(ip->i_df.if_seq)) { + trace_xfs_wb_data_iomap_invalid(ip, &wpc->iomap, + XFS_WPC(wpc)->data_seq, XFS_DATA_FORK); return false; + } if (xfs_inode_has_cow_data(ip) && - XFS_WPC(wpc)->cow_seq != READ_ONCE(ip->i_cowfp->if_seq)) + XFS_WPC(wpc)->cow_seq != READ_ONCE(ip->i_cowfp->if_seq)) { + trace_xfs_wb_cow_iomap_invalid(ip, &wpc->iomap, + XFS_WPC(wpc)->cow_seq, XFS_COW_FORK); return false; + } return true; } @@ -286,6 +293,8 @@ xfs_map_blocks( if (xfs_is_shutdown(mp)) return -EIO; + XFS_ERRORTAG_DELAY(mp, XFS_ERRTAG_WB_DELAY_MS); + /* * COW fork blocks can overlap data fork blocks even if the blocks * aren't shared. COW I/O always takes precedent, so we must always @@ -373,7 +382,7 @@ retry: isnullstartblock(imap.br_startblock)) goto allocate_blocks; - xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0); + xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0, XFS_WPC(wpc)->data_seq); trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap); return 0; allocate_blocks: @@ -455,12 +464,8 @@ xfs_discard_folio( struct folio *folio, loff_t pos) { - struct inode *inode = folio->mapping->host; - struct xfs_inode *ip = XFS_I(inode); + struct xfs_inode *ip = XFS_I(folio->mapping->host); struct xfs_mount *mp = ip->i_mount; - size_t offset = offset_in_folio(folio, pos); - xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, pos); - xfs_fileoff_t pageoff_fsb = XFS_B_TO_FSBT(mp, offset); int error; if (xfs_is_shutdown(mp)) @@ -470,8 +475,9 @@ xfs_discard_folio( "page discard on page "PTR_FMT", inode 0x%llx, pos %llu.", folio, ip->i_ino, pos); - error = xfs_bmap_punch_delalloc_range(ip, start_fsb, - i_blocks_per_folio(inode, folio) - pageoff_fsb); + error = xfs_bmap_punch_delalloc_range(ip, pos, + round_up(pos, folio_size(folio))); + if (error && !xfs_is_shutdown(mp)) xfs_alert(mp, "page discard unable to remove delalloc mapping."); } diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index cf5ce607dc05..2788a6f2edcd 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -245,28 +245,6 @@ xfs_attri_init( return attrip; } -/* - * Copy an attr format buffer from the given buf, and into the destination attr - * format structure. - */ -STATIC int -xfs_attri_copy_format( - struct xfs_log_iovec *buf, - struct xfs_attri_log_format *dst_attr_fmt) -{ - struct xfs_attri_log_format *src_attr_fmt = buf->i_addr; - size_t len; - - len = sizeof(struct xfs_attri_log_format); - if (buf->i_len != len) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); - return -EFSCORRUPTED; - } - - memcpy((char *)dst_attr_fmt, (char *)src_attr_fmt, len); - return 0; -} - static inline struct xfs_attrd_log_item *ATTRD_ITEM(struct xfs_log_item *lip) { return container_of(lip, struct xfs_attrd_log_item, attrd_item); @@ -731,24 +709,50 @@ xlog_recover_attri_commit_pass2( struct xfs_attri_log_nameval *nv; const void *attr_value = NULL; const void *attr_name; - int error; + size_t len; attri_formatp = item->ri_buf[0].i_addr; attr_name = item->ri_buf[1].i_addr; /* Validate xfs_attri_log_format before the large memory allocation */ + len = sizeof(struct xfs_attri_log_format); + if (item->ri_buf[0].i_len != len) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + return -EFSCORRUPTED; + } + if (!xfs_attri_validate(mp, attri_formatp)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + return -EFSCORRUPTED; + } + + /* Validate the attr name */ + if (item->ri_buf[1].i_len != + xlog_calc_iovec_len(attri_formatp->alfi_name_len)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } if (!xfs_attr_namecheck(attr_name, attri_formatp->alfi_name_len)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[1].i_addr, item->ri_buf[1].i_len); return -EFSCORRUPTED; } - if (attri_formatp->alfi_value_len) + /* Validate the attr value, if present */ + if (attri_formatp->alfi_value_len != 0) { + if (item->ri_buf[2].i_len != xlog_calc_iovec_len(attri_formatp->alfi_value_len)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, + item->ri_buf[0].i_len); + return -EFSCORRUPTED; + } + attr_value = item->ri_buf[2].i_addr; + } /* * Memory alloc failure will cause replay to abort. We attach the @@ -760,9 +764,7 @@ xlog_recover_attri_commit_pass2( attri_formatp->alfi_value_len); attrip = xfs_attri_init(mp, nv); - error = xfs_attri_copy_format(&item->ri_buf[0], &attrip->attri_format); - if (error) - goto out; + memcpy(&attrip->attri_format, attri_formatp, len); /* * The ATTRI has two references. One for the ATTRD and one for ATTRI to @@ -774,10 +776,6 @@ xlog_recover_attri_commit_pass2( xfs_attri_release(attrip); xfs_attri_log_nameval_put(nv); return 0; -out: - xfs_attri_item_free(attrip); - xfs_attri_log_nameval_put(nv); - return error; } /* @@ -842,7 +840,8 @@ xlog_recover_attrd_commit_pass2( attrd_formatp = item->ri_buf[0].i_addr; if (item->ri_buf[0].i_len != sizeof(struct xfs_attrd_log_format)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index 51f66e982484..41323da523d1 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -608,28 +608,18 @@ static const struct xfs_item_ops xfs_bui_item_ops = { .iop_relog = xfs_bui_item_relog, }; -/* - * Copy an BUI format buffer from the given buf, and into the destination - * BUI format structure. The BUI/BUD items were designed not to need any - * special alignment handling. - */ -static int +static inline void xfs_bui_copy_format( - struct xfs_log_iovec *buf, - struct xfs_bui_log_format *dst_bui_fmt) + struct xfs_bui_log_format *dst, + const struct xfs_bui_log_format *src) { - struct xfs_bui_log_format *src_bui_fmt; - uint len; + unsigned int i; - src_bui_fmt = buf->i_addr; - len = xfs_bui_log_format_sizeof(src_bui_fmt->bui_nextents); + memcpy(dst, src, offsetof(struct xfs_bui_log_format, bui_extents)); - if (buf->i_len == len) { - memcpy(dst_bui_fmt, src_bui_fmt, len); - return 0; - } - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); - return -EFSCORRUPTED; + for (i = 0; i < src->bui_nextents; i++) + memcpy(&dst->bui_extents[i], &src->bui_extents[i], + sizeof(struct xfs_map_extent)); } /* @@ -646,23 +636,34 @@ xlog_recover_bui_commit_pass2( struct xlog_recover_item *item, xfs_lsn_t lsn) { - int error; struct xfs_mount *mp = log->l_mp; struct xfs_bui_log_item *buip; struct xfs_bui_log_format *bui_formatp; + size_t len; bui_formatp = item->ri_buf[0].i_addr; + if (item->ri_buf[0].i_len < xfs_bui_log_format_sizeof(0)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + return -EFSCORRUPTED; + } + if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } - buip = xfs_bui_init(mp); - error = xfs_bui_copy_format(&item->ri_buf[0], &buip->bui_format); - if (error) { - xfs_bui_item_free(buip); - return error; + + len = xfs_bui_log_format_sizeof(bui_formatp->bui_nextents); + if (item->ri_buf[0].i_len != len) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + return -EFSCORRUPTED; } + + buip = xfs_bui_init(mp); + xfs_bui_copy_format(&buip->bui_format, bui_formatp); atomic_set(&buip->bui_next_extent, bui_formatp->bui_nextents); /* * Insert the intent into the AIL directly and drop one reference so @@ -696,7 +697,8 @@ xlog_recover_bud_commit_pass2( bud_formatp = item->ri_buf[0].i_addr; if (item->ri_buf[0].i_len != sizeof(struct xfs_bud_log_format)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 04d0c2bff67c..867645b74d88 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -590,11 +590,13 @@ out_unlock_iolock: int xfs_bmap_punch_delalloc_range( struct xfs_inode *ip, - xfs_fileoff_t start_fsb, - xfs_fileoff_t length) + xfs_off_t start_byte, + xfs_off_t end_byte) { + struct xfs_mount *mp = ip->i_mount; struct xfs_ifork *ifp = &ip->i_df; - xfs_fileoff_t end_fsb = start_fsb + length; + xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, start_byte); + xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, end_byte); struct xfs_bmbt_irec got, del; struct xfs_iext_cursor icur; int error = 0; @@ -607,7 +609,7 @@ xfs_bmap_punch_delalloc_range( while (got.br_startoff + got.br_blockcount > start_fsb) { del = got; - xfs_trim_extent(&del, start_fsb, length); + xfs_trim_extent(&del, start_fsb, end_fsb - start_fsb); /* * A delete can push the cursor forward. Step back to the diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 24b37d211f1d..6888078f5c31 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -31,7 +31,7 @@ xfs_bmap_rtalloc(struct xfs_bmalloca *ap) #endif /* CONFIG_XFS_RT */ int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip, - xfs_fileoff_t start_fsb, xfs_fileoff_t length); + xfs_off_t start_byte, xfs_off_t end_byte); struct kgetbmap { __s64 bmv_offset; /* file offset of segment in blocks */ diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index dde346450952..54c774af6e1c 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1945,6 +1945,7 @@ xfs_free_buftarg( list_lru_destroy(&btp->bt_lru); blkdev_issue_flush(btp->bt_bdev); + invalidate_bdev(btp->bt_bdev); fs_put_dax(btp->bt_daxdev, btp->bt_mount); kmem_free(btp); diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 522d450a94b1..df7322ed73fa 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -1018,6 +1018,8 @@ xfs_buf_item_relse( trace_xfs_buf_item_relse(bp, _RET_IP_); ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)); + if (atomic_read(&bip->bli_refcount)) + return; bp->b_log_item = NULL; xfs_buf_rele(bp); xfs_buf_item_free(bip); diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 7db588ed0be5..ae082808cfed 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -46,7 +46,7 @@ static unsigned int xfs_errortag_random_default[] = { XFS_RANDOM_REFCOUNT_FINISH_ONE, XFS_RANDOM_BMAP_FINISH_ONE, XFS_RANDOM_AG_RESV_CRITICAL, - XFS_RANDOM_DROP_WRITES, + 0, /* XFS_RANDOM_DROP_WRITES has been removed */ XFS_RANDOM_LOG_BAD_CRC, XFS_RANDOM_LOG_ITEM_PIN, XFS_RANDOM_BUF_LRU_REF, @@ -60,6 +60,8 @@ static unsigned int xfs_errortag_random_default[] = { XFS_RANDOM_LARP, XFS_RANDOM_DA_LEAF_SPLIT, XFS_RANDOM_ATTR_LEAF_TO_NODE, + XFS_RANDOM_WB_DELAY_MS, + XFS_RANDOM_WRITE_DELAY_MS, }; struct xfs_errortag_attr { @@ -162,7 +164,6 @@ XFS_ERRORTAG_ATTR_RW(refcount_continue_update, XFS_ERRTAG_REFCOUNT_CONTINUE_UPDA XFS_ERRORTAG_ATTR_RW(refcount_finish_one, XFS_ERRTAG_REFCOUNT_FINISH_ONE); XFS_ERRORTAG_ATTR_RW(bmap_finish_one, XFS_ERRTAG_BMAP_FINISH_ONE); XFS_ERRORTAG_ATTR_RW(ag_resv_critical, XFS_ERRTAG_AG_RESV_CRITICAL); -XFS_ERRORTAG_ATTR_RW(drop_writes, XFS_ERRTAG_DROP_WRITES); XFS_ERRORTAG_ATTR_RW(log_bad_crc, XFS_ERRTAG_LOG_BAD_CRC); XFS_ERRORTAG_ATTR_RW(log_item_pin, XFS_ERRTAG_LOG_ITEM_PIN); XFS_ERRORTAG_ATTR_RW(buf_lru_ref, XFS_ERRTAG_BUF_LRU_REF); @@ -176,6 +177,8 @@ XFS_ERRORTAG_ATTR_RW(ag_resv_fail, XFS_ERRTAG_AG_RESV_FAIL); XFS_ERRORTAG_ATTR_RW(larp, XFS_ERRTAG_LARP); XFS_ERRORTAG_ATTR_RW(da_leaf_split, XFS_ERRTAG_DA_LEAF_SPLIT); XFS_ERRORTAG_ATTR_RW(attr_leaf_to_node, XFS_ERRTAG_ATTR_LEAF_TO_NODE); +XFS_ERRORTAG_ATTR_RW(wb_delay_ms, XFS_ERRTAG_WB_DELAY_MS); +XFS_ERRORTAG_ATTR_RW(write_delay_ms, XFS_ERRTAG_WRITE_DELAY_MS); static struct attribute *xfs_errortag_attrs[] = { XFS_ERRORTAG_ATTR_LIST(noerror), @@ -206,7 +209,6 @@ static struct attribute *xfs_errortag_attrs[] = { XFS_ERRORTAG_ATTR_LIST(refcount_finish_one), XFS_ERRORTAG_ATTR_LIST(bmap_finish_one), XFS_ERRORTAG_ATTR_LIST(ag_resv_critical), - XFS_ERRORTAG_ATTR_LIST(drop_writes), XFS_ERRORTAG_ATTR_LIST(log_bad_crc), XFS_ERRORTAG_ATTR_LIST(log_item_pin), XFS_ERRORTAG_ATTR_LIST(buf_lru_ref), @@ -220,6 +222,8 @@ static struct attribute *xfs_errortag_attrs[] = { XFS_ERRORTAG_ATTR_LIST(larp), XFS_ERRORTAG_ATTR_LIST(da_leaf_split), XFS_ERRORTAG_ATTR_LIST(attr_leaf_to_node), + XFS_ERRORTAG_ATTR_LIST(wb_delay_ms), + XFS_ERRORTAG_ATTR_LIST(write_delay_ms), NULL, }; ATTRIBUTE_GROUPS(xfs_errortag); @@ -234,13 +238,18 @@ int xfs_errortag_init( struct xfs_mount *mp) { + int ret; + mp->m_errortag = kmem_zalloc(sizeof(unsigned int) * XFS_ERRTAG_MAX, KM_MAYFAIL); if (!mp->m_errortag) return -ENOMEM; - return xfs_sysfs_init(&mp->m_errortag_kobj, &xfs_errortag_ktype, - &mp->m_kobj, "errortag"); + ret = xfs_sysfs_init(&mp->m_errortag_kobj, &xfs_errortag_ktype, + &mp->m_kobj, "errortag"); + if (ret) + kmem_free(mp->m_errortag); + return ret; } void @@ -251,6 +260,32 @@ xfs_errortag_del( kmem_free(mp->m_errortag); } +static bool +xfs_errortag_valid( + unsigned int error_tag) +{ + if (error_tag >= XFS_ERRTAG_MAX) + return false; + + /* Error out removed injection types */ + if (error_tag == XFS_ERRTAG_DROP_WRITES) + return false; + return true; +} + +bool +xfs_errortag_enabled( + struct xfs_mount *mp, + unsigned int tag) +{ + if (!mp->m_errortag) + return false; + if (!xfs_errortag_valid(tag)) + return false; + + return mp->m_errortag[tag] != 0; +} + bool xfs_errortag_test( struct xfs_mount *mp, @@ -272,9 +307,11 @@ xfs_errortag_test( if (!mp->m_errortag) return false; - ASSERT(error_tag < XFS_ERRTAG_MAX); + if (!xfs_errortag_valid(error_tag)) + return false; + randfactor = mp->m_errortag[error_tag]; - if (!randfactor || prandom_u32_max(randfactor)) + if (!randfactor || get_random_u32_below(randfactor)) return false; xfs_warn_ratelimited(mp, @@ -288,7 +325,7 @@ xfs_errortag_get( struct xfs_mount *mp, unsigned int error_tag) { - if (error_tag >= XFS_ERRTAG_MAX) + if (!xfs_errortag_valid(error_tag)) return -EINVAL; return mp->m_errortag[error_tag]; @@ -300,7 +337,7 @@ xfs_errortag_set( unsigned int error_tag, unsigned int tag_value) { - if (error_tag >= XFS_ERRTAG_MAX) + if (!xfs_errortag_valid(error_tag)) return -EINVAL; mp->m_errortag[error_tag] = tag_value; @@ -314,7 +351,7 @@ xfs_errortag_add( { BUILD_BUG_ON(ARRAY_SIZE(xfs_errortag_random_default) != XFS_ERRTAG_MAX); - if (error_tag >= XFS_ERRTAG_MAX) + if (!xfs_errortag_valid(error_tag)) return -EINVAL; return xfs_errortag_set(mp, error_tag, diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index 5191e9145e55..dbe6c37dc697 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -45,6 +45,18 @@ extern bool xfs_errortag_test(struct xfs_mount *mp, const char *expression, const char *file, int line, unsigned int error_tag); #define XFS_TEST_ERROR(expr, mp, tag) \ ((expr) || xfs_errortag_test((mp), #expr, __FILE__, __LINE__, (tag))) +bool xfs_errortag_enabled(struct xfs_mount *mp, unsigned int tag); +#define XFS_ERRORTAG_DELAY(mp, tag) \ + do { \ + might_sleep(); \ + if (!xfs_errortag_enabled((mp), (tag))) \ + break; \ + xfs_warn_ratelimited((mp), \ +"Injecting %ums delay at file %s, line %d, on filesystem \"%s\"", \ + (mp)->m_errortag[(tag)], __FILE__, __LINE__, \ + (mp)->m_super->s_id); \ + mdelay((mp)->m_errortag[(tag)]); \ + } while (0) extern int xfs_errortag_get(struct xfs_mount *mp, unsigned int error_tag); extern int xfs_errortag_set(struct xfs_mount *mp, unsigned int error_tag, @@ -55,6 +67,7 @@ extern int xfs_errortag_clearall(struct xfs_mount *mp); #define xfs_errortag_init(mp) (0) #define xfs_errortag_del(mp) #define XFS_TEST_ERROR(expr, mp, tag) (expr) +#define XFS_ERRORTAG_DELAY(mp, tag) ((void)0) #define xfs_errortag_set(mp, tag, val) (ENOSYS) #define xfs_errortag_add(mp, tag) (ENOSYS) #define xfs_errortag_clearall(mp) (ENOSYS) diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 27ccfcd82f04..d5130d1fcfae 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -66,27 +66,16 @@ xfs_efi_release( xfs_efi_item_free(efip); } -/* - * This returns the number of iovecs needed to log the given efi item. - * We only need 1 iovec for an efi item. It just logs the efi_log_format - * structure. - */ -static inline int -xfs_efi_item_sizeof( - struct xfs_efi_log_item *efip) -{ - return sizeof(struct xfs_efi_log_format) + - (efip->efi_format.efi_nextents - 1) * sizeof(xfs_extent_t); -} - STATIC void xfs_efi_item_size( struct xfs_log_item *lip, int *nvecs, int *nbytes) { + struct xfs_efi_log_item *efip = EFI_ITEM(lip); + *nvecs += 1; - *nbytes += xfs_efi_item_sizeof(EFI_ITEM(lip)); + *nbytes += xfs_efi_log_format_sizeof(efip->efi_format.efi_nextents); } /* @@ -112,7 +101,7 @@ xfs_efi_item_format( xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFI_FORMAT, &efip->efi_format, - xfs_efi_item_sizeof(efip)); + xfs_efi_log_format_sizeof(efip->efi_format.efi_nextents)); } @@ -155,13 +144,11 @@ xfs_efi_init( { struct xfs_efi_log_item *efip; - uint size; ASSERT(nextents > 0); if (nextents > XFS_EFI_MAX_FAST_EXTENTS) { - size = (uint)(sizeof(struct xfs_efi_log_item) + - ((nextents - 1) * sizeof(xfs_extent_t))); - efip = kmem_zalloc(size, 0); + efip = kzalloc(xfs_efi_log_item_sizeof(nextents), + GFP_KERNEL | __GFP_NOFAIL); } else { efip = kmem_cache_zalloc(xfs_efi_cache, GFP_KERNEL | __GFP_NOFAIL); @@ -188,15 +175,17 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt) { xfs_efi_log_format_t *src_efi_fmt = buf->i_addr; uint i; - uint len = sizeof(xfs_efi_log_format_t) + - (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_t); - uint len32 = sizeof(xfs_efi_log_format_32_t) + - (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_32_t); - uint len64 = sizeof(xfs_efi_log_format_64_t) + - (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_64_t); + uint len = xfs_efi_log_format_sizeof(src_efi_fmt->efi_nextents); + uint len32 = xfs_efi_log_format32_sizeof(src_efi_fmt->efi_nextents); + uint len64 = xfs_efi_log_format64_sizeof(src_efi_fmt->efi_nextents); if (buf->i_len == len) { - memcpy((char *)dst_efi_fmt, (char*)src_efi_fmt, len); + memcpy(dst_efi_fmt, src_efi_fmt, + offsetof(struct xfs_efi_log_format, efi_extents)); + for (i = 0; i < src_efi_fmt->efi_nextents; i++) + memcpy(&dst_efi_fmt->efi_extents[i], + &src_efi_fmt->efi_extents[i], + sizeof(struct xfs_extent)); return 0; } else if (buf->i_len == len32) { xfs_efi_log_format_32_t *src_efi_fmt_32 = buf->i_addr; @@ -227,7 +216,8 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt) } return 0; } - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, NULL, buf->i_addr, + buf->i_len); return -EFSCORRUPTED; } @@ -246,27 +236,16 @@ xfs_efd_item_free(struct xfs_efd_log_item *efdp) kmem_cache_free(xfs_efd_cache, efdp); } -/* - * This returns the number of iovecs needed to log the given efd item. - * We only need 1 iovec for an efd item. It just logs the efd_log_format - * structure. - */ -static inline int -xfs_efd_item_sizeof( - struct xfs_efd_log_item *efdp) -{ - return sizeof(xfs_efd_log_format_t) + - (efdp->efd_format.efd_nextents - 1) * sizeof(xfs_extent_t); -} - STATIC void xfs_efd_item_size( struct xfs_log_item *lip, int *nvecs, int *nbytes) { + struct xfs_efd_log_item *efdp = EFD_ITEM(lip); + *nvecs += 1; - *nbytes += xfs_efd_item_sizeof(EFD_ITEM(lip)); + *nbytes += xfs_efd_log_format_sizeof(efdp->efd_format.efd_nextents); } /* @@ -291,7 +270,7 @@ xfs_efd_item_format( xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFD_FORMAT, &efdp->efd_format, - xfs_efd_item_sizeof(efdp)); + xfs_efd_log_format_sizeof(efdp->efd_format.efd_nextents)); } /* @@ -340,9 +319,8 @@ xfs_trans_get_efd( ASSERT(nextents > 0); if (nextents > XFS_EFD_MAX_FAST_EXTENTS) { - efdp = kmem_zalloc(sizeof(struct xfs_efd_log_item) + - (nextents - 1) * sizeof(struct xfs_extent), - 0); + efdp = kzalloc(xfs_efd_log_item_sizeof(nextents), + GFP_KERNEL | __GFP_NOFAIL); } else { efdp = kmem_cache_zalloc(xfs_efd_cache, GFP_KERNEL | __GFP_NOFAIL); @@ -733,6 +711,12 @@ xlog_recover_efi_commit_pass2( efi_formatp = item->ri_buf[0].i_addr; + if (item->ri_buf[0].i_len < xfs_efi_log_format_sizeof(0)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + return -EFSCORRUPTED; + } + efip = xfs_efi_init(mp, efi_formatp->efi_nextents); error = xfs_efi_copy_format(&item->ri_buf[0], &efip->efi_format); if (error) { @@ -769,12 +753,24 @@ xlog_recover_efd_commit_pass2( xfs_lsn_t lsn) { struct xfs_efd_log_format *efd_formatp; + int buflen = item->ri_buf[0].i_len; efd_formatp = item->ri_buf[0].i_addr; - ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) + - ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) || - (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) + - ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t))))); + + if (buflen < sizeof(struct xfs_efd_log_format)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, + efd_formatp, buflen); + return -EFSCORRUPTED; + } + + if (item->ri_buf[0].i_len != xfs_efd_log_format32_sizeof( + efd_formatp->efd_nextents) && + item->ri_buf[0].i_len != xfs_efd_log_format64_sizeof( + efd_formatp->efd_nextents)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, + efd_formatp, buflen); + return -EFSCORRUPTED; + } xlog_recover_release_intent(log, XFS_LI_EFI, efd_formatp->efd_efi_id); return 0; diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h index 186d0f2137f1..da6a5afa607c 100644 --- a/fs/xfs/xfs_extfree_item.h +++ b/fs/xfs/xfs_extfree_item.h @@ -52,6 +52,14 @@ struct xfs_efi_log_item { xfs_efi_log_format_t efi_format; }; +static inline size_t +xfs_efi_log_item_sizeof( + unsigned int nr) +{ + return offsetof(struct xfs_efi_log_item, efi_format) + + xfs_efi_log_format_sizeof(nr); +} + /* * This is the "extent free done" log item. It is used to log * the fact that some extents earlier mentioned in an efi item @@ -64,6 +72,14 @@ struct xfs_efd_log_item { xfs_efd_log_format_t efd_format; }; +static inline size_t +xfs_efd_log_item_sizeof( + unsigned int nr) +{ + return offsetof(struct xfs_efd_log_item, efd_format) + + xfs_efd_log_format_sizeof(nr); +} + /* * Max number of extents in fast allocation path. */ diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index c6c80265c0b2..595a5bcf46b9 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1261,7 +1261,7 @@ xfs_file_llseek( } #ifdef CONFIG_FS_DAX -static int +static inline vm_fault_t xfs_dax_fault( struct vm_fault *vmf, enum page_entry_size pe_size, @@ -1274,14 +1274,15 @@ xfs_dax_fault( &xfs_read_iomap_ops); } #else -static int +static inline vm_fault_t xfs_dax_fault( struct vm_fault *vmf, enum page_entry_size pe_size, bool write_fault, pfn_t *pfn) { - return 0; + ASSERT(0); + return VM_FAULT_SIGBUS; } #endif @@ -1324,7 +1325,7 @@ __xfs_filemap_fault( if (write_fault) { xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); ret = iomap_page_mkwrite(vmf, - &xfs_buffered_write_iomap_ops); + &xfs_page_mkwrite_iomap_ops); xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); } else { ret = filemap_fault(vmf); diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index d8337274c74d..88a88506ffff 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -524,7 +524,7 @@ xfs_getfsmap_rtdev_rtbitmap_query( struct xfs_mount *mp = tp->t_mountp; int error; - xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED); + xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); /* * Set up query parameters to return free rtextents covering the range @@ -551,7 +551,7 @@ xfs_getfsmap_rtdev_rtbitmap_query( if (error) goto err; err: - xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED); + xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); return error; } diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index eae7427062cf..f35e2cee5265 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -342,6 +342,9 @@ xfs_iget_recycle( trace_xfs_iget_recycle(ip); + if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) + return -EAGAIN; + /* * We need to make it look like the inode is being reclaimed to prevent * the actual reclaim workers from stomping over us while we recycle @@ -355,6 +358,7 @@ xfs_iget_recycle( ASSERT(!rwsem_is_locked(&inode->i_rwsem)); error = xfs_reinit_inode(mp, inode); + xfs_iunlock(ip, XFS_ILOCK_EXCL); if (error) { /* * Re-initializing the inode failed, and we are in deep @@ -518,6 +522,8 @@ xfs_iget_cache_hit( if (ip->i_flags & XFS_IRECLAIMABLE) { /* Drops i_flags_lock and RCU read lock. */ error = xfs_iget_recycle(pag, ip); + if (error == -EAGAIN) + goto out_skip; if (error) return error; } else { diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index c000b74dd203..d354ea2b74f9 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2479,7 +2479,7 @@ xfs_remove( error = xfs_dir_replace(tp, ip, &xfs_name_dotdot, tp->t_mountp->m_sb.sb_rootino, 0); if (error) - return error; + goto out_trans_cancel; } } else { /* @@ -2818,7 +2818,7 @@ retry: * Lock all the participating inodes. Depending upon whether * the target_name exists in the target directory, and * whether the target directory is the same as the source - * directory, we can lock from 2 to 4 inodes. + * directory, we can lock from 2 to 5 inodes. */ xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL); diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 1f783e979629..13f1b2add390 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1138,10 +1138,6 @@ xfs_ioctl_setattr_xflags( if ((fa->fsx_xflags & FS_XFLAG_REALTIME) && xfs_is_reflink_inode(ip)) ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK; - /* Don't allow us to set DAX mode for a reflinked file for now. */ - if ((fa->fsx_xflags & FS_XFLAG_DAX) && xfs_is_reflink_inode(ip)) - return -EINVAL; - /* diflags2 only valid for v3 inodes. */ i_flags2 = xfs_flags2diflags2(ip, fa->fsx_xflags); if (i_flags2 && !xfs_has_v3inodes(mp)) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 07da03976ec1..669c1bc5c3a7 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -48,13 +48,53 @@ xfs_alert_fsblock_zero( return -EFSCORRUPTED; } +u64 +xfs_iomap_inode_sequence( + struct xfs_inode *ip, + u16 iomap_flags) +{ + u64 cookie = 0; + + if (iomap_flags & IOMAP_F_XATTR) + return READ_ONCE(ip->i_af.if_seq); + if ((iomap_flags & IOMAP_F_SHARED) && ip->i_cowfp) + cookie = (u64)READ_ONCE(ip->i_cowfp->if_seq) << 32; + return cookie | READ_ONCE(ip->i_df.if_seq); +} + +/* + * Check that the iomap passed to us is still valid for the given offset and + * length. + */ +static bool +xfs_iomap_valid( + struct inode *inode, + const struct iomap *iomap) +{ + struct xfs_inode *ip = XFS_I(inode); + + if (iomap->validity_cookie != + xfs_iomap_inode_sequence(ip, iomap->flags)) { + trace_xfs_iomap_invalid(ip, iomap); + return false; + } + + XFS_ERRORTAG_DELAY(ip->i_mount, XFS_ERRTAG_WRITE_DELAY_MS); + return true; +} + +const struct iomap_page_ops xfs_iomap_page_ops = { + .iomap_valid = xfs_iomap_valid, +}; + int xfs_bmbt_to_iomap( struct xfs_inode *ip, struct iomap *iomap, struct xfs_bmbt_irec *imap, unsigned int mapping_flags, - u16 iomap_flags) + u16 iomap_flags, + u64 sequence_cookie) { struct xfs_mount *mp = ip->i_mount; struct xfs_buftarg *target = xfs_inode_buftarg(ip); @@ -91,6 +131,9 @@ xfs_bmbt_to_iomap( if (xfs_ipincount(ip) && (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP)) iomap->flags |= IOMAP_F_DIRTY; + + iomap->validity_cookie = sequence_cookie; + iomap->page_ops = &xfs_iomap_page_ops; return 0; } @@ -195,7 +238,8 @@ xfs_iomap_write_direct( xfs_fileoff_t offset_fsb, xfs_fileoff_t count_fsb, unsigned int flags, - struct xfs_bmbt_irec *imap) + struct xfs_bmbt_irec *imap, + u64 *seq) { struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; @@ -285,6 +329,7 @@ xfs_iomap_write_direct( error = xfs_alert_fsblock_zero(ip, imap); out_unlock: + *seq = xfs_iomap_inode_sequence(ip, 0); xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; @@ -743,6 +788,7 @@ xfs_direct_write_iomap_begin( bool shared = false; u16 iomap_flags = 0; unsigned int lockmode = XFS_ILOCK_SHARED; + u64 seq; ASSERT(flags & (IOMAP_WRITE | IOMAP_ZERO)); @@ -811,9 +857,10 @@ xfs_direct_write_iomap_begin( goto out_unlock; } + seq = xfs_iomap_inode_sequence(ip, iomap_flags); xfs_iunlock(ip, lockmode); trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap); - return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags); + return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags, seq); allocate_blocks: error = -EAGAIN; @@ -839,24 +886,26 @@ allocate_blocks: xfs_iunlock(ip, lockmode); error = xfs_iomap_write_direct(ip, offset_fsb, end_fsb - offset_fsb, - flags, &imap); + flags, &imap, &seq); if (error) return error; trace_xfs_iomap_alloc(ip, offset, length, XFS_DATA_FORK, &imap); return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, - iomap_flags | IOMAP_F_NEW); + iomap_flags | IOMAP_F_NEW, seq); out_found_cow: - xfs_iunlock(ip, lockmode); length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount); trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap); if (imap.br_startblock != HOLESTARTBLOCK) { - error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0); + seq = xfs_iomap_inode_sequence(ip, 0); + error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq); if (error) - return error; + goto out_unlock; } - return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED); + seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED); + xfs_iunlock(ip, lockmode); + return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED, seq); out_unlock: if (lockmode) @@ -915,6 +964,7 @@ xfs_buffered_write_iomap_begin( int allocfork = XFS_DATA_FORK; int error = 0; unsigned int lockmode = XFS_ILOCK_EXCL; + u64 seq; if (xfs_is_shutdown(mp)) return -EIO; @@ -926,6 +976,10 @@ xfs_buffered_write_iomap_begin( ASSERT(!XFS_IS_REALTIME_INODE(ip)); + error = xfs_qm_dqattach(ip); + if (error) + return error; + error = xfs_ilock_for_iomap(ip, flags, &lockmode); if (error) return error; @@ -1029,10 +1083,6 @@ xfs_buffered_write_iomap_begin( allocfork = XFS_COW_FORK; } - error = xfs_qm_dqattach_locked(ip, false); - if (error) - goto out_unlock; - if (eof && offset + count > XFS_ISIZE(ip)) { /* * Determine the initial size of the preallocation. @@ -1094,26 +1144,31 @@ retry: * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch * them out if the write happens to fail. */ + seq = xfs_iomap_inode_sequence(ip, IOMAP_F_NEW); xfs_iunlock(ip, XFS_ILOCK_EXCL); trace_xfs_iomap_alloc(ip, offset, count, allocfork, &imap); - return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW); + return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW, seq); found_imap: + seq = xfs_iomap_inode_sequence(ip, 0); xfs_iunlock(ip, XFS_ILOCK_EXCL); - return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0); + return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq); found_cow: - xfs_iunlock(ip, XFS_ILOCK_EXCL); + seq = xfs_iomap_inode_sequence(ip, 0); if (imap.br_startoff <= offset_fsb) { - error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0); + error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq); if (error) - return error; + goto out_unlock; + seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED); + xfs_iunlock(ip, XFS_ILOCK_EXCL); return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, - IOMAP_F_SHARED); + IOMAP_F_SHARED, seq); } xfs_trim_extent(&cmap, offset_fsb, imap.br_startoff - offset_fsb); - return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0, seq); out_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL); @@ -1121,6 +1176,16 @@ out_unlock: } static int +xfs_buffered_write_delalloc_punch( + struct inode *inode, + loff_t offset, + loff_t length) +{ + return xfs_bmap_punch_delalloc_range(XFS_I(inode), offset, + offset + length); +} + +static int xfs_buffered_write_iomap_end( struct inode *inode, loff_t offset, @@ -1129,56 +1194,17 @@ xfs_buffered_write_iomap_end( unsigned flags, struct iomap *iomap) { - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - xfs_fileoff_t start_fsb; - xfs_fileoff_t end_fsb; - int error = 0; - - if (iomap->type != IOMAP_DELALLOC) - return 0; - - /* - * Behave as if the write failed if drop writes is enabled. Set the NEW - * flag to force delalloc cleanup. - */ - if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_DROP_WRITES)) { - iomap->flags |= IOMAP_F_NEW; - written = 0; - } - /* - * start_fsb refers to the first unused block after a short write. If - * nothing was written, round offset down to point at the first block in - * the range. - */ - if (unlikely(!written)) - start_fsb = XFS_B_TO_FSBT(mp, offset); - else - start_fsb = XFS_B_TO_FSB(mp, offset + written); - end_fsb = XFS_B_TO_FSB(mp, offset + length); + struct xfs_mount *mp = XFS_M(inode->i_sb); + int error; - /* - * Trim delalloc blocks if they were allocated by this write and we - * didn't manage to write the whole range. - * - * We don't need to care about racing delalloc as we hold i_mutex - * across the reserve/allocate/unreserve calls. If there are delalloc - * blocks in the range, they are ours. - */ - if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) { - truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb), - XFS_FSB_TO_B(mp, end_fsb) - 1); - - error = xfs_bmap_punch_delalloc_range(ip, start_fsb, - end_fsb - start_fsb); - if (error && !xfs_is_shutdown(mp)) { - xfs_alert(mp, "%s: unable to clean up ino %lld", - __func__, ip->i_ino); - return error; - } + error = iomap_file_buffered_write_punch_delalloc(inode, iomap, offset, + length, written, &xfs_buffered_write_delalloc_punch); + if (error && !xfs_is_shutdown(mp)) { + xfs_alert(mp, "%s: unable to clean up ino 0x%llx", + __func__, XFS_I(inode)->i_ino); + return error; } - return 0; } @@ -1187,6 +1213,15 @@ const struct iomap_ops xfs_buffered_write_iomap_ops = { .iomap_end = xfs_buffered_write_iomap_end, }; +/* + * iomap_page_mkwrite() will never fail in a way that requires delalloc extents + * that it allocated to be revoked. Hence we do not need an .iomap_end method + * for this operation. + */ +const struct iomap_ops xfs_page_mkwrite_iomap_ops = { + .iomap_begin = xfs_buffered_write_iomap_begin, +}; + static int xfs_read_iomap_begin( struct inode *inode, @@ -1204,6 +1239,7 @@ xfs_read_iomap_begin( int nimaps = 1, error = 0; bool shared = false; unsigned int lockmode = XFS_ILOCK_SHARED; + u64 seq; ASSERT(!(flags & (IOMAP_WRITE | IOMAP_ZERO))); @@ -1215,15 +1251,16 @@ xfs_read_iomap_begin( return error; error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, &nimaps, 0); - if (!error && (flags & IOMAP_REPORT)) + if (!error && ((flags & IOMAP_REPORT) || IS_DAX(inode))) error = xfs_reflink_trim_around_shared(ip, &imap, &shared); + seq = xfs_iomap_inode_sequence(ip, shared ? IOMAP_F_SHARED : 0); xfs_iunlock(ip, lockmode); if (error) return error; trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap); return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, - shared ? IOMAP_F_SHARED : 0); + shared ? IOMAP_F_SHARED : 0, seq); } const struct iomap_ops xfs_read_iomap_ops = { @@ -1248,6 +1285,7 @@ xfs_seek_iomap_begin( struct xfs_bmbt_irec imap, cmap; int error = 0; unsigned lockmode; + u64 seq; if (xfs_is_shutdown(mp)) return -EIO; @@ -1282,8 +1320,9 @@ xfs_seek_iomap_begin( if (data_fsb < cow_fsb + cmap.br_blockcount) end_fsb = min(end_fsb, data_fsb); xfs_trim_extent(&cmap, offset_fsb, end_fsb); + seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED); error = xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, - IOMAP_F_SHARED); + IOMAP_F_SHARED, seq); /* * This is a COW extent, so we must probe the page cache * because there could be dirty page cache being backed @@ -1304,8 +1343,9 @@ xfs_seek_iomap_begin( imap.br_startblock = HOLESTARTBLOCK; imap.br_state = XFS_EXT_NORM; done: + seq = xfs_iomap_inode_sequence(ip, 0); xfs_trim_extent(&imap, offset_fsb, end_fsb); - error = xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0); + error = xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq); out_unlock: xfs_iunlock(ip, lockmode); return error; @@ -1331,6 +1371,7 @@ xfs_xattr_iomap_begin( struct xfs_bmbt_irec imap; int nimaps = 1, error = 0; unsigned lockmode; + int seq; if (xfs_is_shutdown(mp)) return -EIO; @@ -1347,12 +1388,14 @@ xfs_xattr_iomap_begin( error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, &nimaps, XFS_BMAPI_ATTRFORK); out_unlock: + + seq = xfs_iomap_inode_sequence(ip, IOMAP_F_XATTR); xfs_iunlock(ip, lockmode); if (error) return error; ASSERT(nimaps); - return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0); + return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_XATTR, seq); } const struct iomap_ops xfs_xattr_iomap_ops = { @@ -1370,7 +1413,7 @@ xfs_zero_range( if (IS_DAX(inode)) return dax_zero_range(inode, pos, len, did_zero, - &xfs_direct_write_iomap_ops); + &xfs_dax_write_iomap_ops); return iomap_zero_range(inode, pos, len, did_zero, &xfs_buffered_write_iomap_ops); } @@ -1385,7 +1428,7 @@ xfs_truncate_page( if (IS_DAX(inode)) return dax_truncate_page(inode, pos, did_zero, - &xfs_direct_write_iomap_ops); + &xfs_dax_write_iomap_ops); return iomap_truncate_page(inode, pos, did_zero, &xfs_buffered_write_iomap_ops); } diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index c782e8c0479c..4da13440bae9 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -13,14 +13,15 @@ struct xfs_bmbt_irec; int xfs_iomap_write_direct(struct xfs_inode *ip, xfs_fileoff_t offset_fsb, xfs_fileoff_t count_fsb, unsigned int flags, - struct xfs_bmbt_irec *imap); + struct xfs_bmbt_irec *imap, u64 *sequence); int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool); xfs_fileoff_t xfs_iomap_eof_align_last_fsb(struct xfs_inode *ip, xfs_fileoff_t end_fsb); +u64 xfs_iomap_inode_sequence(struct xfs_inode *ip, u16 iomap_flags); int xfs_bmbt_to_iomap(struct xfs_inode *ip, struct iomap *iomap, struct xfs_bmbt_irec *imap, unsigned int mapping_flags, - u16 iomap_flags); + u16 iomap_flags, u64 sequence_cookie); int xfs_zero_range(struct xfs_inode *ip, loff_t pos, loff_t len, bool *did_zero); @@ -47,6 +48,7 @@ xfs_aligned_fsb_count( } extern const struct iomap_ops xfs_buffered_write_iomap_ops; +extern const struct iomap_ops xfs_page_mkwrite_iomap_ops; extern const struct iomap_ops xfs_direct_write_iomap_ops; extern const struct iomap_ops xfs_read_iomap_ops; extern const struct iomap_ops xfs_seek_iomap_ops; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 2e10e1c66ad6..515318dfbc38 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -651,6 +651,7 @@ xfs_vn_change_ok( static int xfs_setattr_nonsize( struct user_namespace *mnt_userns, + struct dentry *dentry, struct xfs_inode *ip, struct iattr *iattr) { @@ -757,7 +758,7 @@ xfs_setattr_nonsize( * Posix ACL code seems to care about this issue either. */ if (mask & ATTR_MODE) { - error = posix_acl_chmod(mnt_userns, inode, inode->i_mode); + error = posix_acl_chmod(mnt_userns, dentry, inode->i_mode); if (error) return error; } @@ -779,6 +780,7 @@ out_dqrele: STATIC int xfs_setattr_size( struct user_namespace *mnt_userns, + struct dentry *dentry, struct xfs_inode *ip, struct iattr *iattr) { @@ -810,7 +812,7 @@ xfs_setattr_size( * Use the regular setattr path to update the timestamps. */ iattr->ia_valid &= ~ATTR_SIZE; - return xfs_setattr_nonsize(mnt_userns, ip, iattr); + return xfs_setattr_nonsize(mnt_userns, dentry, ip, iattr); } /* @@ -987,7 +989,7 @@ xfs_vn_setattr_size( error = xfs_vn_change_ok(mnt_userns, dentry, iattr); if (error) return error; - return xfs_setattr_size(mnt_userns, ip, iattr); + return xfs_setattr_size(mnt_userns, dentry, ip, iattr); } STATIC int @@ -1019,7 +1021,7 @@ xfs_vn_setattr( error = xfs_vn_change_ok(mnt_userns, dentry, iattr); if (!error) - error = xfs_setattr_nonsize(mnt_userns, ip, iattr); + error = xfs_setattr_nonsize(mnt_userns, dentry, ip, iattr); } return error; @@ -1101,7 +1103,7 @@ xfs_vn_tmpfile( } static const struct inode_operations xfs_inode_operations = { - .get_acl = xfs_get_acl, + .get_inode_acl = xfs_get_acl, .set_acl = xfs_set_acl, .getattr = xfs_vn_getattr, .setattr = xfs_vn_setattr, @@ -1128,7 +1130,7 @@ static const struct inode_operations xfs_dir_inode_operations = { .rmdir = xfs_vn_unlink, .mknod = xfs_vn_mknod, .rename = xfs_vn_rename, - .get_acl = xfs_get_acl, + .get_inode_acl = xfs_get_acl, .set_acl = xfs_set_acl, .getattr = xfs_vn_getattr, .setattr = xfs_vn_setattr, @@ -1155,7 +1157,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = { .rmdir = xfs_vn_unlink, .mknod = xfs_vn_mknod, .rename = xfs_vn_rename, - .get_acl = xfs_get_acl, + .get_inode_acl = xfs_get_acl, .set_acl = xfs_set_acl, .getattr = xfs_vn_getattr, .setattr = xfs_vn_setattr, @@ -1185,10 +1187,6 @@ xfs_inode_supports_dax( if (!S_ISREG(VFS_I(ip)->i_mode)) return false; - /* Only supported on non-reflinked files. */ - if (xfs_is_reflink_inode(ip)) - return false; - /* Block size must match page size */ if (mp->m_sb.sb_blocksize != PAGE_SIZE) return false; diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index f02a0dd522b3..fc61cc024023 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -644,12 +644,14 @@ xfs_log_mount( int min_logfsbs; if (!xfs_has_norecovery(mp)) { - xfs_notice(mp, "Mounting V%d Filesystem", - XFS_SB_VERSION_NUM(&mp->m_sb)); + xfs_notice(mp, "Mounting V%d Filesystem %pU", + XFS_SB_VERSION_NUM(&mp->m_sb), + &mp->m_sb.sb_uuid); } else { xfs_notice(mp, -"Mounting V%d filesystem in no-recovery mode. Filesystem will be inconsistent.", - XFS_SB_VERSION_NUM(&mp->m_sb)); +"Mounting V%d filesystem %pU in no-recovery mode. Filesystem will be inconsistent.", + XFS_SB_VERSION_NUM(&mp->m_sb), + &mp->m_sb.sb_uuid); ASSERT(xfs_is_readonly(mp)); } @@ -887,6 +889,23 @@ xlog_force_iclog( } /* + * Cycle all the iclogbuf locks to make sure all log IO completion + * is done before we tear down these buffers. + */ +static void +xlog_wait_iclog_completion(struct xlog *log) +{ + int i; + struct xlog_in_core *iclog = log->l_iclog; + + for (i = 0; i < log->l_iclog_bufs; i++) { + down(&iclog->ic_sema); + up(&iclog->ic_sema); + iclog = iclog->ic_next; + } +} + +/* * Wait for the iclog and all prior iclogs to be written disk as required by the * log force state machine. Waiting on ic_force_wait ensures iclog completions * have been ordered and callbacks run before we are woken here, hence @@ -1111,6 +1130,14 @@ xfs_log_unmount( { xfs_log_clean(mp); + /* + * If shutdown has come from iclog IO context, the log + * cleaning will have been skipped and so we need to wait + * for the iclog to complete shutdown processing before we + * tear anything down. + */ + xlog_wait_iclog_completion(mp->m_log); + xfs_buftarg_drain(mp->m_ddev_targp); xfs_trans_ail_destroy(mp); @@ -2114,17 +2141,6 @@ xlog_dealloc_log( int i; /* - * Cycle all the iclogbuf locks to make sure all log IO completion - * is done before we tear down these buffers. - */ - iclog = log->l_iclog; - for (i = 0; i < log->l_iclog_bufs; i++) { - down(&iclog->ic_sema); - up(&iclog->ic_sema); - iclog = iclog->ic_next; - } - - /* * Destroy the CIL after waiting for iclog IO completion because an * iclog EIO error will try to shut down the log, which accesses the * CIL to wake up the waiters. diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 17e923b9c5fa..322eb2ee6c55 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2552,6 +2552,8 @@ xlog_recover_process_intents( for (lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); lip != NULL; lip = xfs_trans_ail_cursor_next(ailp, &cur)) { + const struct xfs_item_ops *ops; + if (!xlog_item_is_intent(lip)) break; @@ -2567,13 +2569,17 @@ xlog_recover_process_intents( * deferred ops, you /must/ attach them to the capture list in * the recover routine or else those subsequent intents will be * replayed in the wrong order! + * + * The recovery function can free the log item, so we must not + * access lip after it returns. */ spin_unlock(&ailp->ail_lock); - error = lip->li_ops->iop_recover(lip, &capture_list); + ops = lip->li_ops; + error = ops->iop_recover(lip, &capture_list); spin_lock(&ailp->ail_lock); if (error) { trace_xlog_intent_recovery_failed(log->l_mp, error, - lip->li_ops->iop_recover); + ops->iop_recover); break; } } diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index e8bb3c2e847e..fb87ffb48f7f 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -538,6 +538,20 @@ xfs_check_summary_counts( return 0; } +static void +xfs_unmount_check( + struct xfs_mount *mp) +{ + if (xfs_is_shutdown(mp)) + return; + + if (percpu_counter_sum(&mp->m_ifree) > + percpu_counter_sum(&mp->m_icount)) { + xfs_alert(mp, "ifree/icount mismatch at unmount"); + xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS); + } +} + /* * Flush and reclaim dirty inodes in preparation for unmount. Inodes and * internal inode structures can be sitting in the CIL and AIL at this point, @@ -1077,6 +1091,7 @@ xfs_unmountfs( if (error) xfs_warn(mp, "Unable to free reserved block pool. " "Freespace may not be correct on next mount."); + xfs_unmount_check(mp); xfs_log_unmount(mp); xfs_da_unmount(mp); diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h index 758702b9495f..9737b5a9f405 100644 --- a/fs/xfs/xfs_ondisk.h +++ b/fs/xfs/xfs_ondisk.h @@ -118,10 +118,10 @@ xfs_check_ondisk_structs(void) /* log structures */ XFS_CHECK_STRUCT_SIZE(struct xfs_buf_log_format, 88); XFS_CHECK_STRUCT_SIZE(struct xfs_dq_logformat, 24); - XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_32, 28); - XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_64, 32); - XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_32, 28); - XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_64, 32); + XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_32, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_64, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_32, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_64, 16); XFS_CHECK_STRUCT_SIZE(struct xfs_extent_32, 12); XFS_CHECK_STRUCT_SIZE(struct xfs_extent_64, 16); XFS_CHECK_STRUCT_SIZE(struct xfs_log_dinode, 176); @@ -134,6 +134,21 @@ xfs_check_ondisk_structs(void) XFS_CHECK_STRUCT_SIZE(struct xfs_trans_header, 16); XFS_CHECK_STRUCT_SIZE(struct xfs_attri_log_format, 40); XFS_CHECK_STRUCT_SIZE(struct xfs_attrd_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_bui_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_bud_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_cui_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_cud_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_rui_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_rud_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_map_extent, 32); + XFS_CHECK_STRUCT_SIZE(struct xfs_phys_extent, 16); + + XFS_CHECK_OFFSET(struct xfs_bui_log_format, bui_extents, 16); + XFS_CHECK_OFFSET(struct xfs_cui_log_format, cui_extents, 16); + XFS_CHECK_OFFSET(struct xfs_rui_log_format, rui_extents, 16); + XFS_CHECK_OFFSET(struct xfs_efi_log_format, efi_extents, 16); + XFS_CHECK_OFFSET(struct xfs_efi_log_format_32, efi_extents, 16); + XFS_CHECK_OFFSET(struct xfs_efi_log_format_64, efi_extents, 16); /* * The v5 superblock format extended several v4 header structures with diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index 37a24f0f7cd4..38d23f0e703a 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -125,6 +125,7 @@ xfs_fs_map_blocks( int nimaps = 1; uint lock_flags; int error = 0; + u64 seq; if (xfs_is_shutdown(mp)) return -EIO; @@ -176,6 +177,7 @@ xfs_fs_map_blocks( lock_flags = xfs_ilock_data_map_shared(ip); error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, &nimaps, bmapi_flags); + seq = xfs_iomap_inode_sequence(ip, 0); ASSERT(!nimaps || imap.br_startblock != DELAYSTARTBLOCK); @@ -189,7 +191,7 @@ xfs_fs_map_blocks( xfs_iunlock(ip, lock_flags); error = xfs_iomap_write_direct(ip, offset_fsb, - end_fsb - offset_fsb, 0, &imap); + end_fsb - offset_fsb, 0, &imap, &seq); if (error) goto out_unlock; @@ -209,7 +211,7 @@ xfs_fs_map_blocks( } xfs_iunlock(ip, XFS_IOLOCK_EXCL); - error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0, 0); + error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0, 0, seq); *device_generation = mp->m_generation; return error; out_unlock: diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 18bb4ec4d7c9..ff53d40a2dae 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -423,6 +423,14 @@ xfs_qm_dquot_isolate( goto out_miss_busy; /* + * If something else is freeing this dquot and hasn't yet removed it + * from the LRU, leave it for the freeing task to complete the freeing + * process rather than risk it being free from under us here. + */ + if (dqp->q_flags & XFS_DQFLAG_FREEING) + goto out_miss_unlock; + + /* * This dquot has acquired a reference in the meantime remove it from * the freelist and try again. */ @@ -441,10 +449,8 @@ xfs_qm_dquot_isolate( * skip it so there is time for the IO to complete before we try to * reclaim it again on the next LRU pass. */ - if (!xfs_dqflock_nowait(dqp)) { - xfs_dqunlock(dqp); - goto out_miss_busy; - } + if (!xfs_dqflock_nowait(dqp)) + goto out_miss_unlock; if (XFS_DQ_IS_DIRTY(dqp)) { struct xfs_buf *bp = NULL; @@ -478,6 +484,8 @@ xfs_qm_dquot_isolate( XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaims); return LRU_REMOVED; +out_miss_unlock: + xfs_dqunlock(dqp); out_miss_busy: trace_xfs_dqreclaim_busy(dqp); XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses); diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 7e97bf19793d..858e3e9eb4a8 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -523,7 +523,9 @@ xfs_cui_item_recover( type = refc_type; break; default: - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + &cuip->cui_format, + sizeof(cuip->cui_format)); error = -EFSCORRUPTED; goto abort_error; } @@ -536,7 +538,8 @@ xfs_cui_item_recover( &new_fsb, &new_len, &rcur); if (error == -EFSCORRUPTED) XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - refc, sizeof(*refc)); + &cuip->cui_format, + sizeof(cuip->cui_format)); if (error) goto abort_error; @@ -622,28 +625,18 @@ static const struct xfs_item_ops xfs_cui_item_ops = { .iop_relog = xfs_cui_item_relog, }; -/* - * Copy an CUI format buffer from the given buf, and into the destination - * CUI format structure. The CUI/CUD items were designed not to need any - * special alignment handling. - */ -static int +static inline void xfs_cui_copy_format( - struct xfs_log_iovec *buf, - struct xfs_cui_log_format *dst_cui_fmt) + struct xfs_cui_log_format *dst, + const struct xfs_cui_log_format *src) { - struct xfs_cui_log_format *src_cui_fmt; - uint len; + unsigned int i; - src_cui_fmt = buf->i_addr; - len = xfs_cui_log_format_sizeof(src_cui_fmt->cui_nextents); + memcpy(dst, src, offsetof(struct xfs_cui_log_format, cui_extents)); - if (buf->i_len == len) { - memcpy(dst_cui_fmt, src_cui_fmt, len); - return 0; - } - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); - return -EFSCORRUPTED; + for (i = 0; i < src->cui_nextents; i++) + memcpy(&dst->cui_extents[i], &src->cui_extents[i], + sizeof(struct xfs_phys_extent)); } /* @@ -660,19 +653,28 @@ xlog_recover_cui_commit_pass2( struct xlog_recover_item *item, xfs_lsn_t lsn) { - int error; struct xfs_mount *mp = log->l_mp; struct xfs_cui_log_item *cuip; struct xfs_cui_log_format *cui_formatp; + size_t len; cui_formatp = item->ri_buf[0].i_addr; - cuip = xfs_cui_init(mp, cui_formatp->cui_nextents); - error = xfs_cui_copy_format(&item->ri_buf[0], &cuip->cui_format); - if (error) { - xfs_cui_item_free(cuip); - return error; + if (item->ri_buf[0].i_len < xfs_cui_log_format_sizeof(0)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + return -EFSCORRUPTED; } + + len = xfs_cui_log_format_sizeof(cui_formatp->cui_nextents); + if (item->ri_buf[0].i_len != len) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + return -EFSCORRUPTED; + } + + cuip = xfs_cui_init(mp, cui_formatp->cui_nextents); + xfs_cui_copy_format(&cuip->cui_format, cui_formatp); atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents); /* * Insert the intent into the AIL directly and drop one reference so @@ -706,7 +708,8 @@ xlog_recover_cud_commit_pass2( cud_formatp = item->ri_buf[0].i_addr; if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 93bdd25680bc..fe46bce8cae6 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1693,8 +1693,12 @@ xfs_reflink_unshare( inode_dio_wait(inode); - error = iomap_file_unshare(inode, offset, len, - &xfs_buffered_write_iomap_ops); + if (IS_DAX(inode)) + error = dax_file_unshare(inode, offset, len, + &xfs_dax_write_iomap_ops); + else + error = iomap_file_unshare(inode, offset, len, + &xfs_buffered_write_iomap_ops); if (error) goto out; diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index fef92e02f3bb..534504ede1a3 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -155,31 +155,6 @@ xfs_rui_init( return ruip; } -/* - * Copy an RUI format buffer from the given buf, and into the destination - * RUI format structure. The RUI/RUD items were designed not to need any - * special alignment handling. - */ -STATIC int -xfs_rui_copy_format( - struct xfs_log_iovec *buf, - struct xfs_rui_log_format *dst_rui_fmt) -{ - struct xfs_rui_log_format *src_rui_fmt; - uint len; - - src_rui_fmt = buf->i_addr; - len = xfs_rui_log_format_sizeof(src_rui_fmt->rui_nextents); - - if (buf->i_len != len) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); - return -EFSCORRUPTED; - } - - memcpy(dst_rui_fmt, src_rui_fmt, len); - return 0; -} - static inline struct xfs_rud_log_item *RUD_ITEM(struct xfs_log_item *lip) { return container_of(lip, struct xfs_rud_log_item, rud_item); @@ -582,7 +557,9 @@ xfs_rui_item_recover( type = XFS_RMAP_FREE; break; default: - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + &ruip->rui_format, + sizeof(ruip->rui_format)); error = -EFSCORRUPTED; goto abort_error; } @@ -652,6 +629,20 @@ static const struct xfs_item_ops xfs_rui_item_ops = { .iop_relog = xfs_rui_item_relog, }; +static inline void +xfs_rui_copy_format( + struct xfs_rui_log_format *dst, + const struct xfs_rui_log_format *src) +{ + unsigned int i; + + memcpy(dst, src, offsetof(struct xfs_rui_log_format, rui_extents)); + + for (i = 0; i < src->rui_nextents; i++) + memcpy(&dst->rui_extents[i], &src->rui_extents[i], + sizeof(struct xfs_map_extent)); +} + /* * This routine is called to create an in-core extent rmap update * item from the rui format structure which was logged on disk. @@ -666,19 +657,28 @@ xlog_recover_rui_commit_pass2( struct xlog_recover_item *item, xfs_lsn_t lsn) { - int error; struct xfs_mount *mp = log->l_mp; struct xfs_rui_log_item *ruip; struct xfs_rui_log_format *rui_formatp; + size_t len; rui_formatp = item->ri_buf[0].i_addr; - ruip = xfs_rui_init(mp, rui_formatp->rui_nextents); - error = xfs_rui_copy_format(&item->ri_buf[0], &ruip->rui_format); - if (error) { - xfs_rui_item_free(ruip); - return error; + if (item->ri_buf[0].i_len < xfs_rui_log_format_sizeof(0)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + return -EFSCORRUPTED; + } + + len = xfs_rui_log_format_sizeof(rui_formatp->rui_nextents); + if (item->ri_buf[0].i_len != len) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + return -EFSCORRUPTED; } + + ruip = xfs_rui_init(mp, rui_formatp->rui_nextents); + xfs_rui_copy_format(&ruip->rui_format, rui_formatp); atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents); /* * Insert the intent into the AIL directly and drop one reference so @@ -711,7 +711,11 @@ xlog_recover_rud_commit_pass2( struct xfs_rud_log_format *rud_formatp; rud_formatp = item->ri_buf[0].i_addr; - ASSERT(item->ri_buf[0].i_len == sizeof(struct xfs_rud_log_format)); + if (item->ri_buf[0].i_len != sizeof(struct xfs_rud_log_format)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, + rud_formatp, item->ri_buf[0].i_len); + return -EFSCORRUPTED; + } xlog_recover_release_intent(log, XFS_LI_RUI, rud_formatp->rud_rui_id); return 0; diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 292d5e54a92c..16534e9873f6 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -1311,10 +1311,10 @@ xfs_rtalloc_reinit_frextents( uint64_t val = 0; int error; - xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); + xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); error = xfs_rtalloc_query_all(mp, NULL, xfs_rtalloc_count_frextent, &val); - xfs_iunlock(mp->m_rbmip, XFS_ILOCK_EXCL); + xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); if (error) return error; @@ -1326,6 +1326,41 @@ xfs_rtalloc_reinit_frextents( } /* + * Read in the bmbt of an rt metadata inode so that we never have to load them + * at runtime. This enables the use of shared ILOCKs for rtbitmap scans. Use + * an empty transaction to avoid deadlocking on loops in the bmbt. + */ +static inline int +xfs_rtmount_iread_extents( + struct xfs_inode *ip, + unsigned int lock_class) +{ + struct xfs_trans *tp; + int error; + + error = xfs_trans_alloc_empty(ip->i_mount, &tp); + if (error) + return error; + + xfs_ilock(ip, XFS_ILOCK_EXCL | lock_class); + + error = xfs_iread_extents(tp, ip, XFS_DATA_FORK); + if (error) + goto out_unlock; + + if (xfs_inode_has_attr_fork(ip)) { + error = xfs_iread_extents(tp, ip, XFS_ATTR_FORK); + if (error) + goto out_unlock; + } + +out_unlock: + xfs_iunlock(ip, XFS_ILOCK_EXCL | lock_class); + xfs_trans_cancel(tp); + return error; +} + +/* * Get the bitmap and summary inodes and the summary cache into the mount * structure at mount time. */ @@ -1342,14 +1377,27 @@ xfs_rtmount_inodes( return error; ASSERT(mp->m_rbmip != NULL); + error = xfs_rtmount_iread_extents(mp->m_rbmip, XFS_ILOCK_RTBITMAP); + if (error) + goto out_rele_bitmap; + error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip); - if (error) { - xfs_irele(mp->m_rbmip); - return error; - } + if (error) + goto out_rele_bitmap; ASSERT(mp->m_rsumip != NULL); + + error = xfs_rtmount_iread_extents(mp->m_rsumip, XFS_ILOCK_RTSUM); + if (error) + goto out_rele_summary; + xfs_alloc_rsum_cache(mp, sbp->sb_rbmblocks); return 0; + +out_rele_summary: + xfs_irele(mp->m_rsumip); +out_rele_bitmap: + xfs_irele(mp->m_rbmip); + return error; } void diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index f029c6702dda..0c4b73e9b29d 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1110,7 +1110,7 @@ xfs_fs_put_super( if (!sb->s_fs_info) return; - xfs_notice(mp, "Unmounting Filesystem"); + xfs_notice(mp, "Unmounting Filesystem %pU", &mp->m_sb.sb_uuid); xfs_filestream_unmount(mp); xfs_unmountfs(mp); @@ -2028,18 +2028,14 @@ xfs_init_caches(void) goto out_destroy_trans_cache; xfs_efd_cache = kmem_cache_create("xfs_efd_item", - (sizeof(struct xfs_efd_log_item) + - (XFS_EFD_MAX_FAST_EXTENTS - 1) * - sizeof(struct xfs_extent)), - 0, 0, NULL); + xfs_efd_log_item_sizeof(XFS_EFD_MAX_FAST_EXTENTS), + 0, 0, NULL); if (!xfs_efd_cache) goto out_destroy_buf_item_cache; xfs_efi_cache = kmem_cache_create("xfs_efi_item", - (sizeof(struct xfs_efi_log_item) + - (XFS_EFI_MAX_FAST_EXTENTS - 1) * - sizeof(struct xfs_extent)), - 0, 0, NULL); + xfs_efi_log_item_sizeof(XFS_EFI_MAX_FAST_EXTENTS), + 0, 0, NULL); if (!xfs_efi_cache) goto out_destroy_efd_cache; diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h index 43585850f154..513095e353a5 100644 --- a/fs/xfs/xfs_sysfs.h +++ b/fs/xfs/xfs_sysfs.h @@ -33,10 +33,15 @@ xfs_sysfs_init( const char *name) { struct kobject *parent; + int err; parent = parent_kobj ? &parent_kobj->kobject : NULL; init_completion(&kobj->complete); - return kobject_init_and_add(&kobj->kobject, ktype, parent, "%s", name); + err = kobject_init_and_add(&kobj->kobject, ktype, parent, "%s", name); + if (err) + kobject_put(&kobj->kobject); + + return err; } static inline void diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c index d269ef57ff01..8a5dc1538aa8 100644 --- a/fs/xfs/xfs_trace.c +++ b/fs/xfs/xfs_trace.c @@ -34,6 +34,8 @@ #include "xfs_ag.h" #include "xfs_ag_resv.h" #include "xfs_error.h" +#include <linux/iomap.h> +#include "xfs_iomap.h" /* * We include this last to have the helpers above available for the trace diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index cb7c81ba7fa3..421d1e504ac4 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -799,6 +799,9 @@ TRACE_DEFINE_ENUM(PE_SIZE_PTE); TRACE_DEFINE_ENUM(PE_SIZE_PMD); TRACE_DEFINE_ENUM(PE_SIZE_PUD); +TRACE_DEFINE_ENUM(XFS_REFC_DOMAIN_SHARED); +TRACE_DEFINE_ENUM(XFS_REFC_DOMAIN_COW); + TRACE_EVENT(xfs_filemap_fault, TP_PROTO(struct xfs_inode *ip, enum page_entry_size pe_size, bool write_fault), @@ -2925,6 +2928,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_class, TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) + __field(enum xfs_refc_domain, domain) __field(xfs_agblock_t, startblock) __field(xfs_extlen_t, blockcount) __field(xfs_nlink_t, refcount) @@ -2932,13 +2936,15 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_class, TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; + __entry->domain = irec->rc_domain; __entry->startblock = irec->rc_startblock; __entry->blockcount = irec->rc_blockcount; __entry->refcount = irec->rc_refcount; ), - TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u", + TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, + __print_symbolic(__entry->domain, XFS_REFC_DOMAIN_STRINGS), __entry->startblock, __entry->blockcount, __entry->refcount) @@ -2958,6 +2964,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_at_class, TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) + __field(enum xfs_refc_domain, domain) __field(xfs_agblock_t, startblock) __field(xfs_extlen_t, blockcount) __field(xfs_nlink_t, refcount) @@ -2966,14 +2973,16 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_at_class, TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; + __entry->domain = irec->rc_domain; __entry->startblock = irec->rc_startblock; __entry->blockcount = irec->rc_blockcount; __entry->refcount = irec->rc_refcount; __entry->agbno = agbno; ), - TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x", + TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, + __print_symbolic(__entry->domain, XFS_REFC_DOMAIN_STRINGS), __entry->startblock, __entry->blockcount, __entry->refcount, @@ -2994,9 +3003,11 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_class, TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) + __field(enum xfs_refc_domain, i1_domain) __field(xfs_agblock_t, i1_startblock) __field(xfs_extlen_t, i1_blockcount) __field(xfs_nlink_t, i1_refcount) + __field(enum xfs_refc_domain, i2_domain) __field(xfs_agblock_t, i2_startblock) __field(xfs_extlen_t, i2_blockcount) __field(xfs_nlink_t, i2_refcount) @@ -3004,20 +3015,24 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_class, TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; + __entry->i1_domain = i1->rc_domain; __entry->i1_startblock = i1->rc_startblock; __entry->i1_blockcount = i1->rc_blockcount; __entry->i1_refcount = i1->rc_refcount; + __entry->i2_domain = i2->rc_domain; __entry->i2_startblock = i2->rc_startblock; __entry->i2_blockcount = i2->rc_blockcount; __entry->i2_refcount = i2->rc_refcount; ), - TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u -- " - "agbno 0x%x fsbcount 0x%x refcount %u", + TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u -- " + "dom %s agbno 0x%x fsbcount 0x%x refcount %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, + __print_symbolic(__entry->i1_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i1_startblock, __entry->i1_blockcount, __entry->i1_refcount, + __print_symbolic(__entry->i2_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i2_startblock, __entry->i2_blockcount, __entry->i2_refcount) @@ -3038,9 +3053,11 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_at_class, TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) + __field(enum xfs_refc_domain, i1_domain) __field(xfs_agblock_t, i1_startblock) __field(xfs_extlen_t, i1_blockcount) __field(xfs_nlink_t, i1_refcount) + __field(enum xfs_refc_domain, i2_domain) __field(xfs_agblock_t, i2_startblock) __field(xfs_extlen_t, i2_blockcount) __field(xfs_nlink_t, i2_refcount) @@ -3049,21 +3066,25 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_at_class, TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; + __entry->i1_domain = i1->rc_domain; __entry->i1_startblock = i1->rc_startblock; __entry->i1_blockcount = i1->rc_blockcount; __entry->i1_refcount = i1->rc_refcount; + __entry->i2_domain = i2->rc_domain; __entry->i2_startblock = i2->rc_startblock; __entry->i2_blockcount = i2->rc_blockcount; __entry->i2_refcount = i2->rc_refcount; __entry->agbno = agbno; ), - TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u -- " - "agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x", + TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u -- " + "dom %s agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, + __print_symbolic(__entry->i1_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i1_startblock, __entry->i1_blockcount, __entry->i1_refcount, + __print_symbolic(__entry->i2_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i2_startblock, __entry->i2_blockcount, __entry->i2_refcount, @@ -3086,12 +3107,15 @@ DECLARE_EVENT_CLASS(xfs_refcount_triple_extent_class, TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) + __field(enum xfs_refc_domain, i1_domain) __field(xfs_agblock_t, i1_startblock) __field(xfs_extlen_t, i1_blockcount) __field(xfs_nlink_t, i1_refcount) + __field(enum xfs_refc_domain, i2_domain) __field(xfs_agblock_t, i2_startblock) __field(xfs_extlen_t, i2_blockcount) __field(xfs_nlink_t, i2_refcount) + __field(enum xfs_refc_domain, i3_domain) __field(xfs_agblock_t, i3_startblock) __field(xfs_extlen_t, i3_blockcount) __field(xfs_nlink_t, i3_refcount) @@ -3099,27 +3123,33 @@ DECLARE_EVENT_CLASS(xfs_refcount_triple_extent_class, TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; + __entry->i1_domain = i1->rc_domain; __entry->i1_startblock = i1->rc_startblock; __entry->i1_blockcount = i1->rc_blockcount; __entry->i1_refcount = i1->rc_refcount; + __entry->i2_domain = i2->rc_domain; __entry->i2_startblock = i2->rc_startblock; __entry->i2_blockcount = i2->rc_blockcount; __entry->i2_refcount = i2->rc_refcount; + __entry->i3_domain = i3->rc_domain; __entry->i3_startblock = i3->rc_startblock; __entry->i3_blockcount = i3->rc_blockcount; __entry->i3_refcount = i3->rc_refcount; ), - TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u -- " - "agbno 0x%x fsbcount 0x%x refcount %u -- " - "agbno 0x%x fsbcount 0x%x refcount %u", + TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u -- " + "dom %s agbno 0x%x fsbcount 0x%x refcount %u -- " + "dom %s agbno 0x%x fsbcount 0x%x refcount %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, + __print_symbolic(__entry->i1_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i1_startblock, __entry->i1_blockcount, __entry->i1_refcount, + __print_symbolic(__entry->i2_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i2_startblock, __entry->i2_blockcount, __entry->i2_refcount, + __print_symbolic(__entry->i3_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i3_startblock, __entry->i3_blockcount, __entry->i3_refcount) @@ -3322,6 +3352,92 @@ DEFINE_EVENT(xfs_inode_irec_class, name, \ TP_PROTO(struct xfs_inode *ip, struct xfs_bmbt_irec *irec), \ TP_ARGS(ip, irec)) +/* inode iomap invalidation events */ +DECLARE_EVENT_CLASS(xfs_wb_invalid_class, + TP_PROTO(struct xfs_inode *ip, const struct iomap *iomap, unsigned int wpcseq, int whichfork), + TP_ARGS(ip, iomap, wpcseq, whichfork), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(u64, addr) + __field(loff_t, pos) + __field(u64, len) + __field(u16, type) + __field(u16, flags) + __field(u32, wpcseq) + __field(u32, forkseq) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->addr = iomap->addr; + __entry->pos = iomap->offset; + __entry->len = iomap->length; + __entry->type = iomap->type; + __entry->flags = iomap->flags; + __entry->wpcseq = wpcseq; + __entry->forkseq = READ_ONCE(xfs_ifork_ptr(ip, whichfork)->if_seq); + ), + TP_printk("dev %d:%d ino 0x%llx pos 0x%llx addr 0x%llx bytecount 0x%llx type 0x%x flags 0x%x wpcseq 0x%x forkseq 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->pos, + __entry->addr, + __entry->len, + __entry->type, + __entry->flags, + __entry->wpcseq, + __entry->forkseq) +); +#define DEFINE_WB_INVALID_EVENT(name) \ +DEFINE_EVENT(xfs_wb_invalid_class, name, \ + TP_PROTO(struct xfs_inode *ip, const struct iomap *iomap, unsigned int wpcseq, int whichfork), \ + TP_ARGS(ip, iomap, wpcseq, whichfork)) +DEFINE_WB_INVALID_EVENT(xfs_wb_cow_iomap_invalid); +DEFINE_WB_INVALID_EVENT(xfs_wb_data_iomap_invalid); + +DECLARE_EVENT_CLASS(xfs_iomap_invalid_class, + TP_PROTO(struct xfs_inode *ip, const struct iomap *iomap), + TP_ARGS(ip, iomap), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(u64, addr) + __field(loff_t, pos) + __field(u64, len) + __field(u64, validity_cookie) + __field(u64, inodeseq) + __field(u16, type) + __field(u16, flags) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->addr = iomap->addr; + __entry->pos = iomap->offset; + __entry->len = iomap->length; + __entry->validity_cookie = iomap->validity_cookie; + __entry->type = iomap->type; + __entry->flags = iomap->flags; + __entry->inodeseq = xfs_iomap_inode_sequence(ip, iomap->flags); + ), + TP_printk("dev %d:%d ino 0x%llx pos 0x%llx addr 0x%llx bytecount 0x%llx type 0x%x flags 0x%x validity_cookie 0x%llx inodeseq 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->pos, + __entry->addr, + __entry->len, + __entry->type, + __entry->flags, + __entry->validity_cookie, + __entry->inodeseq) +); +#define DEFINE_IOMAP_INVALID_EVENT(name) \ +DEFINE_EVENT(xfs_iomap_invalid_class, name, \ + TP_PROTO(struct xfs_inode *ip, const struct iomap *iomap), \ + TP_ARGS(ip, iomap)) +DEFINE_IOMAP_INVALID_EVENT(xfs_iomap_invalid); + /* refcount/reflink tracepoint definitions */ /* reflink tracepoints */ diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 16fbf2a1144c..7d4109af193e 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -422,7 +422,7 @@ xfsaild_push( struct xfs_ail_cursor cur; struct xfs_log_item *lip; xfs_lsn_t lsn; - xfs_lsn_t target; + xfs_lsn_t target = NULLCOMMITLSN; long tout; int stuck = 0; int flushing = 0; @@ -472,6 +472,8 @@ xfsaild_push( XFS_STATS_INC(mp, xs_push_ail); + ASSERT(target != NULLCOMMITLSN); + lsn = lip->li_lsn; while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) { int lock_result; @@ -730,11 +732,10 @@ void xfs_ail_push_all_sync( struct xfs_ail *ailp) { - struct xfs_log_item *lip; DEFINE_WAIT(wait); spin_lock(&ailp->ail_lock); - while ((lip = xfs_ail_max(ailp)) != NULL) { + while (xfs_ail_max(ailp) != NULL) { prepare_to_wait(&ailp->ail_empty, &wait, TASK_UNINTERRUPTIBLE); wake_up_process(ailp->ail_task); spin_unlock(&ailp->ail_lock); diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index c325a28b89a8..10aa1fd39d2b 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -210,7 +210,7 @@ __xfs_xattr_put_listent( return; } offset = context->buffer + context->count; - strncpy(offset, prefix, prefix_len); + memcpy(offset, prefix, prefix_len); offset += prefix_len; strncpy(offset, (char *)name, namelen); /* real name */ offset += namelen; |