Diffstat (limited to 'fs')
-rw-r--r--  fs/btrfs/export.c                 |   8
-rw-r--r--  fs/btrfs/extent_io.c              |  23
-rw-r--r--  fs/cramfs/inode.c                 |  11
-rw-r--r--  fs/eventpoll.c                    | 139
-rw-r--r--  fs/ext4/ext4.h                    |  12
-rw-r--r--  fs/ext4/file.c                    |   2
-rw-r--r--  fs/ext4/fsmap.c                   |  14
-rw-r--r--  fs/ext4/indirect.c                |   2
-rw-r--r--  fs/ext4/inode.c                   |  12
-rw-r--r--  fs/ext4/move_extent.c             |   2
-rw-r--r--  fs/ext4/orphan.c                  |  23
-rw-r--r--  fs/ext4/super.c                   |  30
-rw-r--r--  fs/ext4/xattr.c                   |  19
-rw-r--r--  fs/f2fs/data.c                    |   9
-rw-r--r--  fs/f2fs/f2fs.h                    |   4
-rw-r--r--  fs/f2fs/file.c                    |  49
-rw-r--r--  fs/file.c                         |   5
-rw-r--r--  fs/fs-writeback.c                 |  32
-rw-r--r--  fs/fsopen.c                       |  70
-rw-r--r--  fs/fuse/dev.c                     |   2
-rw-r--r--  fs/fuse/file.c                    |   8
-rw-r--r--  fs/gfs2/glock.c                   |   2
-rw-r--r--  fs/minix/inode.c                  |   8
-rw-r--r--  fs/namei.c                        |   8
-rw-r--r--  fs/namespace.c                    | 108
-rw-r--r--  fs/nfs/callback.c                 |   4
-rw-r--r--  fs/nfs/callback_xdr.c             |   1
-rw-r--r--  fs/nfs/nfs4proc.c                 |   2
-rw-r--r--  fs/nfsd/export.c                  |  96
-rw-r--r--  fs/nfsd/export.h                  |   6
-rw-r--r--  fs/nfsd/lockd.c                   |  28
-rw-r--r--  fs/nfsd/netns.h                   |   4
-rw-r--r--  fs/nfsd/nfs4proc.c                |   4
-rw-r--r--  fs/nfsd/nfs4state.c               |   6
-rw-r--r--  fs/nfsd/nfs4xdr.c                 |   2
-rw-r--r--  fs/nfsd/nfsfh.c                   |  40
-rw-r--r--  fs/nfsd/trace.h                   |   2
-rw-r--r--  fs/nfsd/vfs.c                     |  14
-rw-r--r--  fs/nfsd/vfs.h                     |   2
-rw-r--r--  fs/ntfs3/bitmap.c                 |   1
-rw-r--r--  fs/ntfs3/index.c                  |  10
-rw-r--r--  fs/ntfs3/run.c                    |  12
-rw-r--r--  fs/ocfs2/stack_user.c             |   1
-rw-r--r--  fs/quota/dquot.c                  |  10
-rw-r--r--  fs/read_write.c                   |  14
-rw-r--r--  fs/smb/client/smb1ops.c           |  62
-rw-r--r--  fs/smb/client/smb2inode.c         |  22
-rw-r--r--  fs/smb/client/smb2ops.c           |  27
-rw-r--r--  fs/smb/server/ksmbd_netlink.h     |   5
-rw-r--r--  fs/smb/server/mgmt/user_session.c |  26
-rw-r--r--  fs/smb/server/server.h            |   1
-rw-r--r--  fs/smb/server/smb2pdu.c           |   3
-rw-r--r--  fs/smb/server/transport_ipc.c     |   3
-rw-r--r--  fs/smb/server/transport_rdma.c    |  99
-rw-r--r--  fs/smb/server/transport_tcp.c     |  27
-rw-r--r--  fs/squashfs/inode.c               |  31
-rw-r--r--  fs/squashfs/squashfs_fs_i.h       |   2
-rw-r--r--  fs/udf/inode.c                    |   3
58 files changed, 760 insertions(+), 412 deletions(-)
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index e2b22bea348a..c403117ac013 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -23,7 +23,11 @@ static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
int type;
if (parent && (len < BTRFS_FID_SIZE_CONNECTABLE)) {
- *max_len = BTRFS_FID_SIZE_CONNECTABLE;
+ if (btrfs_root_id(BTRFS_I(inode)->root) !=
+ btrfs_root_id(BTRFS_I(parent)->root))
+ *max_len = BTRFS_FID_SIZE_CONNECTABLE_ROOT;
+ else
+ *max_len = BTRFS_FID_SIZE_CONNECTABLE;
return FILEID_INVALID;
} else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) {
*max_len = BTRFS_FID_SIZE_NON_CONNECTABLE;
@@ -45,6 +49,8 @@ static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
parent_root_id = btrfs_root_id(BTRFS_I(parent)->root);
if (parent_root_id != fid->root_objectid) {
+ if (*max_len < BTRFS_FID_SIZE_CONNECTABLE_ROOT)
+ return FILEID_INVALID;
fid->parent_root_objectid = parent_root_id;
len = BTRFS_FID_SIZE_CONNECTABLE_ROOT;
type = FILEID_BTRFS_WITH_PARENT_ROOT;
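
The two hunks above enforce the encode_fh contract in both directions: when the caller's buffer is too small, report the space the connectable-with-root fileid actually needs, and when emitting that larger fileid, re-check the buffer first. A minimal userspace sketch of that contract, with DEMO_*/demo_* names standing in for the btrfs specifics:

    #include <string.h>

    #define DEMO_FID_CONNECTABLE      2   /* u32 words, parent in the same root */
    #define DEMO_FID_CONNECTABLE_ROOT 3   /* u32 words, parent in another root */
    #define DEMO_FILEID_INVALID       0xff

    static int demo_encode_fh(unsigned int *fh, int *max_len, int cross_root)
    {
        int need = cross_root ? DEMO_FID_CONNECTABLE_ROOT : DEMO_FID_CONNECTABLE;

        if (*max_len < need) {
            *max_len = need;              /* tell the caller what a retry needs */
            return DEMO_FILEID_INVALID;
        }
        memset(fh, 0, need * sizeof(*fh));
        *max_len = need;
        return cross_root ? 2 : 1;        /* illustrative fileid types */
    }
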
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index afebc91882be..b310a07a84ad 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -355,6 +355,13 @@ again:
/* step one, find a bunch of delalloc bytes starting at start */
delalloc_start = *start;
delalloc_end = 0;
+
+ /*
+ * If @max_bytes is smaller than a block, btrfs_find_delalloc_range() can
+ * return early without handling any dirty ranges.
+ */
+ ASSERT(max_bytes >= fs_info->sectorsize);
+
found = btrfs_find_delalloc_range(tree, &delalloc_start, &delalloc_end,
max_bytes, &cached_state);
if (!found || delalloc_end <= *start || delalloc_start > orig_end) {
@@ -385,13 +392,14 @@ again:
delalloc_end);
ASSERT(!ret || ret == -EAGAIN);
if (ret == -EAGAIN) {
- /* some of the folios are gone, lets avoid looping by
- * shortening the size of the delalloc range we're searching
+ /*
+ * Some of the folios are gone, let's avoid looping by
+ * shortening the size of the delalloc range we're searching.
*/
free_extent_state(cached_state);
cached_state = NULL;
if (!loops) {
- max_bytes = PAGE_SIZE;
+ max_bytes = fs_info->sectorsize;
loops = 1;
goto again;
} else {
@@ -1479,7 +1487,7 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
struct btrfs_fs_info *fs_info = inode->root->fs_info;
unsigned long range_bitmap = 0;
bool submitted_io = false;
- bool error = false;
+ int found_error = 0;
const u64 folio_start = folio_pos(folio);
u64 cur;
int bit;
@@ -1536,7 +1544,8 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
*/
btrfs_mark_ordered_io_finished(inode, folio, cur,
fs_info->sectorsize, false);
- error = true;
+ if (!found_error)
+ found_error = ret;
continue;
}
submitted_io = true;
@@ -1553,11 +1562,11 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
* If we hit any error, the corresponding sector will still be dirty
* thus no need to clear PAGECACHE_TAG_DIRTY.
*/
- if (!submitted_io && !error) {
+ if (!submitted_io && !found_error) {
btrfs_folio_set_writeback(fs_info, folio, start, len);
btrfs_folio_clear_writeback(fs_info, folio, start, len);
}
- return ret;
+ return found_error;
}
/*
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index b84d1747a020..e7d192f7ab3b 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -117,9 +117,18 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
inode_nohighmem(inode);
inode->i_data.a_ops = &cramfs_aops;
break;
- default:
+ case S_IFCHR:
+ case S_IFBLK:
+ case S_IFIFO:
+ case S_IFSOCK:
init_special_inode(inode, cramfs_inode->mode,
old_decode_dev(cramfs_inode->size));
+ break;
+ default:
+ printk(KERN_DEBUG "CRAMFS: Invalid file type 0%04o for inode %lu.\n",
+ inode->i_mode, inode->i_ino);
+ iget_failed(inode);
+ return ERR_PTR(-EIO);
}
inode->i_mode = cramfs_inode->mode;
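
Both this hunk and the fs/minix change further down replace a catch-all init_special_inode() with an explicit whitelist of the file types a valid image can encode, so a corrupted S_IFMT field is rejected instead of silently creating a bogus special inode. The classification in isolation, as a hedged userspace sketch:

    #include <sys/stat.h>

    /* Accept only known file types; anything else is corruption
     * (-EIO in cramfs, make_bad_inode() in minix). */
    static int demo_mode_valid(unsigned int mode)
    {
        switch (mode & S_IFMT) {
        case S_IFREG:
        case S_IFDIR:
        case S_IFLNK:
        case S_IFCHR:
        case S_IFBLK:
        case S_IFIFO:
        case S_IFSOCK:
            return 1;
        default:
            return 0;
        }
    }
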
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 075fee4ba29b..8711fac20804 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -46,10 +46,10 @@
*
* 1) epnested_mutex (mutex)
* 2) ep->mtx (mutex)
- * 3) ep->lock (rwlock)
+ * 3) ep->lock (spinlock)
*
* The acquire order is the one listed above, from 1 to 3.
- * We need a rwlock (ep->lock) because we manipulate objects
+ * We need a spinlock (ep->lock) because we manipulate objects
* from inside the poll callback, that might be triggered from
* a wake_up() that in turn might be called from IRQ context.
* So we can't sleep inside the poll callback and hence we need
@@ -195,7 +195,7 @@ struct eventpoll {
struct list_head rdllist;
/* Lock which protects rdllist and ovflist */
- rwlock_t lock;
+ spinlock_t lock;
/* RB tree root used to store monitored fd structs */
struct rb_root_cached rbr;
@@ -713,10 +713,10 @@ static void ep_start_scan(struct eventpoll *ep, struct list_head *txlist)
* in a lockless way.
*/
lockdep_assert_irqs_enabled();
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
list_splice_init(&ep->rdllist, txlist);
WRITE_ONCE(ep->ovflist, NULL);
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
}
static void ep_done_scan(struct eventpoll *ep,
@@ -724,7 +724,7 @@ static void ep_done_scan(struct eventpoll *ep,
{
struct epitem *epi, *nepi;
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
/*
* During the time we spent inside the "sproc" callback, some
* other events might have been queued by the poll callback.
@@ -765,7 +765,7 @@ static void ep_done_scan(struct eventpoll *ep,
wake_up(&ep->wq);
}
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
}
static void ep_get(struct eventpoll *ep)
@@ -839,10 +839,10 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force)
rb_erase_cached(&epi->rbn, &ep->rbr);
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
if (ep_is_linked(epi))
list_del_init(&epi->rdllink);
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
wakeup_source_unregister(ep_wakeup_source(epi));
/*
@@ -1123,7 +1123,7 @@ static int ep_alloc(struct eventpoll **pep)
return -ENOMEM;
mutex_init(&ep->mtx);
- rwlock_init(&ep->lock);
+ spin_lock_init(&ep->lock);
init_waitqueue_head(&ep->wq);
init_waitqueue_head(&ep->poll_wait);
INIT_LIST_HEAD(&ep->rdllist);
@@ -1211,99 +1211,9 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd,
#endif /* CONFIG_KCMP */
/*
- * Adds a new entry to the tail of the list in a lockless way, i.e.
- * multiple CPUs are allowed to call this function concurrently.
- *
- * Beware: it is necessary to prevent any other modifications of the
- * existing list until all changes are completed, in other words
- * concurrent list_add_tail_lockless() calls should be protected
- * with a read lock, where write lock acts as a barrier which
- * makes sure all list_add_tail_lockless() calls are fully
- * completed.
- *
- * Also an element can be locklessly added to the list only in one
- * direction i.e. either to the tail or to the head, otherwise
- * concurrent access will corrupt the list.
- *
- * Return: %false if element has been already added to the list, %true
- * otherwise.
- */
-static inline bool list_add_tail_lockless(struct list_head *new,
- struct list_head *head)
-{
- struct list_head *prev;
-
- /*
- * This is simple 'new->next = head' operation, but cmpxchg()
- * is used in order to detect that same element has been just
- * added to the list from another CPU: the winner observes
- * new->next == new.
- */
- if (!try_cmpxchg(&new->next, &new, head))
- return false;
-
- /*
- * Initially ->next of a new element must be updated with the head
- * (we are inserting to the tail) and only then pointers are atomically
- * exchanged. XCHG guarantees memory ordering, thus ->next should be
- * updated before pointers are actually swapped and pointers are
- * swapped before prev->next is updated.
- */
-
- prev = xchg(&head->prev, new);
-
- /*
- * It is safe to modify prev->next and new->prev, because a new element
- * is added only to the tail and new->next is updated before XCHG.
- */
-
- prev->next = new;
- new->prev = prev;
-
- return true;
-}
-
-/*
- * Chains a new epi entry to the tail of the ep->ovflist in a lockless way,
- * i.e. multiple CPUs are allowed to call this function concurrently.
- *
- * Return: %false if epi element has been already chained, %true otherwise.
- */
-static inline bool chain_epi_lockless(struct epitem *epi)
-{
- struct eventpoll *ep = epi->ep;
-
- /* Fast preliminary check */
- if (epi->next != EP_UNACTIVE_PTR)
- return false;
-
- /* Check that the same epi has not been just chained from another CPU */
- if (cmpxchg(&epi->next, EP_UNACTIVE_PTR, NULL) != EP_UNACTIVE_PTR)
- return false;
-
- /* Atomically exchange tail */
- epi->next = xchg(&ep->ovflist, epi);
-
- return true;
-}
-
-/*
* This is the callback that is passed to the wait queue wakeup
* mechanism. It is called by the stored file descriptors when they
* have events to report.
- *
- * This callback takes a read lock in order not to contend with concurrent
- * events from another file descriptor, thus all modifications to ->rdllist
- * or ->ovflist are lockless. Read lock is paired with the write lock from
- * ep_start/done_scan(), which stops all list modifications and guarantees
- * that lists state is seen correctly.
- *
- * Another thing worth to mention is that ep_poll_callback() can be called
- * concurrently for the same @epi from different CPUs if poll table was inited
- * with several wait queues entries. Plural wakeup from different CPUs of a
- * single wait queue is serialized by wq.lock, but the case when multiple wait
- * queues are used should be detected accordingly. This is detected using
- * cmpxchg() operation.
*/
static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
@@ -1314,7 +1224,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
unsigned long flags;
int ewake = 0;
- read_lock_irqsave(&ep->lock, flags);
+ spin_lock_irqsave(&ep->lock, flags);
ep_set_busy_poll_napi_id(epi);
@@ -1343,12 +1253,15 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
* chained in ep->ovflist and requeued later on.
*/
if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) {
- if (chain_epi_lockless(epi))
+ if (epi->next == EP_UNACTIVE_PTR) {
+ epi->next = READ_ONCE(ep->ovflist);
+ WRITE_ONCE(ep->ovflist, epi);
ep_pm_stay_awake_rcu(epi);
+ }
} else if (!ep_is_linked(epi)) {
/* In the usual case, add event to ready list. */
- if (list_add_tail_lockless(&epi->rdllink, &ep->rdllist))
- ep_pm_stay_awake_rcu(epi);
+ list_add_tail(&epi->rdllink, &ep->rdllist);
+ ep_pm_stay_awake_rcu(epi);
}
/*
@@ -1381,7 +1294,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
pwake++;
out_unlock:
- read_unlock_irqrestore(&ep->lock, flags);
+ spin_unlock_irqrestore(&ep->lock, flags);
/* We have to call this outside the lock */
if (pwake)
@@ -1716,7 +1629,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
}
/* We have to drop the new item inside our item list to keep track of it */
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
/* record NAPI ID of new item if present */
ep_set_busy_poll_napi_id(epi);
@@ -1733,7 +1646,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
pwake++;
}
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
/* We have to call this outside the lock */
if (pwake)
@@ -1797,7 +1710,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi,
* list, push it inside.
*/
if (ep_item_poll(epi, &pt, 1)) {
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
if (!ep_is_linked(epi)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
ep_pm_stay_awake(epi);
@@ -1808,7 +1721,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi,
if (waitqueue_active(&ep->poll_wait))
pwake++;
}
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
}
/* We have to call this outside the lock */
@@ -2041,7 +1954,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
init_wait(&wait);
wait.func = ep_autoremove_wake_function;
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
/*
* Barrierless variant, waitqueue_active() is called under
* the same lock on wakeup ep_poll_callback() side, so it
@@ -2060,7 +1973,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
if (!eavail)
__add_wait_queue_exclusive(&ep->wq, &wait);
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
if (!eavail)
timed_out = !schedule_hrtimeout_range(to, slack,
@@ -2075,7 +1988,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
eavail = 1;
if (!list_empty_careful(&wait.entry)) {
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
/*
* If the thread timed out and is not on the wait queue,
* it means that the thread was woken up after its
@@ -2086,7 +1999,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
if (timed_out)
eavail = list_empty(&wait.entry);
__remove_wait_queue(&ep->wq, &wait);
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
}
}
}
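
The bulk of this eventpoll patch deletes the cmpxchg/xchg-based lockless enqueue and reverts ep->lock to a plain spinlock taken in the wakeup callback. For reference, the deleted scheme can be reproduced with C11 atomics; this is an illustrative userspace analogue of the removed list_add_tail_lockless(), not kernel code:

    #include <stdatomic.h>
    #include <stdbool.h>

    struct node {
        _Atomic(struct node *) next;
        _Atomic(struct node *) prev;
    };

    /* A node must be initialised with node->next == node before the
     * first add; the cmpxchg below relies on that sentinel value. */
    static bool lockless_add_tail(struct node *node, struct node *head)
    {
        struct node *expected = node;

        /* Winner observes node->next == node; a concurrent adder bails out. */
        if (!atomic_compare_exchange_strong(&node->next, &expected, head))
            return false;

        /* Publish the new tail, then stitch the neighbours together. */
        struct node *prev = atomic_exchange(&head->prev, node);
        prev->next = node;
        node->prev = prev;
        return true;
    }
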
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index d8120b88fa00..d8a059ec1ad6 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1970,6 +1970,16 @@ static inline bool ext4_verity_in_progress(struct inode *inode)
#define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime
/*
+ * Check whether the inode is tracked as orphan (either in orphan file or
+ * orphan list).
+ */
+static inline bool ext4_inode_orphan_tracked(struct inode *inode)
+{
+ return ext4_test_inode_state(inode, EXT4_STATE_ORPHAN_FILE) ||
+ !list_empty(&EXT4_I(inode)->i_orphan);
+}
+
+/*
* Codes for operating systems
*/
#define EXT4_OS_LINUX 0
@@ -3097,6 +3107,8 @@ extern struct buffer_head *ext4_sb_bread(struct super_block *sb,
sector_t block, blk_opf_t op_flags);
extern struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb,
sector_t block);
+extern struct buffer_head *ext4_sb_bread_nofail(struct super_block *sb,
+ sector_t block);
extern void ext4_read_bh_nowait(struct buffer_head *bh, blk_opf_t op_flags,
bh_end_io_t *end_io, bool simu_fail);
extern int ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags,
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 6c692151b0d6..7d949ed0ab5f 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -354,7 +354,7 @@ static void ext4_inode_extension_cleanup(struct inode *inode, bool need_trunc)
* to cleanup the orphan list in ext4_handle_inode_extension(). Do it
* now.
*/
- if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) {
+ if (ext4_inode_orphan_tracked(inode) && inode->i_nlink) {
handle_t *handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
if (IS_ERR(handle)) {
diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c
index 91185c40f755..22fc333244ef 100644
--- a/fs/ext4/fsmap.c
+++ b/fs/ext4/fsmap.c
@@ -74,7 +74,8 @@ static int ext4_getfsmap_dev_compare(const void *p1, const void *p2)
static bool ext4_getfsmap_rec_before_low_key(struct ext4_getfsmap_info *info,
struct ext4_fsmap *rec)
{
- return rec->fmr_physical < info->gfi_low.fmr_physical;
+ return rec->fmr_physical + rec->fmr_length <=
+ info->gfi_low.fmr_physical;
}
/*
@@ -200,15 +201,18 @@ static int ext4_getfsmap_meta_helper(struct super_block *sb,
ext4_group_first_block_no(sb, agno));
fs_end = fs_start + EXT4_C2B(sbi, len);
- /* Return relevant extents from the meta_list */
+ /*
+ * Return relevant extents from the meta_list. We emit all extents that
+ * partially/fully overlap with the query range
+ */
list_for_each_entry_safe(p, tmp, &info->gfi_meta_list, fmr_list) {
- if (p->fmr_physical < info->gfi_next_fsblk) {
+ if (p->fmr_physical + p->fmr_length <= info->gfi_next_fsblk) {
list_del(&p->fmr_list);
kfree(p);
continue;
}
- if (p->fmr_physical <= fs_start ||
- p->fmr_physical + p->fmr_length <= fs_end) {
+ if (p->fmr_physical <= fs_end &&
+ p->fmr_physical + p->fmr_length > fs_start) {
/* Emit the retained free extent record if present */
if (info->gfi_lastfree.fmr_owner) {
error = ext4_getfsmap_helper(sb, info,
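
Both fixed predicates reduce to the standard half-open interval test: a record [start, start+len) is relevant iff it intersects the query window. A small self-contained sketch of that test (illustrative names, not the fsmap API):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Two half-open ranges [a, a+alen) and [b, b+blen) overlap iff
     * each one starts before the other ends. */
    static bool ranges_overlap(uint64_t a, uint64_t alen,
                               uint64_t b, uint64_t blen)
    {
        return a < b + blen && b < a + alen;
    }

    int main(void)
    {
        assert(ranges_overlap(10, 5, 12, 5));    /* partial overlap */
        assert(!ranges_overlap(10, 5, 15, 5));   /* merely adjacent */
        assert(!ranges_overlap(10, 5, 2, 8));    /* ends exactly at start */
        return 0;
    }
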
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index d45124318200..da76353b3a57 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -1025,7 +1025,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
}
/* Go read the buffer for the next level down */
- bh = ext4_sb_bread(inode->i_sb, nr, 0);
+ bh = ext4_sb_bread_nofail(inode->i_sb, nr);
/*
* A read failure? Report error and clear slot
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7923602271ad..6af572425596 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3859,7 +3859,11 @@ int ext4_can_truncate(struct inode *inode)
* We have to make sure i_disksize gets properly updated before we truncate
* page cache due to hole punching or zero range. Otherwise i_disksize update
* can get lost as it may have been postponed to submission of writeback but
- * that will never happen after we truncate page cache.
+ * that will never happen if we remove the folio containing i_size from the
+ * page cache. Also if we punch hole within i_size but above i_disksize,
+ * following ext4_page_mkwrite() may mistakenly allocate written blocks over
+ * the hole and thus introduce allocated blocks beyond i_disksize which is
+ * not allowed (e2fsck would complain in case of crash).
*/
int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
loff_t len)
@@ -3870,9 +3874,11 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
loff_t size = i_size_read(inode);
WARN_ON(!inode_is_locked(inode));
- if (offset > size || offset + len < size)
+ if (offset > size)
return 0;
+ if (offset + len < size)
+ size = offset + len;
if (EXT4_I(inode)->i_disksize >= size)
return 0;
@@ -4330,7 +4336,7 @@ static int ext4_fill_raw_inode(struct inode *inode, struct ext4_inode *raw_inode
* old inodes get re-used with the upper 16 bits of the
* uid/gid intact.
*/
- if (ei->i_dtime && list_empty(&ei->i_orphan)) {
+ if (ei->i_dtime && !ext4_inode_orphan_tracked(inode)) {
raw_inode->i_uid_high = 0;
raw_inode->i_gid_high = 0;
} else {
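
The rewritten check clamps the update target to min(i_size, offset + len), so a hole punched entirely below i_size no longer skips the i_disksize update. A hedged sketch of just that decision logic, with illustrative names:

    /* Returns nonzero when disksize must be raised before the page
     * cache in [offset, offset+len) is truncated; sketch only. */
    static int demo_needs_disksize_update(long long i_size, long long disksize,
                                          long long offset, long long len)
    {
        long long target = i_size;

        if (offset > i_size)        /* hole entirely beyond EOF */
            return 0;
        if (offset + len < i_size)  /* hole ends below EOF: clamp */
            target = offset + len;
        return disksize < target;
    }
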
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 898443e98efc..a4c94eabc78e 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -225,7 +225,7 @@ static int mext_page_mkuptodate(struct folio *folio, size_t from, size_t to)
do {
if (bh_offset(bh) + blocksize <= from)
continue;
- if (bh_offset(bh) > to)
+ if (bh_offset(bh) >= to)
break;
wait_on_buffer(bh);
if (buffer_uptodate(bh))
diff --git a/fs/ext4/orphan.c b/fs/ext4/orphan.c
index a23b0c01f809..05997b4d0120 100644
--- a/fs/ext4/orphan.c
+++ b/fs/ext4/orphan.c
@@ -109,11 +109,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
!inode_is_locked(inode));
- /*
- * Inode orphaned in orphan file or in orphan list?
- */
- if (ext4_test_inode_state(inode, EXT4_STATE_ORPHAN_FILE) ||
- !list_empty(&EXT4_I(inode)->i_orphan))
+ if (ext4_inode_orphan_tracked(inode))
return 0;
/*
@@ -517,7 +513,7 @@ void ext4_release_orphan_info(struct super_block *sb)
return;
for (i = 0; i < oi->of_blocks; i++)
brelse(oi->of_binfo[i].ob_bh);
- kfree(oi->of_binfo);
+ kvfree(oi->of_binfo);
}
static struct ext4_orphan_block_tail *ext4_orphan_block_tail(
@@ -588,9 +584,20 @@ int ext4_init_orphan_info(struct super_block *sb)
ext4_msg(sb, KERN_ERR, "get orphan inode failed");
return PTR_ERR(inode);
}
+ /*
+ * This is just an artificial limit to prevent corrupted fs from
+ * consuming absurd amounts of memory when pinning blocks of orphan
+ * file in memory.
+ */
+ if (inode->i_size > 8 << 20) {
+ ext4_msg(sb, KERN_ERR, "orphan file too big: %llu",
+ (unsigned long long)inode->i_size);
+ ret = -EFSCORRUPTED;
+ goto out_put;
+ }
oi->of_blocks = inode->i_size >> sb->s_blocksize_bits;
oi->of_csum_seed = EXT4_I(inode)->i_csum_seed;
- oi->of_binfo = kmalloc_array(oi->of_blocks,
+ oi->of_binfo = kvmalloc_array(oi->of_blocks,
sizeof(struct ext4_orphan_block),
GFP_KERNEL);
if (!oi->of_binfo) {
@@ -631,7 +638,7 @@ int ext4_init_orphan_info(struct super_block *sb)
out_free:
for (i--; i >= 0; i--)
brelse(oi->of_binfo[i].ob_bh);
- kfree(oi->of_binfo);
+ kvfree(oi->of_binfo);
out_put:
iput(inode);
return ret;
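
Two defensive changes here: an untrusted on-disk size is capped before it is used to size an allocation, and the allocation moves to kvmalloc_array() so large-but-legal orphan files fall back to vmalloc instead of failing a high-order kmalloc. A userspace analogue of the sizing pattern (demo_* names are illustrative):

    #include <errno.h>
    #include <stdint.h>
    #include <stdlib.h>

    #define DEMO_MAX_ORPHAN_BYTES (8u << 20)    /* the patch's 8 MiB cap */

    static void *demo_alloc_block_info(uint64_t isize, unsigned int blkbits,
                                       size_t entry_size, size_t *nblocks)
    {
        if (isize > DEMO_MAX_ORPHAN_BYTES) {
            errno = EUCLEAN;                    /* stand-in for -EFSCORRUPTED */
            return NULL;
        }
        *nblocks = (size_t)(isize >> blkbits);
        return calloc(*nblocks, entry_size);    /* kvmalloc_array() analogue */
    }
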
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 58d125ad2371..78ecdedadb07 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -266,6 +266,15 @@ struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb,
return __ext4_sb_bread_gfp(sb, block, 0, gfp);
}
+struct buffer_head *ext4_sb_bread_nofail(struct super_block *sb,
+ sector_t block)
+{
+ gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_mapping,
+ ~__GFP_FS) | __GFP_MOVABLE | __GFP_NOFAIL;
+
+ return __ext4_sb_bread_gfp(sb, block, 0, gfp);
+}
+
void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block)
{
struct buffer_head *bh = bdev_getblk(sb->s_bdev, block,
@@ -1461,9 +1470,9 @@ static void ext4_free_in_core_inode(struct inode *inode)
static void ext4_destroy_inode(struct inode *inode)
{
- if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
+ if (ext4_inode_orphan_tracked(inode)) {
ext4_msg(inode->i_sb, KERN_ERR,
- "Inode %lu (%p): orphan list check failed!",
+ "Inode %lu (%p): inode tracked as orphan!",
inode->i_ino, EXT4_I(inode));
print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
EXT4_I(inode), sizeof(struct ext4_inode_info),
@@ -2484,7 +2493,7 @@ static int parse_apply_sb_mount_options(struct super_block *sb,
struct ext4_fs_context *m_ctx)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- char *s_mount_opts = NULL;
+ char s_mount_opts[65];
struct ext4_fs_context *s_ctx = NULL;
struct fs_context *fc = NULL;
int ret = -ENOMEM;
@@ -2492,15 +2501,11 @@ static int parse_apply_sb_mount_options(struct super_block *sb,
if (!sbi->s_es->s_mount_opts[0])
return 0;
- s_mount_opts = kstrndup(sbi->s_es->s_mount_opts,
- sizeof(sbi->s_es->s_mount_opts),
- GFP_KERNEL);
- if (!s_mount_opts)
- return ret;
+ strscpy_pad(s_mount_opts, sbi->s_es->s_mount_opts);
fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
if (!fc)
- goto out_free;
+ return -ENOMEM;
s_ctx = kzalloc(sizeof(struct ext4_fs_context), GFP_KERNEL);
if (!s_ctx)
@@ -2532,11 +2537,8 @@ parse_failed:
ret = 0;
out_free:
- if (fc) {
- ext4_fc_free(fc);
- kfree(fc);
- }
- kfree(s_mount_opts);
+ ext4_fc_free(fc);
+ kfree(fc);
return ret;
}
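
s_mount_opts is a fixed 64-byte superblock field that is not guaranteed to be NUL-terminated, so the patch copies it into a 65-byte stack buffer with strscpy_pad() instead of kstrndup(), removing an allocation that could fail. A userspace sketch of the termination guarantee (strscpy_pad() additionally stops at an embedded NUL):

    #include <string.h>

    /* Copy a possibly-unterminated 64-byte on-disk field into a buffer
     * that is always NUL-terminated; illustrative only. */
    static void demo_copy_mount_opts(char dst[65], const char src[64])
    {
        memcpy(dst, src, 64);
        dst[64] = '\0';
    }
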
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 6ff94cdf1515..5ddfa4801bb3 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -251,6 +251,10 @@ check_xattrs(struct inode *inode, struct buffer_head *bh,
err_str = "invalid ea_ino";
goto errout;
}
+ if (ea_ino && !size) {
+ err_str = "invalid size in ea xattr";
+ goto errout;
+ }
if (size > EXT4_XATTR_SIZE_MAX) {
err_str = "e_value size too large";
goto errout;
@@ -1036,7 +1040,7 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
int ref_change)
{
struct ext4_iloc iloc;
- s64 ref_count;
+ u64 ref_count;
int ret;
inode_lock_nested(ea_inode, I_MUTEX_XATTR);
@@ -1046,13 +1050,17 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
goto out;
ref_count = ext4_xattr_inode_get_ref(ea_inode);
+ if ((ref_count == 0 && ref_change < 0) || (ref_count == U64_MAX && ref_change > 0)) {
+ ext4_error_inode(ea_inode, __func__, __LINE__, 0,
+ "EA inode %lu ref wraparound: ref_count=%lld ref_change=%d",
+ ea_inode->i_ino, ref_count, ref_change);
+ ret = -EFSCORRUPTED;
+ goto out;
+ }
ref_count += ref_change;
ext4_xattr_inode_set_ref(ea_inode, ref_count);
if (ref_change > 0) {
- WARN_ONCE(ref_count <= 0, "EA inode %lu ref_count=%lld",
- ea_inode->i_ino, ref_count);
-
if (ref_count == 1) {
WARN_ONCE(ea_inode->i_nlink, "EA inode %lu i_nlink=%u",
ea_inode->i_ino, ea_inode->i_nlink);
@@ -1061,9 +1069,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
ext4_orphan_del(handle, ea_inode);
}
} else {
- WARN_ONCE(ref_count < 0, "EA inode %lu ref_count=%lld",
- ea_inode->i_ino, ref_count);
-
if (ref_count == 0) {
WARN_ONCE(ea_inode->i_nlink != 1,
"EA inode %lu i_nlink=%u",
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index efc30626760a..040c06dfb8c0 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1781,12 +1781,13 @@ sync_out:
if (map->m_flags & F2FS_MAP_MAPPED) {
unsigned int ofs = start_pgofs - map->m_lblk;
- f2fs_update_read_extent_cache_range(&dn,
- start_pgofs, map->m_pblk + ofs,
- map->m_len - ofs);
+ if (map->m_len > ofs)
+ f2fs_update_read_extent_cache_range(&dn,
+ start_pgofs, map->m_pblk + ofs,
+ map->m_len - ofs);
}
if (map->m_next_extent)
- *map->m_next_extent = pgofs + 1;
+ *map->m_next_extent = is_hole ? pgofs + 1 : pgofs;
}
f2fs_put_dnode(&dn);
unlock_out:
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 2dec22f2ea63..0d3ef487f72a 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2331,8 +2331,6 @@ static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi,
{
if (!inode)
return true;
- if (!test_opt(sbi, RESERVE_ROOT))
- return false;
if (IS_NOQUOTA(inode))
return true;
if (uid_eq(F2FS_OPTION(sbi).s_resuid, current_fsuid()))
@@ -2353,7 +2351,7 @@ static inline unsigned int get_available_block_count(struct f2fs_sb_info *sbi,
avail_user_block_count = sbi->user_block_count -
sbi->current_reserved_blocks;
- if (!__allow_reserved_blocks(sbi, inode, cap))
+ if (test_opt(sbi, RESERVE_ROOT) && !__allow_reserved_blocks(sbi, inode, cap))
avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index fa77841f3e2c..2a108c561e8b 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -35,15 +35,23 @@
#include <trace/events/f2fs.h>
#include <uapi/linux/f2fs.h>
-static void f2fs_zero_post_eof_page(struct inode *inode, loff_t new_size)
+static void f2fs_zero_post_eof_page(struct inode *inode,
+ loff_t new_size, bool lock)
{
loff_t old_size = i_size_read(inode);
if (old_size >= new_size)
return;
+ if (mapping_empty(inode->i_mapping))
+ return;
+
+ if (lock)
+ filemap_invalidate_lock(inode->i_mapping);
/* zero or drop pages only in range of [old_size, new_size] */
- truncate_pagecache(inode, old_size);
+ truncate_inode_pages_range(inode->i_mapping, old_size, new_size);
+ if (lock)
+ filemap_invalidate_unlock(inode->i_mapping);
}
static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
@@ -114,9 +122,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
- filemap_invalidate_lock(inode->i_mapping);
- f2fs_zero_post_eof_page(inode, (folio->index + 1) << PAGE_SHIFT);
- filemap_invalidate_unlock(inode->i_mapping);
+ f2fs_zero_post_eof_page(inode, (folio->index + 1) << PAGE_SHIFT, true);
file_update_time(vmf->vma->vm_file);
filemap_invalidate_lock_shared(inode->i_mapping);
@@ -856,8 +862,16 @@ int f2fs_truncate(struct inode *inode)
/* we should check inline_data size */
if (!f2fs_may_inline_data(inode)) {
err = f2fs_convert_inline_inode(inode);
- if (err)
+ if (err) {
+ /*
+ * Always truncate page #0 to avoid page cache
+ * leak in evict() path.
+ */
+ truncate_inode_pages_range(inode->i_mapping,
+ F2FS_BLK_TO_BYTES(0),
+ F2FS_BLK_END_BYTES(0));
return err;
+ }
}
err = f2fs_truncate_blocks(inode, i_size_read(inode), true);
@@ -1081,7 +1095,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
filemap_invalidate_lock(inode->i_mapping);
if (attr->ia_size > old_size)
- f2fs_zero_post_eof_page(inode, attr->ia_size);
+ f2fs_zero_post_eof_page(inode, attr->ia_size, false);
truncate_setsize(inode, attr->ia_size);
if (attr->ia_size <= old_size)
@@ -1200,9 +1214,7 @@ static int f2fs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
if (ret)
return ret;
- filemap_invalidate_lock(inode->i_mapping);
- f2fs_zero_post_eof_page(inode, offset + len);
- filemap_invalidate_unlock(inode->i_mapping);
+ f2fs_zero_post_eof_page(inode, offset + len, true);
pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
@@ -1487,7 +1499,7 @@ static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
- f2fs_zero_post_eof_page(inode, offset + len);
+ f2fs_zero_post_eof_page(inode, offset + len, false);
f2fs_lock_op(sbi);
f2fs_drop_extent_tree(inode);
@@ -1610,9 +1622,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
if (ret)
return ret;
- filemap_invalidate_lock(mapping);
- f2fs_zero_post_eof_page(inode, offset + len);
- filemap_invalidate_unlock(mapping);
+ f2fs_zero_post_eof_page(inode, offset + len, true);
pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
@@ -1746,7 +1756,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(mapping);
- f2fs_zero_post_eof_page(inode, offset + len);
+ f2fs_zero_post_eof_page(inode, offset + len, false);
truncate_pagecache(inode, offset);
while (!ret && idx > pg_start) {
@@ -1804,9 +1814,7 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset,
if (err)
return err;
- filemap_invalidate_lock(inode->i_mapping);
- f2fs_zero_post_eof_page(inode, offset + len);
- filemap_invalidate_unlock(inode->i_mapping);
+ f2fs_zero_post_eof_page(inode, offset + len, true);
f2fs_balance_fs(sbi, true);
@@ -4751,9 +4759,8 @@ static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from)
if (err)
return err;
- filemap_invalidate_lock(inode->i_mapping);
- f2fs_zero_post_eof_page(inode, iocb->ki_pos + iov_iter_count(from));
- filemap_invalidate_unlock(inode->i_mapping);
+ f2fs_zero_post_eof_page(inode,
+ iocb->ki_pos + iov_iter_count(from), true);
return count;
}
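
The helper now takes a lock flag so callers that already hold the invalidate lock can reuse it, and the bare truncate_pagecache() becomes a ranged truncate of [old_size, new_size]. The locking shape, as a userspace sketch with a pthread mutex standing in for filemap_invalidate_lock():

    #include <pthread.h>

    static pthread_mutex_t demo_invalidate_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Zero/drop cached pages past EOF; takes the lock itself only when
     * the caller does not already hold it. Illustrative, not f2fs code. */
    static void demo_zero_post_eof(long long old_size, long long new_size,
                                   int take_lock)
    {
        if (old_size >= new_size)
            return;
        if (take_lock)
            pthread_mutex_lock(&demo_invalidate_lock);
        /* ... truncate the page-cache range [old_size, new_size] ... */
        if (take_lock)
            pthread_mutex_unlock(&demo_invalidate_lock);
    }
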
diff --git a/fs/file.c b/fs/file.c
index bfc9eb9e7229..68c1bcc6b7e9 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -1262,7 +1262,10 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags)
err = expand_files(files, fd);
if (unlikely(err < 0))
goto out_unlock;
- return do_dup2(files, file, fd, flags);
+ err = do_dup2(files, file, fd, flags);
+ if (err < 0)
+ return err;
+ return 0;
out_unlock:
spin_unlock(&files->file_lock);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 4ae226778d64..28edfad85c62 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -446,22 +446,23 @@ static bool inode_do_switch_wbs(struct inode *inode,
* Transfer to @new_wb's IO list if necessary. If the @inode is dirty,
* the specific list @inode was on is ignored and the @inode is put on
* ->b_dirty which is always correct including from ->b_dirty_time.
- * The transfer preserves @inode->dirtied_when ordering. If the @inode
- * was clean, it means it was on the b_attached list, so move it onto
- * the b_attached list of @new_wb.
+ * If the @inode was clean, it means it was on the b_attached list, so
+ * move it onto the b_attached list of @new_wb.
*/
if (!list_empty(&inode->i_io_list)) {
inode->i_wb = new_wb;
if (inode->i_state & I_DIRTY_ALL) {
- struct inode *pos;
-
- list_for_each_entry(pos, &new_wb->b_dirty, i_io_list)
- if (time_after_eq(inode->dirtied_when,
- pos->dirtied_when))
- break;
+ /*
+ * We need to keep b_dirty list sorted by
+ * dirtied_time_when. However properly sorting the
+ * inode in the list gets too expensive when switching
+ * many inodes. So just attach inode at the end of the
+ * dirty list and clobber the dirtied_time_when.
+ */
+ inode->dirtied_time_when = jiffies;
inode_io_list_move_locked(inode, new_wb,
- pos->i_io_list.prev);
+ &new_wb->b_dirty);
} else {
inode_cgwb_move_to_attached(inode, new_wb);
}
@@ -503,6 +504,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
*/
down_read(&bdi->wb_switch_rwsem);
+ inodep = isw->inodes;
/*
* By the time control reaches here, RCU grace period has passed
* since I_WB_SWITCH assertion and all wb stat update transactions
@@ -513,6 +515,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
* gives us exclusion against all wb related operations on @inode
* including IO list manipulations and stat updates.
*/
+relock:
if (old_wb < new_wb) {
spin_lock(&old_wb->list_lock);
spin_lock_nested(&new_wb->list_lock, SINGLE_DEPTH_NESTING);
@@ -521,10 +524,17 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING);
}
- for (inodep = isw->inodes; *inodep; inodep++) {
+ while (*inodep) {
WARN_ON_ONCE((*inodep)->i_wb != old_wb);
if (inode_do_switch_wbs(*inodep, old_wb, new_wb))
nr_switched++;
+ inodep++;
+ if (*inodep && need_resched()) {
+ spin_unlock(&new_wb->list_lock);
+ spin_unlock(&old_wb->list_lock);
+ cond_resched();
+ goto relock;
+ }
}
spin_unlock(&new_wb->list_lock);
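
The new relock loop depends on the deadlock-avoidance rule already visible in the hunk: whenever two bdi_writeback list locks must be held at once, they are taken in address order, so dropping both around cond_resched() and re-acquiring them is safe. The rule in isolation, as a hedged userspace sketch:

    #include <pthread.h>
    #include <stdint.h>

    /* Acquire two locks in a globally consistent (address) order so
     * concurrent switchers can never deadlock; sketch only. */
    static void demo_lock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
    {
        if ((uintptr_t)a < (uintptr_t)b) {
            pthread_mutex_lock(a);
            pthread_mutex_lock(b);
        } else {
            pthread_mutex_lock(b);
            pthread_mutex_lock(a);
        }
    }
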
diff --git a/fs/fsopen.c b/fs/fsopen.c
index 6cef3deccded..8903d2c23db4 100644
--- a/fs/fsopen.c
+++ b/fs/fsopen.c
@@ -18,50 +18,56 @@
#include "internal.h"
#include "mount.h"
+static inline const char *fetch_message_locked(struct fc_log *log, size_t len,
+ bool *need_free)
+{
+ const char *p;
+ int index;
+
+ if (unlikely(log->head == log->tail))
+ return ERR_PTR(-ENODATA);
+
+ index = log->tail & (ARRAY_SIZE(log->buffer) - 1);
+ p = log->buffer[index];
+ if (unlikely(strlen(p) > len))
+ return ERR_PTR(-EMSGSIZE);
+
+ log->buffer[index] = NULL;
+ *need_free = log->need_free & (1 << index);
+ log->need_free &= ~(1 << index);
+ log->tail++;
+
+ return p;
+}
+
/*
* Allow the user to read back any error, warning or informational messages.
+ * Only one message is returned for each read(2) call.
*/
static ssize_t fscontext_read(struct file *file,
char __user *_buf, size_t len, loff_t *pos)
{
struct fs_context *fc = file->private_data;
- struct fc_log *log = fc->log.log;
- unsigned int logsize = ARRAY_SIZE(log->buffer);
- ssize_t ret;
- char *p;
+ ssize_t err;
+ const char *p __free(kfree) = NULL, *message;
bool need_free;
- int index, n;
+ int n;
- ret = mutex_lock_interruptible(&fc->uapi_mutex);
- if (ret < 0)
- return ret;
-
- if (log->head == log->tail) {
- mutex_unlock(&fc->uapi_mutex);
- return -ENODATA;
- }
-
- index = log->tail & (logsize - 1);
- p = log->buffer[index];
- need_free = log->need_free & (1 << index);
- log->buffer[index] = NULL;
- log->need_free &= ~(1 << index);
- log->tail++;
+ err = mutex_lock_interruptible(&fc->uapi_mutex);
+ if (err < 0)
+ return err;
+ message = fetch_message_locked(fc->log.log, len, &need_free);
mutex_unlock(&fc->uapi_mutex);
+ if (IS_ERR(message))
+ return PTR_ERR(message);
- ret = -EMSGSIZE;
- n = strlen(p);
- if (n > len)
- goto err_free;
- ret = -EFAULT;
- if (copy_to_user(_buf, p, n) != 0)
- goto err_free;
- ret = n;
-
-err_free:
if (need_free)
- kfree(p);
- return ret;
+ p = message;
+
+ n = strlen(message);
+ if (copy_to_user(_buf, message, n))
+ return -EFAULT;
+ return n;
}
static int fscontext_release(struct inode *inode, struct file *file)
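
The extracted helper leans on the fc_log ring-buffer convention: free-running head/tail counters over a power-of-two array, with the slot index derived by masking. A minimal sketch of that convention (demo names, not the fs_context API):

    #include <stddef.h>

    #define DEMO_LOG_SIZE 8u                  /* must be a power of two */

    struct demo_log {
        unsigned int head, tail;              /* free-running counters */
        const char *buffer[DEMO_LOG_SIZE];
    };

    /* Pop the oldest message, or NULL when the ring is empty. */
    static const char *demo_log_pop(struct demo_log *log)
    {
        if (log->head == log->tail)
            return NULL;
        return log->buffer[log->tail++ & (DEMO_LOG_SIZE - 1)];
    }
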
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 1f64ae6d7a69..8207855f9af2 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1989,7 +1989,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
*/
if (!oh.unique) {
err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs);
- goto out;
+ goto copy_finish;
}
err = -EINVAL;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 49659d1b2932..a8218a3bc0b4 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -355,8 +355,14 @@ void fuse_file_release(struct inode *inode, struct fuse_file *ff,
* Make the release synchronous if this is a fuseblk mount,
* synchronous RELEASE is allowed (and desirable) in this case
* because the server can be trusted not to screw up.
+ *
+ * Always use the asynchronous file put because the current thread
+ * might be the fuse server. This can happen if a process starts some
+ * aio and closes the fd before the aio completes. Since aio takes its
+ * own ref to the file, the IO completion has to drop the ref, which is
+ * how the fuse server can end up closing its clients' files.
*/
- fuse_file_put(ff, ff->fm->fc->destroy);
+ fuse_file_put(ff, false);
}
void fuse_release_common(struct file *file, bool isdir)
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 161fc76ed5b0..e5558e63e2cb 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -801,8 +801,6 @@ skip_inval:
clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD);
return;
- } else {
- clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
}
}
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index f007e389d5d2..fc01f9dc8c39 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -491,8 +491,14 @@ void minix_set_inode(struct inode *inode, dev_t rdev)
inode->i_op = &minix_symlink_inode_operations;
inode_nohighmem(inode);
inode->i_mapping->a_ops = &minix_aops;
- } else
+ } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
+ S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
init_special_inode(inode, inode->i_mode, rdev);
+ } else {
+ printk(KERN_DEBUG "MINIX-fs: Invalid file type 0%04o for inode %lu.\n",
+ inode->i_mode, inode->i_ino);
+ make_bad_inode(inode);
+ }
}
/*
diff --git a/fs/namei.c b/fs/namei.c
index 6795600c5738..1eb20cb5e58f 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1388,6 +1388,10 @@ static int follow_automount(struct path *path, int *count, unsigned lookup_flags
dentry->d_inode)
return -EISDIR;
+ /* No need to trigger automounts if mountpoint crossing is disabled. */
+ if (lookup_flags & LOOKUP_NO_XDEV)
+ return -EXDEV;
+
if (count && (*count)++ >= MAXSYMLINKS)
return -ELOOP;
@@ -1411,6 +1415,10 @@ static int __traverse_mounts(struct path *path, unsigned flags, bool *jumped,
/* Allow the filesystem to manage the transit without i_mutex
* being held. */
if (flags & DCACHE_MANAGE_TRANSIT) {
+ if (lookup_flags & LOOKUP_NO_XDEV) {
+ ret = -EXDEV;
+ break;
+ }
ret = path->dentry->d_op->d_manage(path, false);
flags = smp_load_acquire(&path->dentry->d_flags);
if (ret < 0)
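
From userspace this tightening is visible through openat2(): RESOLVE_NO_XDEV (which sets LOOKUP_NO_XDEV in the kernel) now fails with EXDEV before an automount or managed transit is triggered, rather than after. A hedged usage sketch:

    #include <fcntl.h>
    #include <linux/openat2.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* Resolve a path without crossing into another mount; an automount
     * point on the way now yields -1/EXDEV instead of being mounted. */
    static int demo_open_no_xdev(const char *path)
    {
        struct open_how how = {
            .flags   = O_RDONLY,
            .resolve = RESOLVE_NO_XDEV,
        };

        return (int)syscall(SYS_openat2, AT_FDCWD, path, &how, sizeof(how));
    }
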
diff --git a/fs/namespace.c b/fs/namespace.c
index c8519302f582..cc4926d53e7d 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -65,6 +65,15 @@ static int __init set_mphash_entries(char *str)
}
__setup("mphash_entries=", set_mphash_entries);
+static char * __initdata initramfs_options;
+static int __init initramfs_options_setup(char *str)
+{
+ initramfs_options = str;
+ return 1;
+}
+
+__setup("initramfs_options=", initramfs_options_setup);
+
static u64 event;
static DEFINE_IDA(mnt_id_ida);
static DEFINE_IDA(mnt_group_ida);
@@ -144,7 +153,7 @@ static void mnt_ns_release(struct mnt_namespace *ns)
lockdep_assert_not_held(&mnt_ns_tree_lock);
/* keep alive for {list,stat}mount() */
- if (refcount_dec_and_test(&ns->passive)) {
+ if (ns && refcount_dec_and_test(&ns->passive)) {
put_user_ns(ns->user_ns);
kfree(ns);
}
@@ -5200,7 +5209,6 @@ static int grab_requested_root(struct mnt_namespace *ns, struct path *root)
static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
struct mnt_namespace *ns)
{
- struct path root __free(path_put) = {};
struct mount *m;
int err;
@@ -5212,7 +5220,7 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
if (!s->mnt)
return -ENOENT;
- err = grab_requested_root(ns, &root);
+ err = grab_requested_root(ns, &s->root);
if (err)
return err;
@@ -5221,15 +5229,13 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
* mounts to show users.
*/
m = real_mount(s->mnt);
- if (!is_path_reachable(m, m->mnt.mnt_root, &root) &&
+ if (!is_path_reachable(m, m->mnt.mnt_root, &s->root) &&
!ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
return -EPERM;
err = security_sb_statfs(s->mnt->mnt_root);
if (err)
return err;
-
- s->root = root;
if (s->mask & STATMOUNT_SB_BASIC)
statmount_sb_basic(s);
@@ -5406,28 +5412,40 @@ retry:
if (!ret)
ret = copy_statmount_to_user(ks);
kvfree(ks->seq.buf);
+ path_put(&ks->root);
if (retry_statmount(ret, &seq_size))
goto retry;
return ret;
}
-static ssize_t do_listmount(struct mnt_namespace *ns, u64 mnt_parent_id,
- u64 last_mnt_id, u64 *mnt_ids, size_t nr_mnt_ids,
- bool reverse)
+struct klistmount {
+ u64 last_mnt_id;
+ u64 mnt_parent_id;
+ u64 *kmnt_ids;
+ u32 nr_mnt_ids;
+ struct mnt_namespace *ns;
+ struct path root;
+};
+
+static ssize_t do_listmount(struct klistmount *kls, bool reverse)
{
- struct path root __free(path_put) = {};
+ struct mnt_namespace *ns = kls->ns;
+ u64 mnt_parent_id = kls->mnt_parent_id;
+ u64 last_mnt_id = kls->last_mnt_id;
+ u64 *mnt_ids = kls->kmnt_ids;
+ size_t nr_mnt_ids = kls->nr_mnt_ids;
struct path orig;
struct mount *r, *first;
ssize_t ret;
rwsem_assert_held(&namespace_sem);
- ret = grab_requested_root(ns, &root);
+ ret = grab_requested_root(ns, &kls->root);
if (ret)
return ret;
if (mnt_parent_id == LSMT_ROOT) {
- orig = root;
+ orig = kls->root;
} else {
orig.mnt = lookup_mnt_in_ns(mnt_parent_id, ns);
if (!orig.mnt)
@@ -5439,7 +5457,7 @@ static ssize_t do_listmount(struct mnt_namespace *ns, u64 mnt_parent_id,
* Don't trigger audit denials. We just want to determine what
* mounts to show users.
*/
- if (!is_path_reachable(real_mount(orig.mnt), orig.dentry, &root) &&
+ if (!is_path_reachable(real_mount(orig.mnt), orig.dentry, &kls->root) &&
!ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
return -EPERM;
@@ -5472,14 +5490,45 @@ static ssize_t do_listmount(struct mnt_namespace *ns, u64 mnt_parent_id,
return ret;
}
+static void __free_klistmount_free(const struct klistmount *kls)
+{
+ path_put(&kls->root);
+ kvfree(kls->kmnt_ids);
+ mnt_ns_release(kls->ns);
+}
+
+static inline int prepare_klistmount(struct klistmount *kls, struct mnt_id_req *kreq,
+ size_t nr_mnt_ids)
+{
+
+ u64 last_mnt_id = kreq->param;
+
+ /* The first valid unique mount id is MNT_UNIQUE_ID_OFFSET + 1. */
+ if (last_mnt_id != 0 && last_mnt_id <= MNT_UNIQUE_ID_OFFSET)
+ return -EINVAL;
+
+ kls->last_mnt_id = last_mnt_id;
+
+ kls->nr_mnt_ids = nr_mnt_ids;
+ kls->kmnt_ids = kvmalloc_array(nr_mnt_ids, sizeof(*kls->kmnt_ids),
+ GFP_KERNEL_ACCOUNT);
+ if (!kls->kmnt_ids)
+ return -ENOMEM;
+
+ kls->ns = grab_requested_mnt_ns(kreq);
+ if (!kls->ns)
+ return -ENOENT;
+
+ kls->mnt_parent_id = kreq->mnt_id;
+ return 0;
+}
+
SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req,
u64 __user *, mnt_ids, size_t, nr_mnt_ids, unsigned int, flags)
{
- u64 *kmnt_ids __free(kvfree) = NULL;
+ struct klistmount kls __free(klistmount_free) = {};
const size_t maxcount = 1000000;
- struct mnt_namespace *ns __free(mnt_ns_release) = NULL;
struct mnt_id_req kreq;
- u64 last_mnt_id;
ssize_t ret;
if (flags & ~LISTMOUNT_REVERSE)
@@ -5500,31 +5549,20 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req,
if (ret)
return ret;
- last_mnt_id = kreq.param;
- /* The first valid unique mount id is MNT_UNIQUE_ID_OFFSET + 1. */
- if (last_mnt_id != 0 && last_mnt_id <= MNT_UNIQUE_ID_OFFSET)
- return -EINVAL;
-
- kmnt_ids = kvmalloc_array(nr_mnt_ids, sizeof(*kmnt_ids),
- GFP_KERNEL_ACCOUNT);
- if (!kmnt_ids)
- return -ENOMEM;
-
- ns = grab_requested_mnt_ns(&kreq);
- if (!ns)
- return -ENOENT;
+ ret = prepare_klistmount(&kls, &kreq, nr_mnt_ids);
+ if (ret)
+ return ret;
- if (kreq.mnt_ns_id && (ns != current->nsproxy->mnt_ns) &&
- !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
+ if (kreq.mnt_ns_id && (kls.ns != current->nsproxy->mnt_ns) &&
+ !ns_capable_noaudit(kls.ns->user_ns, CAP_SYS_ADMIN))
return -ENOENT;
scoped_guard(rwsem_read, &namespace_sem)
- ret = do_listmount(ns, kreq.mnt_id, last_mnt_id, kmnt_ids,
- nr_mnt_ids, (flags & LISTMOUNT_REVERSE));
+ ret = do_listmount(&kls, (flags & LISTMOUNT_REVERSE));
if (ret <= 0)
return ret;
- if (copy_to_user(mnt_ids, kmnt_ids, ret * sizeof(*mnt_ids)))
+ if (copy_to_user(mnt_ids, kls.kmnt_ids, ret * sizeof(*mnt_ids)))
return -EFAULT;
return ret;
@@ -5537,7 +5575,7 @@ static void __init init_mount_tree(void)
struct mnt_namespace *ns;
struct path root;
- mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", NULL);
+ mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", initramfs_options);
if (IS_ERR(mnt))
panic("Can't create rootfs");
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 6cf92498a5ac..86bdc7d23fb9 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -211,10 +211,6 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion)
return ERR_PTR(-ENOMEM);
}
cb_info->serv = serv;
- /* As there is only one thread we need to over-ride the
- * default maximum of 80 connections
- */
- serv->sv_maxconn = 1024;
dprintk("nfs_callback_create_svc: service created\n");
return serv;
}
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index fdeb0b34a3d3..4254ba3ee7c5 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -984,6 +984,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp)
nfs_put_client(cps.clp);
goto out_invalidcred;
}
+ svc_xprt_set_valid(rqstp->rq_xprt);
}
cps.minorversion = hdr_arg.minorversion;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ea92483d5e71..c21e63027fc0 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -9393,7 +9393,7 @@ static int nfs4_verify_back_channel_attrs(struct nfs41_create_session_args *args
goto out;
if (rcvd->max_rqst_sz > sent->max_rqst_sz)
return -EINVAL;
- if (rcvd->max_resp_sz < sent->max_resp_sz)
+ if (rcvd->max_resp_sz > sent->max_resp_sz)
return -EINVAL;
if (rcvd->max_resp_sz_cached > sent->max_resp_sz_cached)
return -EINVAL;
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 49aede376d86..3216416ca042 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1075,47 +1075,62 @@ static struct svc_export *exp_find(struct cache_detail *cd,
}
/**
- * check_nfsd_access - check if access to export is allowed.
+ * check_xprtsec_policy - check if access to export is allowed by the
+ * xprtsec policy
* @exp: svc_export that is being accessed.
- * @rqstp: svc_rqst attempting to access @exp (will be NULL for LOCALIO).
+ * @rqstp: svc_rqst attempting to access @exp.
+ *
+ * Helper function for check_nfsd_access(). Note that callers should be
+ * using check_nfsd_access() instead of calling this function directly. The
+ * one exception is __fh_verify() since it has logic that may result in one
+ * or both of the helpers being skipped.
*
* Return values:
* %nfs_ok if access is granted, or
* %nfserr_wrongsec if access is denied
*/
-__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp)
+__be32 check_xprtsec_policy(struct svc_export *exp, struct svc_rqst *rqstp)
{
- struct exp_flavor_info *f, *end = exp->ex_flavors + exp->ex_nflavors;
- struct svc_xprt *xprt;
-
- /*
- * If rqstp is NULL, this is a LOCALIO request which will only
- * ever use a filehandle/credential pair for which access has
- * been affirmed (by ACCESS or OPEN NFS requests) over the
- * wire. So there is no need for further checks here.
- */
- if (!rqstp)
- return nfs_ok;
-
- xprt = rqstp->rq_xprt;
+ struct svc_xprt *xprt = rqstp->rq_xprt;
if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_NONE) {
if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags))
- goto ok;
+ return nfs_ok;
}
if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_TLS) {
if (test_bit(XPT_TLS_SESSION, &xprt->xpt_flags) &&
!test_bit(XPT_PEER_AUTH, &xprt->xpt_flags))
- goto ok;
+ return nfs_ok;
}
if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_MTLS) {
if (test_bit(XPT_TLS_SESSION, &xprt->xpt_flags) &&
test_bit(XPT_PEER_AUTH, &xprt->xpt_flags))
- goto ok;
+ return nfs_ok;
}
- goto denied;
+ return nfserr_wrongsec;
+}
+
+/**
+ * check_security_flavor - check if access to export is allowed by the
+ * security flavor
+ * @exp: svc_export that is being accessed.
+ * @rqstp: svc_rqst attempting to access @exp.
+ * @may_bypass_gss: reduce strictness of authorization check
+ *
+ * Helper function for check_nfsd_access(). Note that callers should be
+ * using check_nfsd_access() instead of calling this function directly. The
+ * one exception is __fh_verify() since it has logic that may result in one
+ * or both of the helpers being skipped.
+ *
+ * Return values:
+ * %nfs_ok if access is granted, or
+ * %nfserr_wrongsec if access is denied
+ */
+__be32 check_security_flavor(struct svc_export *exp, struct svc_rqst *rqstp,
+ bool may_bypass_gss)
+{
+ struct exp_flavor_info *f, *end = exp->ex_flavors + exp->ex_nflavors;
-ok:
/* legacy gss-only clients are always OK: */
if (exp->ex_client == rqstp->rq_gssclient)
return nfs_ok;
@@ -1140,10 +1155,47 @@ ok:
if (nfsd4_spo_must_allow(rqstp))
return nfs_ok;
-denied:
+ /* Some calls may be processed without authentication
+ * on GSS exports. For example NFS2/3 calls on root
+ * directory, see section 2.3.2 of rfc 2623.
+ * For "may_bypass_gss" check that export has really
+ * enabled some flavor with authentication (GSS or any
+ * other) and also check that the used auth flavor is
+ * without authentication (none or sys).
+ */
+ if (may_bypass_gss && (
+ rqstp->rq_cred.cr_flavor == RPC_AUTH_NULL ||
+ rqstp->rq_cred.cr_flavor == RPC_AUTH_UNIX)) {
+ for (f = exp->ex_flavors; f < end; f++) {
+ if (f->pseudoflavor >= RPC_AUTH_DES)
+ return 0;
+ }
+ }
+
return nfserr_wrongsec;
}
+/**
+ * check_nfsd_access - check if access to export is allowed.
+ * @exp: svc_export that is being accessed.
+ * @rqstp: svc_rqst attempting to access @exp.
+ * @may_bypass_gss: reduce strictness of authorization check
+ *
+ * Return values:
+ * %nfs_ok if access is granted, or
+ * %nfserr_wrongsec if access is denied
+ */
+__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp,
+ bool may_bypass_gss)
+{
+ __be32 status;
+
+ status = check_xprtsec_policy(exp, rqstp);
+ if (status != nfs_ok)
+ return status;
+ return check_security_flavor(exp, rqstp, may_bypass_gss);
+}
+
/*
* Uses rq_client and rq_gssclient to find an export; uses rq_client (an
* auth_unix client) if it's available and has secinfo information;
diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
index 3794ae253a70..0f4d34943e2c 100644
--- a/fs/nfsd/export.h
+++ b/fs/nfsd/export.h
@@ -101,7 +101,11 @@ struct svc_expkey {
struct svc_cred;
int nfsexp_flags(struct svc_cred *cred, struct svc_export *exp);
-__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp);
+__be32 check_xprtsec_policy(struct svc_export *exp, struct svc_rqst *rqstp);
+__be32 check_security_flavor(struct svc_export *exp, struct svc_rqst *rqstp,
+ bool may_bypass_gss);
+__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp,
+ bool may_bypass_gss);
/*
* Function declarations
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index 46a7f9b813e5..6b042218668b 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -38,16 +38,40 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp,
memcpy(&fh.fh_handle.fh_raw, f->data, f->size);
fh.fh_export = NULL;
+ /*
+ * Allow BYPASS_GSS as some client implementations use AUTH_SYS
+ * for NLM even when GSS is used for NFS.
+ * Allow OWNER_OVERRIDE as permission might have been changed
+ * after the file was opened.
+ * Pass MAY_NLM so that authentication can be completely bypassed
+ * if NFSEXP_NOAUTHNLM is set. Some older clients use AUTH_NULL
+ * for NLM requests.
+ */
access = (mode == O_WRONLY) ? NFSD_MAY_WRITE : NFSD_MAY_READ;
- access |= NFSD_MAY_LOCK;
+ access |= NFSD_MAY_NLM | NFSD_MAY_OWNER_OVERRIDE | NFSD_MAY_BYPASS_GSS;
nfserr = nfsd_open(rqstp, &fh, S_IFREG, access, filp);
fh_put(&fh);
- /* We return nlm error codes as nlm doesn't know
+ /* We return nlm error codes as nlm doesn't know
* about nfsd, but nfsd does know about nlm..
*/
switch (nfserr) {
case nfs_ok:
return 0;
+ case nfserr_jukebox:
+ /* this error can indicate a presence of a conflicting
+ * delegation to an NLM lock request. Options are:
+ * (1) For now, drop this request and make the client
+ * retry. When delegation is returned, client's lock retry
+ * will complete.
+ * (2) NLM4_DENIED as per "spec" signals to the client
+ * that the lock is unavailable now but client can retry.
+ * Linux client implementation does not. It treats
+ * NLM4_DENIED same as NLM4_FAILED and errors the request.
+ * (3) For the future, treat this as blocked lock and try
+ * to callback when the delegation is returned but might
+ * not have a proper lock request to block on.
+ */
+ fallthrough;
case nfserr_dropit:
return nlm_drop_reply;
case nfserr_stale:
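
This switch maps NFS-internal status codes onto what NLM can express, and nfserr_jukebox (a conflicting delegation) now falls through to the drop-and-retry path. The shape of that translation, with illustrative enums rather than the real nfserr/nlm constants:

    enum demo_nfserr { DEMO_NFS_OK, DEMO_NFSERR_JUKEBOX,
                       DEMO_NFSERR_DROPIT, DEMO_NFSERR_STALE };
    enum demo_nlmstat { DEMO_NLM_OK, DEMO_NLM_DROP_REPLY,
                        DEMO_NLM_STALE_FH, DEMO_NLM_FAILED };

    static enum demo_nlmstat demo_map_nfs_to_nlm(enum demo_nfserr err)
    {
        switch (err) {
        case DEMO_NFS_OK:
            return DEMO_NLM_OK;
        case DEMO_NFSERR_JUKEBOX:   /* conflicting delegation: drop, client retries */
        case DEMO_NFSERR_DROPIT:
            return DEMO_NLM_DROP_REPLY;
        case DEMO_NFSERR_STALE:
            return DEMO_NLM_STALE_FH;
        default:
            return DEMO_NLM_FAILED;
        }
    }
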
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 26f7b34d1a03..a05a45bb1978 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -129,8 +129,8 @@ struct nfsd_net {
unsigned char writeverf[8];
/*
- * Max number of connections this nfsd container will allow. Defaults
- * to '0' which is means that it bases this on the number of threads.
+ * Max number of non-validated connections this nfsd container
+ * will allow. Defaults to '0', which gets mapped to 64.
*/
unsigned int max_connections;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 02c9f3b312a0..8f2dc7eb4fc4 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1372,7 +1372,7 @@ try_again:
return 0;
}
if (work) {
- strscpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr) - 1);
+ strscpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr));
refcount_set(&work->nsui_refcnt, 2);
work->nsui_busy = true;
list_add_tail(&work->nsui_list, &nn->nfsd_ssc_mount_list);
@@ -2799,7 +2799,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
if (current_fh->fh_export &&
need_wrongsec_check(rqstp))
- op->status = check_nfsd_access(current_fh->fh_export, rqstp);
+ op->status = check_nfsd_access(current_fh->fh_export, rqstp, false);
}
encode_op:
if (op->status == nfserr_replay_me) {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index bcb44400e243..7b0fabf8c657 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -7998,11 +7998,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (check_lock_length(lock->lk_offset, lock->lk_length))
return nfserr_inval;
- if ((status = fh_verify(rqstp, &cstate->current_fh,
- S_IFREG, NFSD_MAY_LOCK))) {
- dprintk("NFSD: nfsd4_lock: permission denied!\n");
+ status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0);
+ if (status != nfs_ok)
return status;
- }
sb = cstate->current_fh.fh_dentry->d_sb;
if (lock->lk_is_new) {
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 6edeb3bdf81b..90db900b346c 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3784,7 +3784,7 @@ nfsd4_encode_entry4_fattr(struct nfsd4_readdir *cd, const char *name,
nfserr = nfserrno(err);
goto out_put;
}
- nfserr = check_nfsd_access(exp, cd->rd_rqstp);
+ nfserr = check_nfsd_access(exp, cd->rd_rqstp, false);
if (nfserr)
goto out_put;
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 96e19c50a5d7..854dcdc36b2c 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -320,6 +320,7 @@ __fh_verify(struct svc_rqst *rqstp,
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct svc_export *exp = NULL;
+ bool may_bypass_gss = false;
struct dentry *dentry;
__be32 error;
@@ -363,12 +364,31 @@ __fh_verify(struct svc_rqst *rqstp,
goto out;
/*
- * pseudoflavor restrictions are not enforced on NLM,
- * which clients virtually always use auth_sys for,
- * even while using RPCSEC_GSS for NFS.
+ * If rqstp is NULL, this is a LOCALIO request which will only
+ * ever use a filehandle/credential pair for which access has
+ * been affirmed (by ACCESS or OPEN NFS requests) over the
+ * wire. Skip both the xprtsec policy and the security flavor
+ * checks.
*/
- if (access & NFSD_MAY_LOCK || access & NFSD_MAY_BYPASS_GSS)
- goto skip_pseudoflavor_check;
+ if (!rqstp)
+ goto check_permissions;
+
+ if ((access & NFSD_MAY_NLM) && (exp->ex_flags & NFSEXP_NOAUTHNLM))
+ /* NLM is allowed to fully bypass authentication */
+ goto out;
+
+ /*
+ * NLM is allowed to bypass the xprtsec policy check because lockd
+ * doesn't support xprtsec.
+ */
+ if (!(access & NFSD_MAY_NLM)) {
+ error = check_xprtsec_policy(exp, rqstp);
+ if (error)
+ goto out;
+ }
+
+ if (access & NFSD_MAY_BYPASS_GSS)
+ may_bypass_gss = true;
/*
* Clients may expect to be able to use auth_sys during mount,
* even if they use gss for everything else; see section 2.3.2
@@ -376,13 +396,17 @@ __fh_verify(struct svc_rqst *rqstp,
*/
if (access & NFSD_MAY_BYPASS_GSS_ON_ROOT
&& exp->ex_path.dentry == dentry)
- goto skip_pseudoflavor_check;
+ may_bypass_gss = true;
- error = check_nfsd_access(exp, rqstp);
+ error = check_security_flavor(exp, rqstp, may_bypass_gss);
if (error)
goto out;
-skip_pseudoflavor_check:
+	/* During LOCALIO, fh_verify() is called with a NULL rqstp */
+ if (rqstp)
+ svc_xprt_set_valid(rqstp->rq_xprt);
+
+check_permissions:
/* Finally, check access permissions. */
error = nfsd_permission(cred, exp, dentry, access);
out:
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index b8470d4cbe99..3448e444d410 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -79,7 +79,7 @@ DEFINE_NFSD_XDR_ERR_EVENT(cant_encode);
{ NFSD_MAY_READ, "READ" }, \
{ NFSD_MAY_SATTR, "SATTR" }, \
{ NFSD_MAY_TRUNC, "TRUNC" }, \
- { NFSD_MAY_LOCK, "LOCK" }, \
+ { NFSD_MAY_NLM, "NLM" }, \
{ NFSD_MAY_OWNER_OVERRIDE, "OWNER_OVERRIDE" }, \
{ NFSD_MAY_LOCAL_ACCESS, "LOCAL_ACCESS" }, \
{ NFSD_MAY_BYPASS_GSS_ON_ROOT, "BYPASS_GSS_ON_ROOT" }, \
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ca29a5e1600f..8c4f4e2f9cee 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -321,7 +321,7 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry);
if (err)
return err;
- err = check_nfsd_access(exp, rqstp);
+ err = check_nfsd_access(exp, rqstp, false);
if (err)
goto out;
/*
@@ -2519,7 +2519,7 @@ nfsd_permission(struct svc_cred *cred, struct svc_export *exp,
(acc & NFSD_MAY_EXEC)? " exec" : "",
(acc & NFSD_MAY_SATTR)? " sattr" : "",
(acc & NFSD_MAY_TRUNC)? " trunc" : "",
- (acc & NFSD_MAY_LOCK)? " lock" : "",
+ (acc & NFSD_MAY_NLM)? " nlm" : "",
(acc & NFSD_MAY_OWNER_OVERRIDE)? " owneroverride" : "",
inode->i_mode,
IS_IMMUTABLE(inode)? " immut" : "",
@@ -2544,16 +2544,6 @@ nfsd_permission(struct svc_cred *cred, struct svc_export *exp,
if ((acc & NFSD_MAY_TRUNC) && IS_APPEND(inode))
return nfserr_perm;
- if (acc & NFSD_MAY_LOCK) {
- /* If we cannot rely on authentication in NLM requests,
- * just allow locks, otherwise require read permission, or
- * ownership
- */
- if (exp->ex_flags & NFSEXP_NOAUTHNLM)
- return 0;
- else
- acc = NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE;
- }
/*
* The file owner always gets access permission for accesses that
* would normally be checked at open time. This is to make
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 3ff146522556..a61ada4fd920 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -20,7 +20,7 @@
#define NFSD_MAY_READ 0x004 /* == MAY_READ */
#define NFSD_MAY_SATTR 0x008
#define NFSD_MAY_TRUNC 0x010
-#define NFSD_MAY_LOCK 0x020
+#define NFSD_MAY_NLM 0x020 /* request is from lockd */
#define NFSD_MAY_MASK 0x03f
/* extra hints to permission and open routines: */
diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c
index cf4fe21a5039..29b585f443f3 100644
--- a/fs/ntfs3/bitmap.c
+++ b/fs/ntfs3/bitmap.c
@@ -1399,6 +1399,7 @@ int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits)
mark_buffer_dirty(bh);
unlock_buffer(bh);
/* err = sync_dirty_buffer(bh); */
+ put_bh(bh);
b0 = 0;
bits -= op;
diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c
index 1bf2a6593dec..6d1bf890929d 100644
--- a/fs/ntfs3/index.c
+++ b/fs/ntfs3/index.c
@@ -1508,6 +1508,16 @@ static int indx_add_allocate(struct ntfs_index *indx, struct ntfs_inode *ni,
bmp_size = bmp_size_v = le32_to_cpu(bmp->res.data_size);
}
+ /*
+ * Index blocks exist, but $BITMAP has zero valid bits.
+	 * This implies on-disk corruption and must be rejected.
+ */
+ if (in->name == I30_NAME &&
+ unlikely(bmp_size_v == 0 && indx->alloc_run.count)) {
+ err = -EINVAL;
+ goto out1;
+ }
+
bit = bmp_size << 3;
}
diff --git a/fs/ntfs3/run.c b/fs/ntfs3/run.c
index 48566dff0dc9..662add939da7 100644
--- a/fs/ntfs3/run.c
+++ b/fs/ntfs3/run.c
@@ -9,6 +9,7 @@
#include <linux/blkdev.h>
#include <linux/fs.h>
#include <linux/log2.h>
+#include <linux/overflow.h>
#include "debug.h"
#include "ntfs.h"
@@ -982,14 +983,18 @@ int run_unpack(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino,
if (!dlcn)
return -EINVAL;
- lcn = prev_lcn + dlcn;
+
+ if (check_add_overflow(prev_lcn, dlcn, &lcn))
+ return -EINVAL;
prev_lcn = lcn;
} else {
/* The size of 'dlcn' can't be > 8. */
return -EINVAL;
}
- next_vcn = vcn64 + len;
+ if (check_add_overflow(vcn64, len, &next_vcn))
+ return -EINVAL;
+
/* Check boundary. */
if (next_vcn > evcn + 1)
return -EINVAL;
@@ -1153,7 +1158,8 @@ int run_get_highest_vcn(CLST vcn, const u8 *run_buf, u64 *highest_vcn)
return -EINVAL;
run_buf += size_size + offset_size;
- vcn64 += len;
+ if (check_add_overflow(vcn64, len, &vcn64))
+ return -EINVAL;
#ifndef CONFIG_NTFS3_64BIT_CLUSTER
if (vcn64 > 0x100000000ull)
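check_add_overflow() from <linux/overflow.h> stores the sum into its third argument and returns true on wraparound, which lets each addition in run_unpack() and run_get_highest_vcn() bail out with -EINVAL instead of continuing with a wrapped cluster number. A minimal sketch of the pattern:

    #include <linux/overflow.h>
    #include <linux/errno.h>
    #include <linux/types.h>

    /* Advance a virtual cluster cursor, rejecting arithmetic wraparound. */
    static int advance_vcn(u64 vcn64, u64 len, u64 *next_vcn)
    {
            if (check_add_overflow(vcn64, len, next_vcn))
                    return -EINVAL;     /* corrupt run list: VCN would wrap */
            return 0;
    }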
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 77edcd70f72c..c5236b3ed168 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -1018,6 +1018,7 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
printk(KERN_ERR "ocfs2: Could not determine"
" locking version\n");
user_cluster_disconnect(conn);
+ lc = NULL;
goto out;
}
wait_event(lc->oc_wait, (atomic_read(&lc->oc_this_node) > 0));
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 71c0ce31a4c4..94825180385a 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -163,6 +163,9 @@ static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES;
/* SLAB cache for dquot structures */
static struct kmem_cache *dquot_cachep;
+/* Workqueue for quota_release_work */
+static struct workqueue_struct *quota_unbound_wq;
+
void register_quota_format(struct quota_format_type *fmt)
{
spin_lock(&dq_list_lock);
@@ -882,7 +885,7 @@ void dqput(struct dquot *dquot)
put_releasing_dquots(dquot);
atomic_dec(&dquot->dq_count);
spin_unlock(&dq_list_lock);
- queue_delayed_work(system_unbound_wq, &quota_release_work, 1);
+ queue_delayed_work(quota_unbound_wq, &quota_release_work, 1);
}
EXPORT_SYMBOL(dqput);
@@ -3042,6 +3045,11 @@ static int __init dquot_init(void)
shrinker_register(dqcache_shrinker);
+ quota_unbound_wq = alloc_workqueue("quota_events_unbound",
+ WQ_UNBOUND | WQ_MEM_RECLAIM, WQ_MAX_ACTIVE);
+ if (!quota_unbound_wq)
+ panic("Cannot create quota_unbound_wq\n");
+
return 0;
}
fs_initcall(dquot_init);
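The dedicated queue is created once at init: WQ_UNBOUND lets quota_release_work run on any CPU rather than where dqput() happened, and WQ_MEM_RECLAIM guarantees a rescuer thread so dquot release can still make progress under memory pressure. A condensed sketch of the pattern (names hypothetical):

    #include <linux/workqueue.h>

    static struct workqueue_struct *example_wq;

    static void example_release(struct work_struct *work)
    {
            /* ... release deferred references ... */
    }
    static DECLARE_DELAYED_WORK(example_work, example_release);

    static int __init example_init(void)
    {
            example_wq = alloc_workqueue("example_unbound",
                                         WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
            if (!example_wq)
                    return -ENOMEM;
            /* Defer by one jiffy, as dqput() does for quota_release_work. */
            queue_delayed_work(example_wq, &example_work, 1);
            return 0;
    }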
diff --git a/fs/read_write.c b/fs/read_write.c
index befec0b5c537..46408bab9238 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1600,6 +1600,13 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
if (len == 0)
return 0;
+ /*
+ * Make sure return value doesn't overflow in 32bit compat mode. Also
+ * limit the size for all cases except when calling ->copy_file_range().
+ */
+ if (splice || !file_out->f_op->copy_file_range || in_compat_syscall())
+ len = min_t(size_t, MAX_RW_COUNT, len);
+
file_start_write(file_out);
/*
@@ -1613,9 +1620,7 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
len, flags);
} else if (!splice && file_in->f_op->remap_file_range && samesb) {
ret = file_in->f_op->remap_file_range(file_in, pos_in,
- file_out, pos_out,
- min_t(loff_t, MAX_RW_COUNT, len),
- REMAP_FILE_CAN_SHORTEN);
+ file_out, pos_out, len, REMAP_FILE_CAN_SHORTEN);
/* fallback to splice */
if (ret <= 0)
splice = true;
@@ -1648,8 +1653,7 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
* to splicing from input file, while file_start_write() is held on
* the output file on a different sb.
*/
- ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out,
- min_t(size_t, len, MAX_RW_COUNT), 0);
+ ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, len, 0);
done:
if (ret > 0) {
fsnotify_access(file_in);
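MAX_RW_COUNT (INT_MAX rounded down to a page boundary) keeps a single read/write return value representable as a 32-bit ssize_t; clamping once up front covers the splice and remap fallbacks and compat callers, while a filesystem's own ->copy_file_range() is still allowed to see the full length. A sketch of that gate (illustrative only):

    #include <linux/fs.h>
    #include <linux/minmax.h>

    static size_t clamp_cfr_len(struct file *file_out, size_t len,
                                bool splice, bool compat)
    {
            /* Only ->copy_file_range() may exceed MAX_RW_COUNT per call. */
            if (splice || !file_out->f_op->copy_file_range || compat)
                    len = min_t(size_t, MAX_RW_COUNT, len);
            return len;
    }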
diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c
index 0385a514f59e..4670c3e451a7 100644
--- a/fs/smb/client/smb1ops.c
+++ b/fs/smb/client/smb1ops.c
@@ -672,13 +672,71 @@ static int cifs_query_path_info(const unsigned int xid,
#ifdef CONFIG_CIFS_XATTR
/*
+ * For non-symlink WSL reparse points it is required to fetch
+ * EA $LXMOD, whose S_DT part contains the mandatory file type.
+ */
+ if (!rc && data->reparse_point) {
+ struct smb2_file_full_ea_info *ea;
+ u32 next = 0;
+
+ ea = (struct smb2_file_full_ea_info *)data->wsl.eas;
+ do {
+ ea = (void *)((u8 *)ea + next);
+ next = le32_to_cpu(ea->next_entry_offset);
+ } while (next);
+ if (le16_to_cpu(ea->ea_value_length)) {
+ ea->next_entry_offset = cpu_to_le32(ALIGN(sizeof(*ea) +
+ ea->ea_name_length + 1 +
+ le16_to_cpu(ea->ea_value_length), 4));
+ ea = (void *)((u8 *)ea + le32_to_cpu(ea->next_entry_offset));
+ }
+
+ rc = CIFSSMBQAllEAs(xid, tcon, full_path, SMB2_WSL_XATTR_MODE,
+ &ea->ea_data[SMB2_WSL_XATTR_NAME_LEN + 1],
+ SMB2_WSL_XATTR_MODE_SIZE, cifs_sb);
+ if (rc == SMB2_WSL_XATTR_MODE_SIZE) {
+ ea->next_entry_offset = cpu_to_le32(0);
+ ea->flags = 0;
+ ea->ea_name_length = SMB2_WSL_XATTR_NAME_LEN;
+ ea->ea_value_length = cpu_to_le16(SMB2_WSL_XATTR_MODE_SIZE);
+ memcpy(&ea->ea_data[0], SMB2_WSL_XATTR_MODE, SMB2_WSL_XATTR_NAME_LEN + 1);
+ data->wsl.eas_len += ALIGN(sizeof(*ea) + SMB2_WSL_XATTR_NAME_LEN + 1 +
+ SMB2_WSL_XATTR_MODE_SIZE, 4);
+ rc = 0;
+ } else if (rc >= 0) {
+ /* It is an error if EA $LXMOD has wrong size. */
+ rc = -EINVAL;
+ } else {
+ /*
+			 * In all other cases ignore the error if fetching
+			 * EA $LXMOD failed. It is needed only for
+			 * non-symlink WSL reparse points, and wsl_to_fattr()
+			 * handles the case when the EA is missing.
+ */
+ rc = 0;
+ }
+ }
+
+ /*
* For WSL CHR and BLK reparse points it is required to fetch
* EA $LXDEV which contains major and minor device numbers.
*/
if (!rc && data->reparse_point) {
struct smb2_file_full_ea_info *ea;
+ u32 next = 0;
ea = (struct smb2_file_full_ea_info *)data->wsl.eas;
+ do {
+ ea = (void *)((u8 *)ea + next);
+ next = le32_to_cpu(ea->next_entry_offset);
+ } while (next);
+ if (le16_to_cpu(ea->ea_value_length)) {
+ ea->next_entry_offset = cpu_to_le32(ALIGN(sizeof(*ea) +
+ ea->ea_name_length + 1 +
+ le16_to_cpu(ea->ea_value_length), 4));
+ ea = (void *)((u8 *)ea + le32_to_cpu(ea->next_entry_offset));
+ }
+
rc = CIFSSMBQAllEAs(xid, tcon, full_path, SMB2_WSL_XATTR_DEV,
&ea->ea_data[SMB2_WSL_XATTR_NAME_LEN + 1],
SMB2_WSL_XATTR_DEV_SIZE, cifs_sb);
@@ -688,8 +746,8 @@ static int cifs_query_path_info(const unsigned int xid,
ea->ea_name_length = SMB2_WSL_XATTR_NAME_LEN;
ea->ea_value_length = cpu_to_le16(SMB2_WSL_XATTR_DEV_SIZE);
memcpy(&ea->ea_data[0], SMB2_WSL_XATTR_DEV, SMB2_WSL_XATTR_NAME_LEN + 1);
- data->wsl.eas_len = sizeof(*ea) + SMB2_WSL_XATTR_NAME_LEN + 1 +
- SMB2_WSL_XATTR_DEV_SIZE;
+ data->wsl.eas_len += ALIGN(sizeof(*ea) + SMB2_WSL_XATTR_NAME_LEN + 1 +
+ SMB2_WSL_XATTR_DEV_SIZE, 4);
rc = 0;
} else if (rc >= 0) {
/* It is an error if EA $LXDEV has wrong size. */
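Both hunks grow data->wsl.eas with the same walk: follow next_entry_offset to the final entry (offset zero), and if that tail entry is populated, write back its 4-byte-aligned offset and step past it so the newly fetched EA is appended instead of overwriting. A simplified sketch of the walk (the local struct mirrors smb2_file_full_ea_info; the buffer is assumed well-formed):

    #include <linux/kernel.h>
    #include <linux/types.h>

    struct ea_info {                      /* simplified FULL_EA_INFORMATION */
            __le32 next_entry_offset;
            __u8   flags;
            __u8   ea_name_length;
            __le16 ea_value_length;
            __u8   ea_data[];
    } __packed;

    /* Return the slot where the next EA should be appended. */
    static struct ea_info *ea_find_tail(void *buf)
    {
            struct ea_info *ea = buf;
            u32 next = 0;

            do {                          /* walk to the last entry */
                    ea = (void *)((u8 *)ea + next);
                    next = le32_to_cpu(ea->next_entry_offset);
            } while (next);

            if (le16_to_cpu(ea->ea_value_length)) {
                    /* Seal the populated tail entry and step past it. */
                    ea->next_entry_offset = cpu_to_le32(ALIGN(sizeof(*ea) +
                                            ea->ea_name_length + 1 +
                                            le16_to_cpu(ea->ea_value_length), 4));
                    ea = (void *)((u8 *)ea + le32_to_cpu(ea->next_entry_offset));
            }
            return ea;
    }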
diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c
index 104a563dc317..cb049bc70e0c 100644
--- a/fs/smb/client/smb2inode.c
+++ b/fs/smb/client/smb2inode.c
@@ -1220,31 +1220,33 @@ int
smb2_set_file_info(struct inode *inode, const char *full_path,
FILE_BASIC_INFO *buf, const unsigned int xid)
{
- struct cifs_open_parms oparms;
+ struct kvec in_iov = { .iov_base = buf, .iov_len = sizeof(*buf), };
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifsFileInfo *cfile = NULL;
+ struct cifs_open_parms oparms;
struct tcon_link *tlink;
struct cifs_tcon *tcon;
- struct cifsFileInfo *cfile;
- struct kvec in_iov = { .iov_base = buf, .iov_len = sizeof(*buf), };
- int rc;
-
- if ((buf->CreationTime == 0) && (buf->LastAccessTime == 0) &&
- (buf->LastWriteTime == 0) && (buf->ChangeTime == 0) &&
- (buf->Attributes == 0))
- return 0; /* would be a no op, no sense sending this */
+ int rc = 0;
tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink))
return PTR_ERR(tlink);
tcon = tlink_tcon(tlink);
- cifs_get_writable_path(tcon, full_path, FIND_WR_ANY, &cfile);
+ if ((buf->CreationTime == 0) && (buf->LastAccessTime == 0) &&
+ (buf->LastWriteTime == 0) && (buf->ChangeTime == 0)) {
+ if (buf->Attributes == 0)
+ goto out; /* would be a no op, no sense sending this */
+ cifs_get_writable_path(tcon, full_path, FIND_WR_ANY, &cfile);
+ }
+
oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, FILE_WRITE_ATTRIBUTES,
FILE_OPEN, 0, ACL_NO_MODE);
rc = smb2_compound_op(xid, tcon, cifs_sb,
full_path, &oparms, &in_iov,
&(int){SMB2_OP_SET_INFO}, 1,
cfile, NULL, NULL, NULL);
+out:
cifs_put_tlink(tlink);
return rc;
}
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index ab911a967246..1b30035d02bc 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -4223,7 +4223,7 @@ fill_transform_hdr(struct smb2_transform_hdr *tr_hdr, unsigned int orig_len,
static void *smb2_aead_req_alloc(struct crypto_aead *tfm, const struct smb_rqst *rqst,
int num_rqst, const u8 *sig, u8 **iv,
struct aead_request **req, struct sg_table *sgt,
- unsigned int *num_sgs, size_t *sensitive_size)
+ unsigned int *num_sgs)
{
unsigned int req_size = sizeof(**req) + crypto_aead_reqsize(tfm);
unsigned int iv_size = crypto_aead_ivsize(tfm);
@@ -4240,9 +4240,8 @@ static void *smb2_aead_req_alloc(struct crypto_aead *tfm, const struct smb_rqst
len += req_size;
len = ALIGN(len, __alignof__(struct scatterlist));
len += array_size(*num_sgs, sizeof(struct scatterlist));
- *sensitive_size = len;
- p = kvzalloc(len, GFP_NOFS);
+ p = kzalloc(len, GFP_NOFS);
if (!p)
return ERR_PTR(-ENOMEM);
@@ -4256,16 +4255,14 @@ static void *smb2_aead_req_alloc(struct crypto_aead *tfm, const struct smb_rqst
static void *smb2_get_aead_req(struct crypto_aead *tfm, struct smb_rqst *rqst,
int num_rqst, const u8 *sig, u8 **iv,
- struct aead_request **req, struct scatterlist **sgl,
- size_t *sensitive_size)
+ struct aead_request **req, struct scatterlist **sgl)
{
struct sg_table sgtable = {};
unsigned int skip, num_sgs, i, j;
ssize_t rc;
void *p;
- p = smb2_aead_req_alloc(tfm, rqst, num_rqst, sig, iv, req, &sgtable,
- &num_sgs, sensitive_size);
+ p = smb2_aead_req_alloc(tfm, rqst, num_rqst, sig, iv, req, &sgtable, &num_sgs);
if (IS_ERR(p))
return ERR_CAST(p);
@@ -4354,7 +4351,6 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst,
DECLARE_CRYPTO_WAIT(wait);
unsigned int crypt_len = le32_to_cpu(tr_hdr->OriginalMessageSize);
void *creq;
- size_t sensitive_size;
rc = smb2_get_enc_key(server, le64_to_cpu(tr_hdr->SessionId), enc, key);
if (rc) {
@@ -4380,8 +4376,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst,
return rc;
}
- creq = smb2_get_aead_req(tfm, rqst, num_rqst, sign, &iv, &req, &sg,
- &sensitive_size);
+ creq = smb2_get_aead_req(tfm, rqst, num_rqst, sign, &iv, &req, &sg);
if (IS_ERR(creq))
return PTR_ERR(creq);
@@ -4411,7 +4406,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst,
if (!rc && enc)
memcpy(&tr_hdr->Signature, sign, SMB2_SIGNATURE_SIZE);
- kvfree_sensitive(creq, sensitive_size);
+ kfree_sensitive(creq);
return rc;
}
@@ -4662,7 +4657,7 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
unsigned int pad_len;
struct cifs_io_subrequest *rdata = mid->callback_data;
struct smb2_hdr *shdr = (struct smb2_hdr *)buf;
- int length;
+ size_t copied;
bool use_rdma_mr = false;
if (shdr->Command != SMB2_READ) {
@@ -4775,10 +4770,10 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
} else if (buf_len >= data_offset + data_len) {
/* read response payload is in buf */
WARN_ONCE(buffer, "read data can be either in buf or in buffer");
- length = copy_to_iter(buf + data_offset, data_len, &rdata->subreq.io_iter);
- if (length < 0)
- return length;
- rdata->got_bytes = data_len;
+ copied = copy_to_iter(buf + data_offset, data_len, &rdata->subreq.io_iter);
+ if (copied == 0)
+ return -EIO;
+ rdata->got_bytes = copied;
} else {
/* read response payload cannot be in both buf and pages */
WARN_ONCE(1, "buf can not contain only a part of read data");
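With the scatterlist bounded by the request itself, the crypto scratch buffer is small enough for a plain slab allocation, so the kvzalloc()/kvfree_sensitive() pair and the sensitive_size bookkeeping collapse into kzalloc()/kfree_sensitive(); kfree_sensitive() derives the size from the slab object and zeroes it before freeing. The shape of the pattern:

    #include <linux/slab.h>
    #include <linux/types.h>

    /* Allocate, use, and scrub a buffer that held key material. */
    static int use_secret(size_t len)
    {
            u8 *secret = kzalloc(len, GFP_NOFS);

            if (!secret)
                    return -ENOMEM;
            /* ... derive and use the key ... */
            kfree_sensitive(secret);      /* zeroes before freeing */
            return 0;
    }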
diff --git a/fs/smb/server/ksmbd_netlink.h b/fs/smb/server/ksmbd_netlink.h
index 3f07a612c05b..8ccd57fd904b 100644
--- a/fs/smb/server/ksmbd_netlink.h
+++ b/fs/smb/server/ksmbd_netlink.h
@@ -112,10 +112,11 @@ struct ksmbd_startup_request {
__u32 smbd_max_io_size; /* smbd read write size */
__u32 max_connections; /* Number of maximum simultaneous connections */
__s8 bind_interfaces_only;
- __s8 reserved[503]; /* Reserved room */
+ __u32 max_ip_connections; /* Number of maximum connection per ip address */
+ __s8 reserved[499]; /* Reserved room */
__u32 ifc_list_sz; /* interfaces list size */
__s8 ____payload[];
-};
+} __packed;
#define KSMBD_STARTUP_CONFIG_INTERFACES(s) ((s)->____payload)
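The new __u32 is carved out of the reserved blob (503 - 4 = 499 bytes remain) and __packed pins the layout, so the netlink request size seen by existing userspace tooling is unchanged. A compile-time guard of the sort one might add next to such an ABI struct (the size constant is hypothetical):

    #include <linux/build_bug.h>

    /* KSMBD_STARTUP_REQUEST_SIZE is a made-up constant for illustration. */
    static_assert(sizeof(struct ksmbd_startup_request) ==
                  KSMBD_STARTUP_REQUEST_SIZE,
                  "ksmbd_startup_request is user/kernel ABI; size must not change");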
diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c
index 9dec4c2940bc..b36d0676dbe5 100644
--- a/fs/smb/server/mgmt/user_session.c
+++ b/fs/smb/server/mgmt/user_session.c
@@ -104,29 +104,32 @@ int ksmbd_session_rpc_open(struct ksmbd_session *sess, char *rpc_name)
if (!entry)
return -ENOMEM;
- down_read(&sess->rpc_lock);
entry->method = method;
entry->id = id = ksmbd_ipc_id_alloc();
if (id < 0)
goto free_entry;
+
+ down_write(&sess->rpc_lock);
old = xa_store(&sess->rpc_handle_list, id, entry, KSMBD_DEFAULT_GFP);
- if (xa_is_err(old))
+ if (xa_is_err(old)) {
+ up_write(&sess->rpc_lock);
goto free_id;
+ }
resp = ksmbd_rpc_open(sess, id);
- if (!resp)
- goto erase_xa;
+ if (!resp) {
+ xa_erase(&sess->rpc_handle_list, entry->id);
+ up_write(&sess->rpc_lock);
+ goto free_id;
+ }
- up_read(&sess->rpc_lock);
+ up_write(&sess->rpc_lock);
kvfree(resp);
return id;
-erase_xa:
- xa_erase(&sess->rpc_handle_list, entry->id);
free_id:
ksmbd_rpc_id_free(entry->id);
free_entry:
kfree(entry);
- up_read(&sess->rpc_lock);
return -EINVAL;
}
@@ -144,9 +147,14 @@ void ksmbd_session_rpc_close(struct ksmbd_session *sess, int id)
int ksmbd_session_rpc_method(struct ksmbd_session *sess, int id)
{
struct ksmbd_session_rpc *entry;
+ int method;
+ down_read(&sess->rpc_lock);
entry = xa_load(&sess->rpc_handle_list, id);
- return entry ? entry->method : 0;
+ method = entry ? entry->method : 0;
+ up_read(&sess->rpc_lock);
+
+ return method;
}
void ksmbd_session_destroy(struct ksmbd_session *sess)
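The lock promotion matters because a rwsem held in read mode only excludes writers: two ksmbd_session_rpc_open() calls under down_read() could interleave their xa_store()/xa_erase() sequences, while pure lookups such as ksmbd_session_rpc_method() can keep sharing the lock. A minimal sketch of the split (GFP flag and names illustrative):

    #include <linux/rwsem.h>
    #include <linux/xarray.h>

    struct session {
            struct rw_semaphore rpc_lock;
            struct xarray rpc_handle_list;
    };

    static int handle_store(struct session *sess, unsigned long id, void *entry)
    {
            void *old;

            down_write(&sess->rpc_lock);          /* mutating the list */
            old = xa_store(&sess->rpc_handle_list, id, entry, GFP_KERNEL);
            up_write(&sess->rpc_lock);
            return xa_is_err(old) ? xa_err(old) : 0;
    }

    static void *handle_lookup(struct session *sess, unsigned long id)
    {
            void *entry;

            down_read(&sess->rpc_lock);           /* read-only lookup */
            entry = xa_load(&sess->rpc_handle_list, id);
            up_read(&sess->rpc_lock);
            return entry;
    }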
diff --git a/fs/smb/server/server.h b/fs/smb/server/server.h
index 995555febe7d..b8a7317be86b 100644
--- a/fs/smb/server/server.h
+++ b/fs/smb/server/server.h
@@ -43,6 +43,7 @@ struct ksmbd_server_config {
unsigned int auth_mechs;
unsigned int max_connections;
unsigned int max_inflight_req;
+ unsigned int max_ip_connections;
char *conf[SERVER_CONF_WORK_GROUP + 1];
struct task_struct *dh_task;
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index 6dafc2fbac25..d2182477566a 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -5600,7 +5600,8 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
if (!work->tcon->posix_extensions) {
pr_err("client doesn't negotiate with SMB3.1.1 POSIX Extensions\n");
- rc = -EOPNOTSUPP;
+ path_put(&path);
+ return -EOPNOTSUPP;
} else {
info = (struct filesystem_posix_info *)(rsp->Buffer);
info->OptimalTransferSize = cpu_to_le32(stfs.f_bsize);
diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c
index 2da2a5f6b983..4454bbe3c710 100644
--- a/fs/smb/server/transport_ipc.c
+++ b/fs/smb/server/transport_ipc.c
@@ -335,6 +335,9 @@ static int ipc_server_config_on_startup(struct ksmbd_startup_request *req)
if (req->max_connections)
server_conf.max_connections = req->max_connections;
+ if (req->max_ip_connections)
+ server_conf.max_ip_connections = req->max_ip_connections;
+
ret = ksmbd_set_netbios_name(req->netbios_name);
ret |= ksmbd_set_server_string(req->server_string);
ret |= ksmbd_set_work_group(req->work_group);
diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c
index d059c890d142..05dfef7ad67f 100644
--- a/fs/smb/server/transport_rdma.c
+++ b/fs/smb/server/transport_rdma.c
@@ -152,6 +152,10 @@ struct smb_direct_transport {
struct work_struct disconnect_work;
bool negotiation_requested;
+
+ bool legacy_iwarp;
+ u8 initiator_depth;
+ u8 responder_resources;
};
#define KSMBD_TRANS(t) ((struct ksmbd_transport *)&((t)->transport))
@@ -346,6 +350,9 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id)
t->cm_id = cm_id;
cm_id->context = t;
+ t->initiator_depth = SMB_DIRECT_CM_INITIATOR_DEPTH;
+ t->responder_resources = 1;
+
t->status = SMB_DIRECT_CS_NEW;
init_waitqueue_head(&t->wait_status);
@@ -1623,21 +1630,21 @@ static int smb_direct_send_negotiate_response(struct smb_direct_transport *t,
static int smb_direct_accept_client(struct smb_direct_transport *t)
{
struct rdma_conn_param conn_param;
- struct ib_port_immutable port_immutable;
- u32 ird_ord_hdr[2];
+ __be32 ird_ord_hdr[2];
int ret;
+ /*
+ * smb_direct_handle_connect_request()
+ * already negotiated t->initiator_depth
+ * and t->responder_resources
+ */
memset(&conn_param, 0, sizeof(conn_param));
- conn_param.initiator_depth = min_t(u8, t->cm_id->device->attrs.max_qp_rd_atom,
- SMB_DIRECT_CM_INITIATOR_DEPTH);
- conn_param.responder_resources = 0;
-
- t->cm_id->device->ops.get_port_immutable(t->cm_id->device,
- t->cm_id->port_num,
- &port_immutable);
- if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) {
- ird_ord_hdr[0] = conn_param.responder_resources;
- ird_ord_hdr[1] = 1;
+ conn_param.initiator_depth = t->initiator_depth;
+ conn_param.responder_resources = t->responder_resources;
+
+ if (t->legacy_iwarp) {
+ ird_ord_hdr[0] = cpu_to_be32(conn_param.responder_resources);
+ ird_ord_hdr[1] = cpu_to_be32(conn_param.initiator_depth);
conn_param.private_data = ird_ord_hdr;
conn_param.private_data_len = sizeof(ird_ord_hdr);
} else {
@@ -2023,10 +2030,13 @@ static bool rdma_frwr_is_supported(struct ib_device_attr *attrs)
return true;
}
-static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id)
+static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id,
+ struct rdma_cm_event *event)
{
struct smb_direct_transport *t;
struct task_struct *handler;
+ u8 peer_initiator_depth;
+ u8 peer_responder_resources;
int ret;
if (!rdma_frwr_is_supported(&new_cm_id->device->attrs)) {
@@ -2040,6 +2050,67 @@ static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id)
if (!t)
return -ENOMEM;
+ peer_initiator_depth = event->param.conn.initiator_depth;
+ peer_responder_resources = event->param.conn.responder_resources;
+ if (rdma_protocol_iwarp(new_cm_id->device, new_cm_id->port_num) &&
+ event->param.conn.private_data_len == 8) {
+ /*
+ * Legacy clients with only iWarp MPA v1 support
+ * need a private blob in order to negotiate
+ * the IRD/ORD values.
+ */
+ const __be32 *ird_ord_hdr = event->param.conn.private_data;
+ u32 ird32 = be32_to_cpu(ird_ord_hdr[0]);
+ u32 ord32 = be32_to_cpu(ird_ord_hdr[1]);
+
+ /*
+		 * cifs.ko sends the legacy IRD/ORD negotiation
+		 * blob even if iWarp MPA v2 was used.
+		 *
+		 * Here we compare the values and mark the client
+		 * as legacy only if they don't match.
+ */
+ if ((u32)event->param.conn.initiator_depth != ird32 ||
+ (u32)event->param.conn.responder_resources != ord32) {
+ /*
+ * There are broken clients (old cifs.ko)
+ * using little endian and also
+ * struct rdma_conn_param only uses u8
+ * for initiator_depth and responder_resources,
+ * so we truncate the value to U8_MAX.
+ *
+ * smb_direct_accept_client() will then
+ * do the real negotiation in order to
+ * select the minimum between client and
+ * server.
+ */
+ ird32 = min_t(u32, ird32, U8_MAX);
+ ord32 = min_t(u32, ord32, U8_MAX);
+
+ t->legacy_iwarp = true;
+ peer_initiator_depth = (u8)ird32;
+ peer_responder_resources = (u8)ord32;
+ }
+ }
+
+ /*
+	 * First set what we as the server are able to support
+ */
+ t->initiator_depth = min_t(u8, t->initiator_depth,
+ new_cm_id->device->attrs.max_qp_rd_atom);
+
+ /*
+	 * Negotiate the values by taking the minimum
+	 * of the client and server values, if the client
+	 * provided non-zero values.
+ */
+ if (peer_initiator_depth != 0)
+ t->initiator_depth = min_t(u8, t->initiator_depth,
+ peer_initiator_depth);
+ if (peer_responder_resources != 0)
+ t->responder_resources = min_t(u8, t->responder_resources,
+ peer_responder_resources);
+
ret = smb_direct_connect(t);
if (ret)
goto out_err;
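The legacy private blob is two big-endian 32-bit words, IRD then ORD; a mismatch with the MPA v2-negotiated values is the tell that the peer is an MPA v1-only client whose blob must be honoured. A decode sketch under those assumptions:

    #include <linux/types.h>
    #include <linux/minmax.h>
    #include <linux/limits.h>
    #include <asm/byteorder.h>

    /* Decode a legacy iWarp MPA v1 IRD/ORD private-data blob (8 bytes). */
    static void decode_ird_ord(const void *private_data, u8 *ird, u8 *ord)
    {
            const __be32 *hdr = private_data;
            u32 ird32 = be32_to_cpu(hdr[0]);
            u32 ord32 = be32_to_cpu(hdr[1]);

            /* rdma_conn_param carries these as u8, so clamp to U8_MAX. */
            *ird = min_t(u32, ird32, U8_MAX);
            *ord = min_t(u32, ord32, U8_MAX);
    }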
@@ -2064,7 +2135,7 @@ static int smb_direct_listen_handler(struct rdma_cm_id *cm_id,
{
switch (event->event) {
case RDMA_CM_EVENT_CONNECT_REQUEST: {
- int ret = smb_direct_handle_connect_request(cm_id);
+ int ret = smb_direct_handle_connect_request(cm_id, event);
if (ret) {
pr_err("Can't create transport: %d\n", ret);
diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c
index 756833c91b14..b51ccc16abe1 100644
--- a/fs/smb/server/transport_tcp.c
+++ b/fs/smb/server/transport_tcp.c
@@ -240,6 +240,7 @@ static int ksmbd_kthread_fn(void *p)
struct interface *iface = (struct interface *)p;
struct ksmbd_conn *conn;
int ret;
+ unsigned int max_ip_conns;
while (!kthread_should_stop()) {
mutex_lock(&iface->sock_release_lock);
@@ -257,34 +258,38 @@ static int ksmbd_kthread_fn(void *p)
continue;
}
+ if (!server_conf.max_ip_connections)
+ goto skip_max_ip_conns_limit;
+
/*
* Limits repeated connections from clients with the same IP.
*/
+ max_ip_conns = 0;
down_read(&conn_list_lock);
- list_for_each_entry(conn, &conn_list, conns_list)
+ list_for_each_entry(conn, &conn_list, conns_list) {
#if IS_ENABLED(CONFIG_IPV6)
if (client_sk->sk->sk_family == AF_INET6) {
if (memcmp(&client_sk->sk->sk_v6_daddr,
- &conn->inet6_addr, 16) == 0) {
- ret = -EAGAIN;
- break;
- }
+ &conn->inet6_addr, 16) == 0)
+ max_ip_conns++;
} else if (inet_sk(client_sk->sk)->inet_daddr ==
- conn->inet_addr) {
- ret = -EAGAIN;
- break;
- }
+ conn->inet_addr)
+ max_ip_conns++;
#else
if (inet_sk(client_sk->sk)->inet_daddr ==
- conn->inet_addr) {
+ conn->inet_addr)
+ max_ip_conns++;
+#endif
+ if (server_conf.max_ip_connections <= max_ip_conns) {
ret = -EAGAIN;
break;
}
-#endif
+ }
up_read(&conn_list_lock);
if (ret == -EAGAIN)
continue;
+skip_max_ip_conns_limit:
if (server_conf.max_connections &&
atomic_inc_return(&active_num_conn) >= server_conf.max_connections) {
pr_info_ratelimited("Limit the maximum number of connections(%u)\n",
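The rewritten loop counts every live connection from the same address instead of bailing on the first match, and only rejects the new socket once the count reaches max_ip_connections; a setting of 0 preserves the old unlimited behaviour. The counting shape, simplified to IPv4 (types and names illustrative):

    #include <linux/list.h>
    #include <linux/errno.h>
    #include <linux/types.h>

    struct conn {
            struct list_head list;
            __be32 addr;
    };

    /* Return -EAGAIN once @addr already holds @limit connections. */
    static int check_ip_limit(struct list_head *conns, __be32 addr,
                              unsigned int limit)
    {
            struct conn *c;
            unsigned int n = 0;

            if (!limit)
                    return 0;             /* 0 means unlimited */
            list_for_each_entry(c, conns, list)
                    if (c->addr == addr && ++n >= limit)
                            return -EAGAIN;
            return 0;
    }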
diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
index d5918eba27e3..f5dcb8353f86 100644
--- a/fs/squashfs/inode.c
+++ b/fs/squashfs/inode.c
@@ -140,8 +140,17 @@ int squashfs_read_inode(struct inode *inode, long long ino)
if (err < 0)
goto failed_read;
+ inode->i_size = le32_to_cpu(sqsh_ino->file_size);
frag = le32_to_cpu(sqsh_ino->fragment);
if (frag != SQUASHFS_INVALID_FRAG) {
+ /*
+		 * the file cannot both have a fragment (tail end) and a
+		 * file size that is a multiple of the block size
+ */
+ if ((inode->i_size & (msblk->block_size - 1)) == 0) {
+ err = -EINVAL;
+ goto failed_read;
+ }
frag_offset = le32_to_cpu(sqsh_ino->offset);
frag_size = squashfs_frag_lookup(sb, frag, &frag_blk);
if (frag_size < 0) {
@@ -155,7 +164,6 @@ int squashfs_read_inode(struct inode *inode, long long ino)
}
set_nlink(inode, 1);
- inode->i_size = le32_to_cpu(sqsh_ino->file_size);
inode->i_fop = &generic_ro_fops;
inode->i_mode |= S_IFREG;
inode->i_blocks = ((inode->i_size - 1) >> 9) + 1;
@@ -165,6 +173,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
squashfs_i(inode)->start = le32_to_cpu(sqsh_ino->start_block);
squashfs_i(inode)->block_list_start = block;
squashfs_i(inode)->offset = offset;
+ squashfs_i(inode)->parent = 0;
inode->i_data.a_ops = &squashfs_aops;
TRACE("File inode %x:%x, start_block %llx, block_list_start "
@@ -183,8 +192,21 @@ int squashfs_read_inode(struct inode *inode, long long ino)
if (err < 0)
goto failed_read;
+ inode->i_size = le64_to_cpu(sqsh_ino->file_size);
+ if (inode->i_size < 0) {
+ err = -EINVAL;
+ goto failed_read;
+ }
frag = le32_to_cpu(sqsh_ino->fragment);
if (frag != SQUASHFS_INVALID_FRAG) {
+ /*
+		 * the file cannot both have a fragment (tail end) and a
+		 * file size that is a multiple of the block size
+ */
+ if ((inode->i_size & (msblk->block_size - 1)) == 0) {
+ err = -EINVAL;
+ goto failed_read;
+ }
frag_offset = le32_to_cpu(sqsh_ino->offset);
frag_size = squashfs_frag_lookup(sb, frag, &frag_blk);
if (frag_size < 0) {
@@ -199,7 +221,6 @@ int squashfs_read_inode(struct inode *inode, long long ino)
xattr_id = le32_to_cpu(sqsh_ino->xattr);
set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
- inode->i_size = le64_to_cpu(sqsh_ino->file_size);
inode->i_op = &squashfs_inode_ops;
inode->i_fop = &generic_ro_fops;
inode->i_mode |= S_IFREG;
@@ -212,6 +233,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
squashfs_i(inode)->start = le64_to_cpu(sqsh_ino->start_block);
squashfs_i(inode)->block_list_start = block;
squashfs_i(inode)->offset = offset;
+ squashfs_i(inode)->parent = 0;
inode->i_data.a_ops = &squashfs_aops;
TRACE("File inode %x:%x, start_block %llx, block_list_start "
@@ -292,6 +314,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
inode->i_mode |= S_IFLNK;
squashfs_i(inode)->start = block;
squashfs_i(inode)->offset = offset;
+ squashfs_i(inode)->parent = 0;
if (type == SQUASHFS_LSYMLINK_TYPE) {
__le32 xattr;
@@ -329,6 +352,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
rdev = le32_to_cpu(sqsh_ino->rdev);
init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
+ squashfs_i(inode)->parent = 0;
TRACE("Device inode %x:%x, rdev %x\n",
SQUASHFS_INODE_BLK(ino), offset, rdev);
@@ -353,6 +377,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
rdev = le32_to_cpu(sqsh_ino->rdev);
init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
+ squashfs_i(inode)->parent = 0;
TRACE("Device inode %x:%x, rdev %x\n",
SQUASHFS_INODE_BLK(ino), offset, rdev);
@@ -373,6 +398,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
inode->i_mode |= S_IFSOCK;
set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
init_special_inode(inode, inode->i_mode, 0);
+ squashfs_i(inode)->parent = 0;
break;
}
case SQUASHFS_LFIFO_TYPE:
@@ -392,6 +418,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
inode->i_op = &squashfs_inode_ops;
set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
init_special_inode(inode, inode->i_mode, 0);
+ squashfs_i(inode)->parent = 0;
break;
}
default:
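The fragment checks enforce a structural invariant: a tail-end fragment exists only to hold the final partial block, so a regular file that claims a fragment while its size is an exact multiple of the block size must be corrupt. With a power-of-two block size the test is a single mask, as sketched:

    #include <linux/errno.h>
    #include <linux/types.h>

    /*
     * Reject an inode that has a tail-end fragment even though its size
     * is block-aligned (block_size is a power of two).
     */
    static int check_fragment(long long i_size, unsigned int block_size,
                              bool has_fragment)
    {
            if (has_fragment && (i_size & (block_size - 1)) == 0)
                    return -EINVAL;
            return 0;
    }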
diff --git a/fs/squashfs/squashfs_fs_i.h b/fs/squashfs/squashfs_fs_i.h
index 2c82d6f2a456..8e497ac07b9a 100644
--- a/fs/squashfs/squashfs_fs_i.h
+++ b/fs/squashfs/squashfs_fs_i.h
@@ -16,6 +16,7 @@ struct squashfs_inode_info {
u64 xattr;
unsigned int xattr_size;
int xattr_count;
+ int parent;
union {
struct {
u64 fragment_block;
@@ -27,7 +28,6 @@ struct squashfs_inode_info {
u64 dir_idx_start;
int dir_idx_offset;
int dir_idx_cnt;
- int parent;
};
};
struct inode vfs_inode;
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 4386dd845e40..0f97850a165a 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -2265,6 +2265,9 @@ int udf_current_aext(struct inode *inode, struct extent_position *epos,
if (check_add_overflow(sizeof(struct allocExtDesc),
le32_to_cpu(header->lengthAllocDescs), &alen))
return -1;
+
+ if (alen > epos->bh->b_size)
+ return -1;
}
switch (iinfo->i_alloc_type) {
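The UDF fix pairs two guards: check_add_overflow() catches the sum wrapping, and the new comparison catches a lengthAllocDescs value that, while not overflowing, would run past the end of the buffer head. A self-contained sketch (hdr_size stands in for sizeof(struct allocExtDesc)):

    #include <linux/overflow.h>
    #include <linux/types.h>

    /* Validate an on-disk descriptor length against its containing block. */
    static int check_alloc_len(u32 hdr_size, u32 length_alloc_descs,
                               size_t bh_size)
    {
            u32 alen;

            if (check_add_overflow(hdr_size, length_alloc_descs, &alen))
                    return -1;            /* arithmetic wraparound */
            if (alen > bh_size)
                    return -1;            /* runs past the block */
            return 0;
    }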