diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/alloc_tag.c | 6 | ||||
-rw-r--r-- | lib/maple_tree.c | 10 | ||||
-rw-r--r-- | lib/test_hmm.c | 72 | ||||
-rw-r--r-- | lib/test_xarray.c | 52 | ||||
-rw-r--r-- | lib/xarray.c | 157 |
5 files changed, 228 insertions, 69 deletions
diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c index 19b45617bdcf6..1d893e3136149 100644 --- a/lib/alloc_tag.c +++ b/lib/alloc_tag.c @@ -174,7 +174,7 @@ void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) if (!mem_alloc_profiling_enabled()) return; - tag = pgalloc_tag_get(&folio->page); + tag = __pgalloc_tag_get(&folio->page); if (!tag) return; @@ -200,10 +200,10 @@ void pgalloc_tag_swap(struct folio *new, struct folio *old) if (!mem_alloc_profiling_enabled()) return; - tag_old = pgalloc_tag_get(&old->page); + tag_old = __pgalloc_tag_get(&old->page); if (!tag_old) return; - tag_new = pgalloc_tag_get(&new->page); + tag_new = __pgalloc_tag_get(&new->page); if (!tag_new) return; diff --git a/lib/maple_tree.c b/lib/maple_tree.c index f7153ade1be5f..d0bea23fa4bc9 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -584,13 +584,10 @@ static __always_inline bool ma_dead_node(const struct maple_node *node) */ static __always_inline bool mte_dead_node(const struct maple_enode *enode) { - struct maple_node *parent, *node; + struct maple_node *node; node = mte_to_node(enode); - /* Do not reorder reads from the node prior to the parent check */ - smp_rmb(); - parent = mte_parent(enode); - return (parent == node); + return ma_dead_node(node); } /* @@ -1245,7 +1242,6 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) if (mas->mas_flags & MA_STATE_PREALLOC) { if (allocated) return; - BUG_ON(!allocated); WARN_ON(!allocated); } @@ -1353,7 +1349,7 @@ static void mas_node_count(struct ma_state *mas, int count) * mas_start() - Sets up maple state for operations. * @mas: The maple state. * - * If mas->status == mas_start, then set the min, max and depth to + * If mas->status == ma_start, then set the min, max and depth to * defaults. * * Return: diff --git a/lib/test_hmm.c b/lib/test_hmm.c index 056f2e411d7b4..5b144bc5c4ec7 100644 --- a/lib/test_hmm.c +++ b/lib/test_hmm.c @@ -195,7 +195,8 @@ static int dmirror_fops_release(struct inode *inode, struct file *filp) static struct dmirror_chunk *dmirror_page_to_chunk(struct page *page) { - return container_of(page->pgmap, struct dmirror_chunk, pagemap); + return container_of(page_pgmap(page), struct dmirror_chunk, + pagemap); } static struct dmirror_device *dmirror_page_to_device(struct page *page) @@ -706,34 +707,23 @@ static int dmirror_check_atomic(struct dmirror *dmirror, unsigned long start, return 0; } -static int dmirror_atomic_map(unsigned long start, unsigned long end, - struct page **pages, struct dmirror *dmirror) +static int dmirror_atomic_map(unsigned long addr, struct page *page, + struct dmirror *dmirror) { - unsigned long pfn, mapped = 0; - int i; + void *entry; /* Map the migrated pages into the device's page tables. */ mutex_lock(&dmirror->mutex); - for (i = 0, pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++, i++) { - void *entry; - - if (!pages[i]) - continue; - - entry = pages[i]; - entry = xa_tag_pointer(entry, DPT_XA_TAG_ATOMIC); - entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC); - if (xa_is_err(entry)) { - mutex_unlock(&dmirror->mutex); - return xa_err(entry); - } - - mapped++; + entry = xa_tag_pointer(page, DPT_XA_TAG_ATOMIC); + entry = xa_store(&dmirror->pt, addr >> PAGE_SHIFT, entry, GFP_ATOMIC); + if (xa_is_err(entry)) { + mutex_unlock(&dmirror->mutex); + return xa_err(entry); } mutex_unlock(&dmirror->mutex); - return mapped; + return 0; } static int dmirror_migrate_finalize_and_map(struct migrate_vma *args, @@ -780,10 +770,8 @@ static int dmirror_exclusive(struct dmirror *dmirror, unsigned long start, end, addr; unsigned long size = cmd->npages << PAGE_SHIFT; struct mm_struct *mm = dmirror->notifier.mm; - struct page *pages[64]; struct dmirror_bounce bounce; - unsigned long next; - int ret; + int ret = 0; start = cmd->addr; end = start + size; @@ -795,36 +783,26 @@ static int dmirror_exclusive(struct dmirror *dmirror, return -EINVAL; mmap_read_lock(mm); - for (addr = start; addr < end; addr = next) { - unsigned long mapped = 0; - int i; - - next = min(end, addr + (ARRAY_SIZE(pages) << PAGE_SHIFT)); + for (addr = start; !ret && addr < end; addr += PAGE_SIZE) { + struct folio *folio; + struct page *page; - ret = make_device_exclusive_range(mm, addr, next, pages, NULL); - /* - * Do dmirror_atomic_map() iff all pages are marked for - * exclusive access to avoid accessing uninitialized - * fields of pages. - */ - if (ret == (next - addr) >> PAGE_SHIFT) - mapped = dmirror_atomic_map(addr, next, pages, dmirror); - for (i = 0; i < ret; i++) { - if (pages[i]) { - unlock_page(pages[i]); - put_page(pages[i]); - } + page = make_device_exclusive(mm, addr, NULL, &folio); + if (IS_ERR(page)) { + ret = PTR_ERR(page); + break; } - if (addr + (mapped << PAGE_SHIFT) < next) { - mmap_read_unlock(mm); - mmput(mm); - return -EBUSY; - } + ret = dmirror_atomic_map(addr, page, dmirror); + folio_unlock(folio); + folio_put(folio); } mmap_read_unlock(mm); mmput(mm); + if (ret) + return ret; + /* Return the migrated data for verification. */ ret = dmirror_bounce_init(&bounce, start, size); if (ret) diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 0e865bab4a10b..080a39d22e736 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -1858,6 +1858,54 @@ static void check_split_1(struct xarray *xa, unsigned long index, xa_destroy(xa); } +static void check_split_2(struct xarray *xa, unsigned long index, + unsigned int order, unsigned int new_order) +{ + XA_STATE_ORDER(xas, xa, index, new_order); + unsigned int i, found; + void *entry; + + xa_store_order(xa, index, order, xa, GFP_KERNEL); + xa_set_mark(xa, index, XA_MARK_1); + + /* allocate a node for xas_try_split() */ + xas_set_err(&xas, -ENOMEM); + XA_BUG_ON(xa, !xas_nomem(&xas, GFP_KERNEL)); + + xas_lock(&xas); + xas_try_split(&xas, xa, order); + if (((new_order / XA_CHUNK_SHIFT) < (order / XA_CHUNK_SHIFT)) && + new_order < order - 1) { + XA_BUG_ON(xa, !xas_error(&xas) || xas_error(&xas) != -EINVAL); + xas_unlock(&xas); + goto out; + } + for (i = 0; i < (1 << order); i += (1 << new_order)) + __xa_store(xa, index + i, xa_mk_index(index + i), 0); + xas_unlock(&xas); + + for (i = 0; i < (1 << order); i++) { + unsigned int val = index + (i & ~((1 << new_order) - 1)); + XA_BUG_ON(xa, xa_load(xa, index + i) != xa_mk_index(val)); + } + + xa_set_mark(xa, index, XA_MARK_0); + XA_BUG_ON(xa, !xa_get_mark(xa, index, XA_MARK_0)); + + xas_set_order(&xas, index, 0); + found = 0; + rcu_read_lock(); + xas_for_each_marked(&xas, entry, ULONG_MAX, XA_MARK_1) { + found++; + XA_BUG_ON(xa, xa_is_internal(entry)); + } + rcu_read_unlock(); + XA_BUG_ON(xa, found != 1 << (order - new_order)); +out: + xas_destroy(&xas); + xa_destroy(xa); +} + static noinline void check_split(struct xarray *xa) { unsigned int order, new_order; @@ -1869,6 +1917,10 @@ static noinline void check_split(struct xarray *xa) check_split_1(xa, 0, order, new_order); check_split_1(xa, 1UL << order, order, new_order); check_split_1(xa, 3UL << order, order, new_order); + + check_split_2(xa, 0, order, new_order); + check_split_2(xa, 1UL << order, order, new_order); + check_split_2(xa, 3UL << order, order, new_order); } } } diff --git a/lib/xarray.c b/lib/xarray.c index 116e9286c64ec..9644b18af18d1 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -278,6 +278,7 @@ void xas_destroy(struct xa_state *xas) xas->xa_alloc = node = next; } } +EXPORT_SYMBOL_GPL(xas_destroy); /** * xas_nomem() - Allocate memory if needed. @@ -1007,6 +1008,26 @@ static void node_set_marks(struct xa_node *node, unsigned int offset, } } +static void __xas_init_node_for_split(struct xa_state *xas, + struct xa_node *node, void *entry) +{ + unsigned int i; + void *sibling = NULL; + unsigned int mask = xas->xa_sibs; + + if (!node) + return; + node->array = xas->xa; + for (i = 0; i < XA_CHUNK_SIZE; i++) { + if ((i & mask) == 0) { + RCU_INIT_POINTER(node->slots[i], entry); + sibling = xa_mk_sibling(i); + } else { + RCU_INIT_POINTER(node->slots[i], sibling); + } + } +} + /** * xas_split_alloc() - Allocate memory for splitting an entry. * @xas: XArray operation state. @@ -1025,7 +1046,6 @@ void xas_split_alloc(struct xa_state *xas, void *entry, unsigned int order, gfp_t gfp) { unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1; - unsigned int mask = xas->xa_sibs; /* XXX: no support for splitting really large entries yet */ if (WARN_ON(xas->xa_shift + 2 * XA_CHUNK_SHIFT <= order)) @@ -1034,22 +1054,13 @@ void xas_split_alloc(struct xa_state *xas, void *entry, unsigned int order, return; do { - unsigned int i; - void *sibling = NULL; struct xa_node *node; node = kmem_cache_alloc_lru(radix_tree_node_cachep, xas->xa_lru, gfp); if (!node) goto nomem; - node->array = xas->xa; - for (i = 0; i < XA_CHUNK_SIZE; i++) { - if ((i & mask) == 0) { - RCU_INIT_POINTER(node->slots[i], entry); - sibling = xa_mk_sibling(i); - } else { - RCU_INIT_POINTER(node->slots[i], sibling); - } - } + + __xas_init_node_for_split(xas, node, entry); RCU_INIT_POINTER(node->parent, xas->xa_alloc); xas->xa_alloc = node; } while (sibs-- > 0); @@ -1122,6 +1133,128 @@ void xas_split(struct xa_state *xas, void *entry, unsigned int order) xas_update(xas, node); } EXPORT_SYMBOL_GPL(xas_split); + +/** + * xas_try_split_min_order() - Minimal split order xas_try_split() can accept + * @order: Current entry order. + * + * xas_try_split() can split a multi-index entry to smaller than @order - 1 if + * no new xa_node is needed. This function provides the minimal order + * xas_try_split() supports. + * + * Return: the minimal order xas_try_split() supports + * + * Context: Any context. + * + */ +unsigned int xas_try_split_min_order(unsigned int order) +{ + if (order % XA_CHUNK_SHIFT == 0) + return order == 0 ? 0 : order - 1; + + return order - (order % XA_CHUNK_SHIFT); +} +EXPORT_SYMBOL_GPL(xas_try_split_min_order); + +/** + * xas_try_split() - Try to split a multi-index entry. + * @xas: XArray operation state. + * @entry: New entry to store in the array. + * @order: Current entry order. + * + * The size of the new entries is set in @xas. The value in @entry is + * copied to all the replacement entries. If and only if one new xa_node is + * needed, the function will use GFP_NOWAIT to get one if xas->xa_alloc is + * NULL. If more new xa_node are needed, the function gives EINVAL error. + * + * NOTE: use xas_try_split_min_order() to get next split order instead of + * @order - 1 if you want to minmize xas_try_split() calls. + * + * Context: Any context. The caller should hold the xa_lock. + */ +void xas_try_split(struct xa_state *xas, void *entry, unsigned int order) +{ + unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1; + unsigned int offset, marks; + struct xa_node *node; + void *curr = xas_load(xas); + int values = 0; + gfp_t gfp = GFP_NOWAIT; + + node = xas->xa_node; + if (xas_top(node)) + return; + + if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT) + gfp |= __GFP_ACCOUNT; + + marks = node_get_marks(node, xas->xa_offset); + + offset = xas->xa_offset + sibs; + + if (xas->xa_shift < node->shift) { + struct xa_node *child = xas->xa_alloc; + unsigned int expected_sibs = + (1 << ((order - 1) % XA_CHUNK_SHIFT)) - 1; + + /* + * No support for splitting sibling entries + * (horizontally) or cascade split (vertically), which + * requires two or more new xa_nodes. + * Since if one xa_node allocation fails, + * it is hard to free the prior allocations. + */ + if (sibs || xas->xa_sibs != expected_sibs) { + xas_destroy(xas); + xas_set_err(xas, -EINVAL); + return; + } + + if (!child) { + child = kmem_cache_alloc_lru(radix_tree_node_cachep, + xas->xa_lru, gfp); + if (!child) { + xas_destroy(xas); + xas_set_err(xas, -ENOMEM); + return; + } + RCU_INIT_POINTER(child->parent, xas->xa_alloc); + } + __xas_init_node_for_split(xas, child, entry); + + xas->xa_alloc = rcu_dereference_raw(child->parent); + child->shift = node->shift - XA_CHUNK_SHIFT; + child->offset = offset; + child->count = XA_CHUNK_SIZE; + child->nr_values = xa_is_value(entry) ? + XA_CHUNK_SIZE : 0; + RCU_INIT_POINTER(child->parent, node); + node_set_marks(node, offset, child, xas->xa_sibs, + marks); + rcu_assign_pointer(node->slots[offset], + xa_mk_node(child)); + if (xa_is_value(curr)) + values--; + xas_update(xas, child); + + } else { + do { + unsigned int canon = offset - xas->xa_sibs; + + node_set_marks(node, canon, NULL, 0, marks); + rcu_assign_pointer(node->slots[canon], entry); + while (offset > canon) + rcu_assign_pointer(node->slots[offset--], + xa_mk_sibling(canon)); + values += (xa_is_value(entry) - xa_is_value(curr)) * + (xas->xa_sibs + 1); + } while (offset-- > xas->xa_offset); + } + + node->nr_values += values; + xas_update(xas, node); +} +EXPORT_SYMBOL_GPL(xas_try_split); #endif /** |