summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/alloc_tag.c6
-rw-r--r--lib/maple_tree.c10
-rw-r--r--lib/test_hmm.c72
-rw-r--r--lib/test_xarray.c52
-rw-r--r--lib/xarray.c157
5 files changed, 228 insertions, 69 deletions
diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
index 19b45617bdcf6..1d893e3136149 100644
--- a/lib/alloc_tag.c
+++ b/lib/alloc_tag.c
@@ -174,7 +174,7 @@ void pgalloc_tag_split(struct folio *folio, int old_order, int new_order)
if (!mem_alloc_profiling_enabled())
return;
- tag = pgalloc_tag_get(&folio->page);
+ tag = __pgalloc_tag_get(&folio->page);
if (!tag)
return;
@@ -200,10 +200,10 @@ void pgalloc_tag_swap(struct folio *new, struct folio *old)
if (!mem_alloc_profiling_enabled())
return;
- tag_old = pgalloc_tag_get(&old->page);
+ tag_old = __pgalloc_tag_get(&old->page);
if (!tag_old)
return;
- tag_new = pgalloc_tag_get(&new->page);
+ tag_new = __pgalloc_tag_get(&new->page);
if (!tag_new)
return;
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index f7153ade1be5f..d0bea23fa4bc9 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -584,13 +584,10 @@ static __always_inline bool ma_dead_node(const struct maple_node *node)
*/
static __always_inline bool mte_dead_node(const struct maple_enode *enode)
{
- struct maple_node *parent, *node;
+ struct maple_node *node;
node = mte_to_node(enode);
- /* Do not reorder reads from the node prior to the parent check */
- smp_rmb();
- parent = mte_parent(enode);
- return (parent == node);
+ return ma_dead_node(node);
}
/*
@@ -1245,7 +1242,6 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
if (mas->mas_flags & MA_STATE_PREALLOC) {
if (allocated)
return;
- BUG_ON(!allocated);
WARN_ON(!allocated);
}
@@ -1353,7 +1349,7 @@ static void mas_node_count(struct ma_state *mas, int count)
* mas_start() - Sets up maple state for operations.
* @mas: The maple state.
*
- * If mas->status == mas_start, then set the min, max and depth to
+ * If mas->status == ma_start, then set the min, max and depth to
* defaults.
*
* Return:
diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index 056f2e411d7b4..5b144bc5c4ec7 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -195,7 +195,8 @@ static int dmirror_fops_release(struct inode *inode, struct file *filp)
static struct dmirror_chunk *dmirror_page_to_chunk(struct page *page)
{
- return container_of(page->pgmap, struct dmirror_chunk, pagemap);
+ return container_of(page_pgmap(page), struct dmirror_chunk,
+ pagemap);
}
static struct dmirror_device *dmirror_page_to_device(struct page *page)
@@ -706,34 +707,23 @@ static int dmirror_check_atomic(struct dmirror *dmirror, unsigned long start,
return 0;
}
-static int dmirror_atomic_map(unsigned long start, unsigned long end,
- struct page **pages, struct dmirror *dmirror)
+static int dmirror_atomic_map(unsigned long addr, struct page *page,
+ struct dmirror *dmirror)
{
- unsigned long pfn, mapped = 0;
- int i;
+ void *entry;
/* Map the migrated pages into the device's page tables. */
mutex_lock(&dmirror->mutex);
- for (i = 0, pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++, i++) {
- void *entry;
-
- if (!pages[i])
- continue;
-
- entry = pages[i];
- entry = xa_tag_pointer(entry, DPT_XA_TAG_ATOMIC);
- entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
- if (xa_is_err(entry)) {
- mutex_unlock(&dmirror->mutex);
- return xa_err(entry);
- }
-
- mapped++;
+ entry = xa_tag_pointer(page, DPT_XA_TAG_ATOMIC);
+ entry = xa_store(&dmirror->pt, addr >> PAGE_SHIFT, entry, GFP_ATOMIC);
+ if (xa_is_err(entry)) {
+ mutex_unlock(&dmirror->mutex);
+ return xa_err(entry);
}
mutex_unlock(&dmirror->mutex);
- return mapped;
+ return 0;
}
static int dmirror_migrate_finalize_and_map(struct migrate_vma *args,
@@ -780,10 +770,8 @@ static int dmirror_exclusive(struct dmirror *dmirror,
unsigned long start, end, addr;
unsigned long size = cmd->npages << PAGE_SHIFT;
struct mm_struct *mm = dmirror->notifier.mm;
- struct page *pages[64];
struct dmirror_bounce bounce;
- unsigned long next;
- int ret;
+ int ret = 0;
start = cmd->addr;
end = start + size;
@@ -795,36 +783,26 @@ static int dmirror_exclusive(struct dmirror *dmirror,
return -EINVAL;
mmap_read_lock(mm);
- for (addr = start; addr < end; addr = next) {
- unsigned long mapped = 0;
- int i;
-
- next = min(end, addr + (ARRAY_SIZE(pages) << PAGE_SHIFT));
+ for (addr = start; !ret && addr < end; addr += PAGE_SIZE) {
+ struct folio *folio;
+ struct page *page;
- ret = make_device_exclusive_range(mm, addr, next, pages, NULL);
- /*
- * Do dmirror_atomic_map() iff all pages are marked for
- * exclusive access to avoid accessing uninitialized
- * fields of pages.
- */
- if (ret == (next - addr) >> PAGE_SHIFT)
- mapped = dmirror_atomic_map(addr, next, pages, dmirror);
- for (i = 0; i < ret; i++) {
- if (pages[i]) {
- unlock_page(pages[i]);
- put_page(pages[i]);
- }
+ page = make_device_exclusive(mm, addr, NULL, &folio);
+ if (IS_ERR(page)) {
+ ret = PTR_ERR(page);
+ break;
}
- if (addr + (mapped << PAGE_SHIFT) < next) {
- mmap_read_unlock(mm);
- mmput(mm);
- return -EBUSY;
- }
+ ret = dmirror_atomic_map(addr, page, dmirror);
+ folio_unlock(folio);
+ folio_put(folio);
}
mmap_read_unlock(mm);
mmput(mm);
+ if (ret)
+ return ret;
+
/* Return the migrated data for verification. */
ret = dmirror_bounce_init(&bounce, start, size);
if (ret)
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index 0e865bab4a10b..080a39d22e736 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -1858,6 +1858,54 @@ static void check_split_1(struct xarray *xa, unsigned long index,
xa_destroy(xa);
}
+static void check_split_2(struct xarray *xa, unsigned long index,
+ unsigned int order, unsigned int new_order)
+{
+ XA_STATE_ORDER(xas, xa, index, new_order);
+ unsigned int i, found;
+ void *entry;
+
+ xa_store_order(xa, index, order, xa, GFP_KERNEL);
+ xa_set_mark(xa, index, XA_MARK_1);
+
+ /* allocate a node for xas_try_split() */
+ xas_set_err(&xas, -ENOMEM);
+ XA_BUG_ON(xa, !xas_nomem(&xas, GFP_KERNEL));
+
+ xas_lock(&xas);
+ xas_try_split(&xas, xa, order);
+ if (((new_order / XA_CHUNK_SHIFT) < (order / XA_CHUNK_SHIFT)) &&
+ new_order < order - 1) {
+ XA_BUG_ON(xa, !xas_error(&xas) || xas_error(&xas) != -EINVAL);
+ xas_unlock(&xas);
+ goto out;
+ }
+ for (i = 0; i < (1 << order); i += (1 << new_order))
+ __xa_store(xa, index + i, xa_mk_index(index + i), 0);
+ xas_unlock(&xas);
+
+ for (i = 0; i < (1 << order); i++) {
+ unsigned int val = index + (i & ~((1 << new_order) - 1));
+ XA_BUG_ON(xa, xa_load(xa, index + i) != xa_mk_index(val));
+ }
+
+ xa_set_mark(xa, index, XA_MARK_0);
+ XA_BUG_ON(xa, !xa_get_mark(xa, index, XA_MARK_0));
+
+ xas_set_order(&xas, index, 0);
+ found = 0;
+ rcu_read_lock();
+ xas_for_each_marked(&xas, entry, ULONG_MAX, XA_MARK_1) {
+ found++;
+ XA_BUG_ON(xa, xa_is_internal(entry));
+ }
+ rcu_read_unlock();
+ XA_BUG_ON(xa, found != 1 << (order - new_order));
+out:
+ xas_destroy(&xas);
+ xa_destroy(xa);
+}
+
static noinline void check_split(struct xarray *xa)
{
unsigned int order, new_order;
@@ -1869,6 +1917,10 @@ static noinline void check_split(struct xarray *xa)
check_split_1(xa, 0, order, new_order);
check_split_1(xa, 1UL << order, order, new_order);
check_split_1(xa, 3UL << order, order, new_order);
+
+ check_split_2(xa, 0, order, new_order);
+ check_split_2(xa, 1UL << order, order, new_order);
+ check_split_2(xa, 3UL << order, order, new_order);
}
}
}
diff --git a/lib/xarray.c b/lib/xarray.c
index 116e9286c64ec..9644b18af18d1 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -278,6 +278,7 @@ void xas_destroy(struct xa_state *xas)
xas->xa_alloc = node = next;
}
}
+EXPORT_SYMBOL_GPL(xas_destroy);
/**
* xas_nomem() - Allocate memory if needed.
@@ -1007,6 +1008,26 @@ static void node_set_marks(struct xa_node *node, unsigned int offset,
}
}
+static void __xas_init_node_for_split(struct xa_state *xas,
+ struct xa_node *node, void *entry)
+{
+ unsigned int i;
+ void *sibling = NULL;
+ unsigned int mask = xas->xa_sibs;
+
+ if (!node)
+ return;
+ node->array = xas->xa;
+ for (i = 0; i < XA_CHUNK_SIZE; i++) {
+ if ((i & mask) == 0) {
+ RCU_INIT_POINTER(node->slots[i], entry);
+ sibling = xa_mk_sibling(i);
+ } else {
+ RCU_INIT_POINTER(node->slots[i], sibling);
+ }
+ }
+}
+
/**
* xas_split_alloc() - Allocate memory for splitting an entry.
* @xas: XArray operation state.
@@ -1025,7 +1046,6 @@ void xas_split_alloc(struct xa_state *xas, void *entry, unsigned int order,
gfp_t gfp)
{
unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1;
- unsigned int mask = xas->xa_sibs;
/* XXX: no support for splitting really large entries yet */
if (WARN_ON(xas->xa_shift + 2 * XA_CHUNK_SHIFT <= order))
@@ -1034,22 +1054,13 @@ void xas_split_alloc(struct xa_state *xas, void *entry, unsigned int order,
return;
do {
- unsigned int i;
- void *sibling = NULL;
struct xa_node *node;
node = kmem_cache_alloc_lru(radix_tree_node_cachep, xas->xa_lru, gfp);
if (!node)
goto nomem;
- node->array = xas->xa;
- for (i = 0; i < XA_CHUNK_SIZE; i++) {
- if ((i & mask) == 0) {
- RCU_INIT_POINTER(node->slots[i], entry);
- sibling = xa_mk_sibling(i);
- } else {
- RCU_INIT_POINTER(node->slots[i], sibling);
- }
- }
+
+ __xas_init_node_for_split(xas, node, entry);
RCU_INIT_POINTER(node->parent, xas->xa_alloc);
xas->xa_alloc = node;
} while (sibs-- > 0);
@@ -1122,6 +1133,128 @@ void xas_split(struct xa_state *xas, void *entry, unsigned int order)
xas_update(xas, node);
}
EXPORT_SYMBOL_GPL(xas_split);
+
+/**
+ * xas_try_split_min_order() - Minimal split order xas_try_split() can accept
+ * @order: Current entry order.
+ *
+ * xas_try_split() can split a multi-index entry to smaller than @order - 1 if
+ * no new xa_node is needed. This function provides the minimal order
+ * xas_try_split() supports.
+ *
+ * Return: the minimal order xas_try_split() supports
+ *
+ * Context: Any context.
+ *
+ */
+unsigned int xas_try_split_min_order(unsigned int order)
+{
+ if (order % XA_CHUNK_SHIFT == 0)
+ return order == 0 ? 0 : order - 1;
+
+ return order - (order % XA_CHUNK_SHIFT);
+}
+EXPORT_SYMBOL_GPL(xas_try_split_min_order);
+
+/**
+ * xas_try_split() - Try to split a multi-index entry.
+ * @xas: XArray operation state.
+ * @entry: New entry to store in the array.
+ * @order: Current entry order.
+ *
+ * The size of the new entries is set in @xas. The value in @entry is
+ * copied to all the replacement entries. If and only if one new xa_node is
+ * needed, the function will use GFP_NOWAIT to get one if xas->xa_alloc is
+ * NULL. If more new xa_node are needed, the function gives EINVAL error.
+ *
+ * NOTE: use xas_try_split_min_order() to get next split order instead of
+ * @order - 1 if you want to minmize xas_try_split() calls.
+ *
+ * Context: Any context. The caller should hold the xa_lock.
+ */
+void xas_try_split(struct xa_state *xas, void *entry, unsigned int order)
+{
+ unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1;
+ unsigned int offset, marks;
+ struct xa_node *node;
+ void *curr = xas_load(xas);
+ int values = 0;
+ gfp_t gfp = GFP_NOWAIT;
+
+ node = xas->xa_node;
+ if (xas_top(node))
+ return;
+
+ if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT)
+ gfp |= __GFP_ACCOUNT;
+
+ marks = node_get_marks(node, xas->xa_offset);
+
+ offset = xas->xa_offset + sibs;
+
+ if (xas->xa_shift < node->shift) {
+ struct xa_node *child = xas->xa_alloc;
+ unsigned int expected_sibs =
+ (1 << ((order - 1) % XA_CHUNK_SHIFT)) - 1;
+
+ /*
+ * No support for splitting sibling entries
+ * (horizontally) or cascade split (vertically), which
+ * requires two or more new xa_nodes.
+ * Since if one xa_node allocation fails,
+ * it is hard to free the prior allocations.
+ */
+ if (sibs || xas->xa_sibs != expected_sibs) {
+ xas_destroy(xas);
+ xas_set_err(xas, -EINVAL);
+ return;
+ }
+
+ if (!child) {
+ child = kmem_cache_alloc_lru(radix_tree_node_cachep,
+ xas->xa_lru, gfp);
+ if (!child) {
+ xas_destroy(xas);
+ xas_set_err(xas, -ENOMEM);
+ return;
+ }
+ RCU_INIT_POINTER(child->parent, xas->xa_alloc);
+ }
+ __xas_init_node_for_split(xas, child, entry);
+
+ xas->xa_alloc = rcu_dereference_raw(child->parent);
+ child->shift = node->shift - XA_CHUNK_SHIFT;
+ child->offset = offset;
+ child->count = XA_CHUNK_SIZE;
+ child->nr_values = xa_is_value(entry) ?
+ XA_CHUNK_SIZE : 0;
+ RCU_INIT_POINTER(child->parent, node);
+ node_set_marks(node, offset, child, xas->xa_sibs,
+ marks);
+ rcu_assign_pointer(node->slots[offset],
+ xa_mk_node(child));
+ if (xa_is_value(curr))
+ values--;
+ xas_update(xas, child);
+
+ } else {
+ do {
+ unsigned int canon = offset - xas->xa_sibs;
+
+ node_set_marks(node, canon, NULL, 0, marks);
+ rcu_assign_pointer(node->slots[canon], entry);
+ while (offset > canon)
+ rcu_assign_pointer(node->slots[offset--],
+ xa_mk_sibling(canon));
+ values += (xa_is_value(entry) - xa_is_value(curr)) *
+ (xas->xa_sibs + 1);
+ } while (offset-- > xas->xa_offset);
+ }
+
+ node->nr_values += values;
+ xas_update(xas, node);
+}
+EXPORT_SYMBOL_GPL(xas_try_split);
#endif
/**