Merge branch 'slab/for-6.19/memdesc_prep' into slab/for-next

Merge series "Prepare slab for memdescs" by Matthew Wilcox.

From the cover letter [1]:

When we separate struct folio, struct page and struct slab from each
other, converting to folios then to slabs will be nonsense.  It made
sense under the 'folio is just a head page' interpretation, but with
full separation, page_folio() will return NULL for a page which belongs
to a slab.

This patch series removes almost all mentions of folio from slab.
There are a few folio_test_slab() invocations left around the tree that
I haven't decided how to handle yet.  We're not yet quite at the point
of separately allocating struct slab, but that's what I'll be working
on next.

Link: https://lore.kernel.org/all/20251113000932.1589073-1-willy@infradead.org/ [1]
Vlastimil Babka, 2025-11-25 14:33:14 +01:00
8 changed files with 157 additions and 175 deletions
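The caller-side shape of the conversion is the same throughout the series. A condensed sketch, taken from the kfree() hunk in mm/slub.c further down (no new API, just the old and new idioms side by side):

	/* Old idiom: go through the folio and test its page type. */
	struct folio *folio = virt_to_folio(object);
	if (!folio_test_slab(folio)) {
		/* Not a slab, so a large kmalloc allocation. */
		free_large_kmalloc(folio, object);
		return;
	}
	slab = folio_slab(folio);

	/* New idiom: ask for the slab directly. page_slab() returns NULL for
	 * any page that does not belong to a slab, which in the allocator's
	 * free paths means a large kmalloc allocation.
	 */
	struct page *page = virt_to_page(object);
	struct slab *slab = page_slab(page);
	if (!slab) {
		free_large_kmalloc(page, object);
		return;
	}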


@@ -1048,19 +1048,7 @@ PAGE_TYPE_OPS(Table, table, pgtable)
*/
PAGE_TYPE_OPS(Guard, guard, guard)
FOLIO_TYPE_OPS(slab, slab)
/**
* PageSlab - Determine if the page belongs to the slab allocator
* @page: The page to test.
*
* Context: Any context.
* Return: True for slab pages, false for any other kind of page.
*/
static inline bool PageSlab(const struct page *page)
{
return folio_test_slab(page_folio(page));
}
PAGE_TYPE_OPS(Slab, slab, slab)
#ifdef CONFIG_HUGETLB_PAGE
FOLIO_TYPE_OPS(hugetlb, hugetlb)
@@ -1076,7 +1064,7 @@ PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc)
* Serialized with zone lock.
*/
PAGE_TYPE_OPS(Unaccepted, unaccepted, unaccepted)
FOLIO_TYPE_OPS(large_kmalloc, large_kmalloc)
PAGE_TYPE_OPS(LargeKmalloc, large_kmalloc, large_kmalloc)
/**
* PageHuge - Determine if the page belongs to hugetlbfs

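For context (an assumption about the PAGE_TYPE_OPS() machinery in include/linux/page-flags.h, not something shown in this diff): switching slab and large_kmalloc from FOLIO_TYPE_OPS() to PAGE_TYPE_OPS() makes the macro emit page-based accessors, which is why the hand-rolled PageSlab() above can be dropped. The test helper it generates looks roughly like this:

	/* Approximate expansion of PAGE_TYPE_OPS(Slab, slab, slab); the macro
	 * also emits __SetPageSlab()/__ClearPageSlab(), and the LargeKmalloc
	 * variant provides the PageLargeKmalloc() helpers used in later hunks.
	 */
	static __always_inline bool PageSlab(const struct page *page)
	{
		/* The page type lives in the top byte of page->page_type. */
		return data_race(page->page_type >> 24) == PGTY_slab;
	}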

@@ -520,24 +520,20 @@ void __kasan_mempool_unpoison_pages(struct page *page, unsigned int order,
bool __kasan_mempool_poison_object(void *ptr, unsigned long ip)
{
struct folio *folio = virt_to_folio(ptr);
struct page *page = virt_to_page(ptr);
struct slab *slab;
/*
* This function can be called for large kmalloc allocation that get
* their memory from page_alloc. Thus, the folio might not be a slab.
*/
if (unlikely(!folio_test_slab(folio))) {
if (unlikely(PageLargeKmalloc(page))) {
if (check_page_allocation(ptr, ip))
return false;
kasan_poison(ptr, folio_size(folio), KASAN_PAGE_FREE, false);
kasan_poison(ptr, page_size(page), KASAN_PAGE_FREE, false);
return true;
}
if (is_kfence_address(ptr))
return true;
slab = folio_slab(folio);
slab = page_slab(page);
if (check_slab_allocation(slab->slab_cache, ptr, ip))
return false;


@@ -612,14 +612,15 @@ static unsigned long kfence_init_pool(void)
* enters __slab_free() slow-path.
*/
for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) {
struct slab *slab;
struct page *page;
if (!i || (i % 2))
continue;
slab = page_slab(pfn_to_page(start_pfn + i));
__folio_set_slab(slab_folio(slab));
page = pfn_to_page(start_pfn + i);
__SetPageSlab(page);
#ifdef CONFIG_MEMCG
struct slab *slab = page_slab(page);
slab->obj_exts = (unsigned long)&kfence_metadata_init[i / 2 - 1].obj_exts |
MEMCG_DATA_OBJEXTS;
#endif
@@ -665,16 +666,17 @@ static unsigned long kfence_init_pool(void)
reset_slab:
for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) {
struct slab *slab;
struct page *page;
if (!i || (i % 2))
continue;
slab = page_slab(pfn_to_page(start_pfn + i));
page = pfn_to_page(start_pfn + i);
#ifdef CONFIG_MEMCG
struct slab *slab = page_slab(page);
slab->obj_exts = 0;
#endif
__folio_clear_slab(slab_folio(slab));
__ClearPageSlab(page);
}
return addr;


@@ -2557,38 +2557,25 @@ static inline void mod_objcg_mlstate(struct obj_cgroup *objcg,
}
static __always_inline
struct mem_cgroup *mem_cgroup_from_obj_folio(struct folio *folio, void *p)
struct mem_cgroup *mem_cgroup_from_obj_slab(struct slab *slab, void *p)
{
/*
* Slab objects are accounted individually, not per-page.
* Memcg membership data for each individual object is saved in
* slab->obj_exts.
*/
if (folio_test_slab(folio)) {
struct slabobj_ext *obj_exts;
struct slab *slab;
unsigned int off;
slab = folio_slab(folio);
obj_exts = slab_obj_exts(slab);
if (!obj_exts)
return NULL;
off = obj_to_index(slab->slab_cache, slab, p);
if (obj_exts[off].objcg)
return obj_cgroup_memcg(obj_exts[off].objcg);
struct slabobj_ext *obj_exts;
unsigned int off;
obj_exts = slab_obj_exts(slab);
if (!obj_exts)
return NULL;
}
/*
* folio_memcg_check() is used here, because in theory we can encounter
* a folio where the slab flag has been cleared already, but
* slab->obj_exts has not been freed yet
* folio_memcg_check() will guarantee that a proper memory
* cgroup pointer or NULL will be returned.
*/
return folio_memcg_check(folio);
off = obj_to_index(slab->slab_cache, slab, p);
if (obj_exts[off].objcg)
return obj_cgroup_memcg(obj_exts[off].objcg);
return NULL;
}
/*
@@ -2602,10 +2589,15 @@ struct mem_cgroup *mem_cgroup_from_obj_folio(struct folio *folio, void *p)
*/
struct mem_cgroup *mem_cgroup_from_slab_obj(void *p)
{
struct slab *slab;
if (mem_cgroup_disabled())
return NULL;
return mem_cgroup_from_obj_folio(virt_to_folio(p), p);
slab = virt_to_slab(p);
if (slab)
return mem_cgroup_from_obj_slab(slab, p);
return folio_memcg_check(virt_to_folio(p));
}
static struct obj_cgroup *__get_obj_cgroup_from_memcg(struct mem_cgroup *memcg)


@@ -117,19 +117,6 @@ static_assert(sizeof(struct slab) <= sizeof(struct page));
static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(freelist_aba_t)));
#endif
/**
* folio_slab - Converts from folio to slab.
* @folio: The folio.
*
* Currently struct slab is a different representation of a folio where
* folio_test_slab() is true.
*
* Return: The slab which contains this folio.
*/
#define folio_slab(folio) (_Generic((folio), \
const struct folio *: (const struct slab *)(folio), \
struct folio *: (struct slab *)(folio)))
/**
* slab_folio - The folio allocated for a slab
* @s: The slab.
@@ -146,20 +133,24 @@ static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(freelist_aba_t)
struct slab *: (struct folio *)s))
/**
* page_slab - Converts from first struct page to slab.
* @p: The first (either head of compound or single) page of slab.
* page_slab - Converts from struct page to its slab.
* @page: A page which may or may not belong to a slab.
*
* A temporary wrapper to convert struct page to struct slab in situations where
* we know the page is the compound head, or single order-0 page.
*
* Long-term ideally everything would work with struct slab directly or go
* through folio to struct slab.
*
* Return: The slab which contains this page
* Return: The slab which contains this page or NULL if the page does
* not belong to a slab. This includes pages returned from large kmalloc.
*/
#define page_slab(p) (_Generic((p), \
const struct page *: (const struct slab *)(p), \
struct page *: (struct slab *)(p)))
static inline struct slab *page_slab(const struct page *page)
{
unsigned long head;
head = READ_ONCE(page->compound_head);
if (head & 1)
page = (struct page *)(head - 1);
if (data_race(page->page_type >> 24) != PGTY_slab)
page = NULL;
return (struct slab *)page;
}
/**
* slab_page - The first struct page allocated for a slab
@@ -188,12 +179,7 @@ static inline pg_data_t *slab_pgdat(const struct slab *slab)
static inline struct slab *virt_to_slab(const void *addr)
{
struct folio *folio = virt_to_folio(addr);
if (!folio_test_slab(folio))
return NULL;
return folio_slab(folio);
return page_slab(virt_to_page(addr));
}
static inline int slab_order(const struct slab *slab)
@@ -599,6 +585,16 @@ static inline size_t slab_ksize(const struct kmem_cache *s)
return s->size;
}
static inline unsigned int large_kmalloc_order(const struct page *page)
{
return page[1].flags.f & 0xff;
}
static inline size_t large_kmalloc_size(const struct page *page)
{
return PAGE_SIZE << large_kmalloc_order(page);
}
#ifdef CONFIG_SLUB_DEBUG
void dump_unreclaimable_slab(void);
#else

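A sketch of how the two new helpers above get used by the large-kmalloc paths in the hunks that follow. large_kmalloc_usable_size() is a hypothetical wrapper for illustration only, and it assumes the low byte of the first tail page's flags (what large_kmalloc_order() reads) holds the compound order stored by the __GFP_COMP allocation:

	/* Hypothetical helper (not in the diff) showing the lookup that
	 * __ksize() and memcg_slab_post_charge() below perform inline:
	 * the order is read back from the first tail page, and the usable
	 * size is simply PAGE_SIZE << order.
	 */
	static inline size_t large_kmalloc_usable_size(const void *object)
	{
		const struct page *page = virt_to_page(object);

		if (!PageLargeKmalloc(page))
			return 0;
		return large_kmalloc_size(page);	/* PAGE_SIZE << large_kmalloc_order(page) */
	}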

@@ -997,26 +997,27 @@ void __init create_kmalloc_caches(void)
*/
size_t __ksize(const void *object)
{
struct folio *folio;
const struct page *page;
const struct slab *slab;
if (unlikely(object == ZERO_SIZE_PTR))
return 0;
folio = virt_to_folio(object);
page = virt_to_page(object);
if (unlikely(!folio_test_slab(folio))) {
if (WARN_ON(folio_size(folio) <= KMALLOC_MAX_CACHE_SIZE))
return 0;
if (WARN_ON(object != folio_address(folio)))
return 0;
return folio_size(folio);
}
if (unlikely(PageLargeKmalloc(page)))
return large_kmalloc_size(page);
slab = page_slab(page);
/* Delete this after we're sure there are no users */
if (WARN_ON(!slab))
return page_size(page);
#ifdef CONFIG_SLUB_DEBUG
skip_orig_size_check(folio_slab(folio)->slab_cache, object);
skip_orig_size_check(slab->slab_cache, object);
#endif
return slab_ksize(folio_slab(folio)->slab_cache);
return slab_ksize(slab->slab_cache);
}
gfp_t kmalloc_fix_flags(gfp_t flags)
@@ -1614,17 +1615,15 @@ static void kfree_rcu_work(struct work_struct *work)
static bool kfree_rcu_sheaf(void *obj)
{
struct kmem_cache *s;
struct folio *folio;
struct slab *slab;
if (is_vmalloc_addr(obj))
return false;
folio = virt_to_folio(obj);
if (unlikely(!folio_test_slab(folio)))
slab = virt_to_slab(obj);
if (unlikely(!slab))
return false;
slab = folio_slab(folio);
s = slab->slab_cache;
if (s->cpu_sheaves) {
if (likely(!IS_ENABLED(CONFIG_NUMA) ||

mm/slub.c

@@ -2372,33 +2372,34 @@ bool memcg_slab_post_charge(void *p, gfp_t flags)
{
struct slabobj_ext *slab_exts;
struct kmem_cache *s;
struct folio *folio;
struct page *page;
struct slab *slab;
unsigned long off;
folio = virt_to_folio(p);
if (!folio_test_slab(folio)) {
page = virt_to_page(p);
if (PageLargeKmalloc(page)) {
unsigned int order;
int size;
if (folio_memcg_kmem(folio))
if (PageMemcgKmem(page))
return true;
if (__memcg_kmem_charge_page(folio_page(folio, 0), flags,
folio_order(folio)))
order = large_kmalloc_order(page);
if (__memcg_kmem_charge_page(page, flags, order))
return false;
/*
* This folio has already been accounted in the global stats but
* This page has already been accounted in the global stats but
* not in the memcg stats. So, subtract from the global and use
* the interface which adds to both global and memcg stats.
*/
size = folio_size(folio);
node_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B, -size);
lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B, size);
size = PAGE_SIZE << order;
mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE_B, -size);
mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, size);
return true;
}
slab = folio_slab(folio);
slab = page_slab(page);
s = slab->slab_cache;
/*
@@ -3066,24 +3067,24 @@ static inline struct slab *alloc_slab_page(gfp_t flags, int node,
struct kmem_cache_order_objects oo,
bool allow_spin)
{
struct folio *folio;
struct page *page;
struct slab *slab;
unsigned int order = oo_order(oo);
if (unlikely(!allow_spin))
folio = (struct folio *)alloc_frozen_pages_nolock(0/* __GFP_COMP is implied */,
page = alloc_frozen_pages_nolock(0/* __GFP_COMP is implied */,
node, order);
else if (node == NUMA_NO_NODE)
folio = (struct folio *)alloc_frozen_pages(flags, order);
page = alloc_frozen_pages(flags, order);
else
folio = (struct folio *)__alloc_frozen_pages(flags, order, node, NULL);
page = __alloc_frozen_pages(flags, order, node, NULL);
if (!folio)
if (!page)
return NULL;
slab = folio_slab(folio);
__folio_set_slab(folio);
if (folio_is_pfmemalloc(folio))
__SetPageSlab(page);
slab = page_slab(page);
if (page_is_pfmemalloc(page))
slab_set_pfmemalloc(slab);
return slab;
@@ -3307,16 +3308,16 @@ static struct slab *new_slab(struct kmem_cache *s, gfp_t flags, int node)
static void __free_slab(struct kmem_cache *s, struct slab *slab)
{
struct folio *folio = slab_folio(slab);
int order = folio_order(folio);
struct page *page = slab_page(slab);
int order = compound_order(page);
int pages = 1 << order;
__slab_clear_pfmemalloc(slab);
folio->mapping = NULL;
__folio_clear_slab(folio);
page->mapping = NULL;
__ClearPageSlab(page);
mm_account_reclaimed_pages(pages);
unaccount_slab(slab, order, s);
free_frozen_pages(&folio->page, order);
free_frozen_pages(page, order);
}
static void rcu_free_slab(struct rcu_head *h)
@@ -5139,7 +5140,7 @@ void *alloc_from_pcs(struct kmem_cache *s, gfp_t gfp, int node)
* be false because of cpu migration during an unlocked part of
* the current allocation or previous freeing process.
*/
if (folio_nid(virt_to_folio(object)) != node) {
if (page_to_nid(virt_to_page(object)) != node) {
local_unlock(&s->cpu_sheaves->lock);
return NULL;
}
@@ -5593,7 +5594,7 @@ unsigned int kmem_cache_sheaf_size(struct slab_sheaf *sheaf)
*/
static void *___kmalloc_large_node(size_t size, gfp_t flags, int node)
{
struct folio *folio;
struct page *page;
void *ptr = NULL;
unsigned int order = get_order(size);
@@ -5603,15 +5604,15 @@ static void *___kmalloc_large_node(size_t size, gfp_t flags, int node)
flags |= __GFP_COMP;
if (node == NUMA_NO_NODE)
folio = (struct folio *)alloc_frozen_pages_noprof(flags, order);
page = alloc_frozen_pages_noprof(flags, order);
else
folio = (struct folio *)__alloc_frozen_pages_noprof(flags, order, node, NULL);
page = __alloc_frozen_pages_noprof(flags, order, node, NULL);
if (folio) {
ptr = folio_address(folio);
lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B,
if (page) {
ptr = page_address(page);
mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
PAGE_SIZE << order);
__folio_set_large_kmalloc(folio);
__SetPageLargeKmalloc(page);
}
ptr = kasan_kmalloc_large(ptr, size, flags);
@@ -6783,12 +6784,12 @@ void kmem_cache_free(struct kmem_cache *s, void *x)
}
EXPORT_SYMBOL(kmem_cache_free);
static void free_large_kmalloc(struct folio *folio, void *object)
static void free_large_kmalloc(struct page *page, void *object)
{
unsigned int order = folio_order(folio);
unsigned int order = compound_order(page);
if (WARN_ON_ONCE(!folio_test_large_kmalloc(folio))) {
dump_page(&folio->page, "Not a kmalloc allocation");
if (WARN_ON_ONCE(!PageLargeKmalloc(page))) {
dump_page(page, "Not a kmalloc allocation");
return;
}
@@ -6799,10 +6800,10 @@ static void free_large_kmalloc(struct folio *folio, void *object)
kasan_kfree_large(object);
kmsan_kfree_large(object);
lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B,
mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
-(PAGE_SIZE << order));
__folio_clear_large_kmalloc(folio);
free_frozen_pages(&folio->page, order);
__ClearPageLargeKmalloc(page);
free_frozen_pages(page, order);
}
/*
@@ -6812,7 +6813,7 @@ static void free_large_kmalloc(struct folio *folio, void *object)
void kvfree_rcu_cb(struct rcu_head *head)
{
void *obj = head;
struct folio *folio;
struct page *page;
struct slab *slab;
struct kmem_cache *s;
void *slab_addr;
@@ -6823,20 +6824,20 @@ void kvfree_rcu_cb(struct rcu_head *head)
return;
}
folio = virt_to_folio(obj);
if (!folio_test_slab(folio)) {
page = virt_to_page(obj);
slab = page_slab(page);
if (!slab) {
/*
* rcu_head offset can be only less than page size so no need to
* consider folio order
* consider allocation order
*/
obj = (void *) PAGE_ALIGN_DOWN((unsigned long)obj);
free_large_kmalloc(folio, obj);
free_large_kmalloc(page, obj);
return;
}
slab = folio_slab(folio);
s = slab->slab_cache;
slab_addr = folio_address(folio);
slab_addr = slab_address(slab);
if (is_kfence_address(obj)) {
obj = kfence_object_start(obj);
@@ -6858,7 +6859,7 @@ void kvfree_rcu_cb(struct rcu_head *head)
*/
void kfree(const void *object)
{
struct folio *folio;
struct page *page;
struct slab *slab;
struct kmem_cache *s;
void *x = (void *)object;
@@ -6868,13 +6869,13 @@ void kfree(const void *object)
if (unlikely(ZERO_OR_NULL_PTR(object)))
return;
folio = virt_to_folio(object);
if (unlikely(!folio_test_slab(folio))) {
free_large_kmalloc(folio, (void *)object);
page = virt_to_page(object);
slab = page_slab(page);
if (!slab) {
free_large_kmalloc(page, (void *)object);
return;
}
slab = folio_slab(folio);
s = slab->slab_cache;
slab_free(s, slab, x, _RET_IP_);
}
@@ -6891,7 +6892,6 @@ EXPORT_SYMBOL(kfree);
*/
void kfree_nolock(const void *object)
{
struct folio *folio;
struct slab *slab;
struct kmem_cache *s;
void *x = (void *)object;
@@ -6899,13 +6899,12 @@ void kfree_nolock(const void *object)
if (unlikely(ZERO_OR_NULL_PTR(object)))
return;
folio = virt_to_folio(object);
if (unlikely(!folio_test_slab(folio))) {
slab = virt_to_slab(object);
if (unlikely(!slab)) {
WARN_ONCE(1, "large_kmalloc is not supported by kfree_nolock()");
return;
}
slab = folio_slab(folio);
s = slab->slab_cache;
memcg_slab_free_hook(s, slab, &x, 1);
@@ -6969,16 +6968,16 @@ __do_krealloc(const void *p, size_t new_size, unsigned long align, gfp_t flags,
if (is_kfence_address(p)) {
ks = orig_size = kfence_ksize(p);
} else {
struct folio *folio;
struct page *page = virt_to_page(p);
struct slab *slab = page_slab(page);
folio = virt_to_folio(p);
if (unlikely(!folio_test_slab(folio))) {
if (!slab) {
/* Big kmalloc object */
WARN_ON(folio_size(folio) <= KMALLOC_MAX_CACHE_SIZE);
WARN_ON(p != folio_address(folio));
ks = folio_size(folio);
ks = page_size(page);
WARN_ON(ks <= KMALLOC_MAX_CACHE_SIZE);
WARN_ON(p != page_address(page));
} else {
s = folio_slab(folio)->slab_cache;
s = slab->slab_cache;
orig_size = get_orig_size(s, (void *)p);
ks = s->object_size;
}
@@ -7282,23 +7281,25 @@ int build_detached_freelist(struct kmem_cache *s, size_t size,
{
int lookahead = 3;
void *object;
struct folio *folio;
struct page *page;
struct slab *slab;
size_t same;
object = p[--size];
folio = virt_to_folio(object);
page = virt_to_page(object);
slab = page_slab(page);
if (!s) {
/* Handle kalloc'ed objects */
if (unlikely(!folio_test_slab(folio))) {
free_large_kmalloc(folio, object);
if (!slab) {
free_large_kmalloc(page, object);
df->slab = NULL;
return size;
}
/* Derive kmem_cache from object */
df->slab = folio_slab(folio);
df->s = df->slab->slab_cache;
df->slab = slab;
df->s = slab->slab_cache;
} else {
df->slab = folio_slab(folio);
df->slab = slab;
df->s = cache_from_obj(s, object); /* Support for memcg */
}


@@ -164,7 +164,8 @@ static inline void check_heap_object(const void *ptr, unsigned long n,
{
unsigned long addr = (unsigned long)ptr;
unsigned long offset;
struct folio *folio;
struct page *page;
struct slab *slab;
if (is_kmap_addr(ptr)) {
offset = offset_in_page(ptr);
@@ -189,16 +190,23 @@ static inline void check_heap_object(const void *ptr, unsigned long n,
if (!virt_addr_valid(ptr))
return;
folio = virt_to_folio(ptr);
if (folio_test_slab(folio)) {
page = virt_to_page(ptr);
slab = page_slab(page);
if (slab) {
/* Check slab allocator for flags and size. */
__check_heap_object(ptr, n, folio_slab(folio), to_user);
} else if (folio_test_large(folio)) {
offset = ptr - folio_address(folio);
if (n > folio_size(folio) - offset)
__check_heap_object(ptr, n, slab, to_user);
} else if (PageCompound(page)) {
page = compound_head(page);
offset = ptr - page_address(page);
if (n > page_size(page) - offset)
usercopy_abort("page alloc", NULL, to_user, offset, n);
}
/*
* We cannot check non-compound pages. They might be part of
* a large allocation, in which case crossing a page boundary
* is fine.
*/
}
DEFINE_STATIC_KEY_MAYBE_RO(CONFIG_HARDENED_USERCOPY_DEFAULT_ON,