mirror of
https://github.com/torvalds/linux.git
synced 2026-01-12 00:42:35 +08:00
mm/page_alloc: fix race condition in unaccepted memory handling
The page allocator tracks the number of zones that have unaccepted memory
using static_branch_inc/dec() and uses that static branch in hot paths to
determine if it needs to deal with unaccepted memory.
Borislav and Thomas pointed out that the tracking is racy: operations on
static_branch are not serialized against adding/removing unaccepted pages
to/from the zone.
Sanity checks inside the static_branch machinery detect it:
WARNING: CPU: 0 PID: 10 at kernel/jump_label.c:276 __static_key_slow_dec_cpuslocked+0x8e/0xa0
The comment around the WARN() explains the problem:
/*
* Warn about the '-1' case though; since that means a
* decrement is concurrent with a first (0->1) increment. IOW
* people are trying to disable something that wasn't yet fully
* enabled. This suggests an ordering problem on the user side.
*/
The effect of this static_branch optimization is only visible on
microbenchmark.
Instead of adding more complexity around it, remove it altogether.
Link: https://lkml.kernel.org/r/20250506133207.1009676-1-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Fixes: dcdfdd40fa ("mm: Add support for unaccepted memory")
Link: https://lore.kernel.org/all/20250506092445.GBaBnVXXyvnazly6iF@fat_crate.local
Reported-by: Borislav Petkov <bp@alien8.de>
Tested-by: Borislav Petkov (AMD) <bp@alien8.de>
Reported-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Brendan Jackman <jackmanb@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: <stable@vger.kernel.org> [6.5+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
committed by
Andrew Morton
parent
23fa022a07
commit
fefc075182
@@ -1590,7 +1590,6 @@ unsigned long move_page_tables(struct pagetable_move_control *pmc);
|
||||
|
||||
#ifdef CONFIG_UNACCEPTED_MEMORY
|
||||
void accept_page(struct page *page);
|
||||
void unaccepted_cleanup_work(struct work_struct *work);
|
||||
#else /* CONFIG_UNACCEPTED_MEMORY */
|
||||
static inline void accept_page(struct page *page)
|
||||
{
|
||||
|
||||
@@ -1441,7 +1441,6 @@ static void __meminit zone_init_free_lists(struct zone *zone)
|
||||
|
||||
#ifdef CONFIG_UNACCEPTED_MEMORY
|
||||
INIT_LIST_HEAD(&zone->unaccepted_pages);
|
||||
INIT_WORK(&zone->unaccepted_cleanup, unaccepted_cleanup_work);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@@ -7172,16 +7172,8 @@ bool has_managed_dma(void)
|
||||
|
||||
#ifdef CONFIG_UNACCEPTED_MEMORY
|
||||
|
||||
/* Counts number of zones with unaccepted pages. */
|
||||
static DEFINE_STATIC_KEY_FALSE(zones_with_unaccepted_pages);
|
||||
|
||||
static bool lazy_accept = true;
|
||||
|
||||
void unaccepted_cleanup_work(struct work_struct *work)
|
||||
{
|
||||
static_branch_dec(&zones_with_unaccepted_pages);
|
||||
}
|
||||
|
||||
static int __init accept_memory_parse(char *p)
|
||||
{
|
||||
if (!strcmp(p, "lazy")) {
|
||||
@@ -7206,11 +7198,7 @@ static bool page_contains_unaccepted(struct page *page, unsigned int order)
|
||||
static void __accept_page(struct zone *zone, unsigned long *flags,
|
||||
struct page *page)
|
||||
{
|
||||
bool last;
|
||||
|
||||
list_del(&page->lru);
|
||||
last = list_empty(&zone->unaccepted_pages);
|
||||
|
||||
account_freepages(zone, -MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
|
||||
__mod_zone_page_state(zone, NR_UNACCEPTED, -MAX_ORDER_NR_PAGES);
|
||||
__ClearPageUnaccepted(page);
|
||||
@@ -7219,28 +7207,6 @@ static void __accept_page(struct zone *zone, unsigned long *flags,
|
||||
accept_memory(page_to_phys(page), PAGE_SIZE << MAX_PAGE_ORDER);
|
||||
|
||||
__free_pages_ok(page, MAX_PAGE_ORDER, FPI_TO_TAIL);
|
||||
|
||||
if (last) {
|
||||
/*
|
||||
* There are two corner cases:
|
||||
*
|
||||
* - If allocation occurs during the CPU bring up,
|
||||
* static_branch_dec() cannot be used directly as
|
||||
* it causes a deadlock on cpu_hotplug_lock.
|
||||
*
|
||||
* Instead, use schedule_work() to prevent deadlock.
|
||||
*
|
||||
* - If allocation occurs before workqueues are initialized,
|
||||
* static_branch_dec() should be called directly.
|
||||
*
|
||||
* Workqueues are initialized before CPU bring up, so this
|
||||
* will not conflict with the first scenario.
|
||||
*/
|
||||
if (system_wq)
|
||||
schedule_work(&zone->unaccepted_cleanup);
|
||||
else
|
||||
unaccepted_cleanup_work(&zone->unaccepted_cleanup);
|
||||
}
|
||||
}
|
||||
|
||||
void accept_page(struct page *page)
|
||||
@@ -7277,20 +7243,12 @@ static bool try_to_accept_memory_one(struct zone *zone)
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool has_unaccepted_memory(void)
|
||||
{
|
||||
return static_branch_unlikely(&zones_with_unaccepted_pages);
|
||||
}
|
||||
|
||||
static bool cond_accept_memory(struct zone *zone, unsigned int order,
|
||||
int alloc_flags)
|
||||
{
|
||||
long to_accept, wmark;
|
||||
bool ret = false;
|
||||
|
||||
if (!has_unaccepted_memory())
|
||||
return false;
|
||||
|
||||
if (list_empty(&zone->unaccepted_pages))
|
||||
return false;
|
||||
|
||||
@@ -7328,22 +7286,17 @@ static bool __free_unaccepted(struct page *page)
|
||||
{
|
||||
struct zone *zone = page_zone(page);
|
||||
unsigned long flags;
|
||||
bool first = false;
|
||||
|
||||
if (!lazy_accept)
|
||||
return false;
|
||||
|
||||
spin_lock_irqsave(&zone->lock, flags);
|
||||
first = list_empty(&zone->unaccepted_pages);
|
||||
list_add_tail(&page->lru, &zone->unaccepted_pages);
|
||||
account_freepages(zone, MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
|
||||
__mod_zone_page_state(zone, NR_UNACCEPTED, MAX_ORDER_NR_PAGES);
|
||||
__SetPageUnaccepted(page);
|
||||
spin_unlock_irqrestore(&zone->lock, flags);
|
||||
|
||||
if (first)
|
||||
static_branch_inc(&zones_with_unaccepted_pages);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user