rcu: Enable rcu_normal_wake_from_gp on small systems

Automatically enable the rcu_normal_wake_from_gp parameter on
systems with a small number of CPUs. The activation threshold
is set to 16 CPUs.

This helps to reduce a latency of normal synchronize_rcu() API
by waking up GP-waiters earlier and decoupling synchronize_rcu()
callers from regular callback handling.

A benchmark running 64 parallel jobs(system with 64 CPUs) invoking
synchronize_rcu() demonstrates a notable latency reduction with the
setting enabled.

Latency distribution (microseconds):

<default>
 0      - 9999   : 1
 10000  - 19999  : 4
 20000  - 29999  : 399
 30000  - 39999  : 3197
 40000  - 49999  : 10428
 50000  - 59999  : 17363
 60000  - 69999  : 15529
 70000  - 79999  : 9287
 80000  - 89999  : 4249
 90000  - 99999  : 1915
 100000 - 109999 : 922
 110000 - 119999 : 390
 120000 - 129999 : 187
 ...
<default>

<rcu_normal_wake_from_gp>
 0      - 9999  : 1
 10000  - 19999 : 234
 20000  - 29999 : 6678
 30000  - 39999 : 33463
 40000  - 49999 : 20669
 50000  - 59999 : 2766
 60000  - 69999 : 183
 ...
<rcu_normal_wake_from_gp>

Reviewed-by: Joel Fernandes <joelagnelf@nvidia.com>
Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Neeraj Upadhyay (AMD) <neeraj.upadhyay@kernel.org>
This commit is contained in:
Uladzislau Rezki (Sony)
2025-07-02 16:59:36 +02:00
committed by Neeraj Upadhyay (AMD)
parent 90c09d57ca
commit 78370df5c3

View File

@@ -1632,8 +1632,10 @@ static void rcu_sr_put_wait_head(struct llist_node *node)
atomic_set_release(&sr_wn->inuse, 0);
}
/* Disabled by default. */
static int rcu_normal_wake_from_gp;
/* Enable rcu_normal_wake_from_gp automatically on small systems. */
#define WAKE_FROM_GP_CPU_THRESHOLD 16
static int rcu_normal_wake_from_gp = -1;
module_param(rcu_normal_wake_from_gp, int, 0644);
static struct workqueue_struct *sync_wq;
@@ -3250,7 +3252,7 @@ static void synchronize_rcu_normal(void)
trace_rcu_sr_normal(rcu_state.name, &rs.head, TPS("request"));
if (!READ_ONCE(rcu_normal_wake_from_gp)) {
if (READ_ONCE(rcu_normal_wake_from_gp) < 1) {
wait_rcu_gp(call_rcu_hurry);
goto trace_complete_out;
}
@@ -4854,6 +4856,12 @@ void __init rcu_init(void)
sync_wq = alloc_workqueue("sync_wq", WQ_MEM_RECLAIM, 0);
WARN_ON(!sync_wq);
/* Respect if explicitly disabled via a boot parameter. */
if (rcu_normal_wake_from_gp < 0) {
if (num_possible_cpus() <= WAKE_FROM_GP_CPU_THRESHOLD)
rcu_normal_wake_from_gp = 1;
}
/* Fill in default value for rcutree.qovld boot parameter. */
/* -After- the rcu_node ->lock fields are initialized! */
if (qovld < 0)