nptl: Optimize trylock for high cache contention workloads (BZ #33704)

Check lock availability before acquisition to reduce cache line bouncing.
This significantly improves trylock throughput on multi-core systems under
heavy contention.  Tested on x86_64.

Fixes BZ #33704.

Co-authored-by: Alex M Wells <alex.m.wells@intel.com>
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
(cherry picked from commit 63716823db)
2026-01-12 00:20:19 +08:00 · 2025-12-09 08:57:44 -08:00
parent a94467ce05
commit 9e1a305028
1 changed files with 6 additions and 2 deletions
--- a/nptl/pthread_mutex_trylock.c
+++ b/nptl/pthread_mutex_trylock.c
@@ -48,7 +48,8 @@ ___pthread_mutex_trylock (pthread_mutex_t *mutex)
 	  return 0;
 	}

-      if (lll_trylock (mutex->__data.__lock) == 0)
+      if (atomic_load_relaxed (&(mutex->__data.__lock)) == 0
+	  && lll_trylock (mutex->__data.__lock) == 0)
 	{
 	  /* Record the ownership.  */
 	  mutex->__data.__owner = id;
@@ -71,7 +72,10 @@ ___pthread_mutex_trylock (pthread_mutex_t *mutex)
      /*FALL THROUGH*/
    case PTHREAD_MUTEX_ADAPTIVE_NP:
    case PTHREAD_MUTEX_ERRORCHECK_NP:
-      if (lll_trylock (mutex->__data.__lock) != 0)
+      /* Mutex type is already loaded, lock check overhead should
+         be minimal.  */
+      if (atomic_load_relaxed (&(mutex->__data.__lock)) != 0
+	  || lll_trylock (mutex->__data.__lock) != 0)
 	break;

      /* Record the ownership.  */