Add support for locking memory on Linux

Linux wakes up kcompactd threads in order to make more contiguous memory
available on the system. It does this by migrating live movable pages
(actively modifying live processes' page tables and constantly flooding
them with page invalidation IPIs, which can be up to millions per
second), which can cause a process to become unresponsive for seconds
or even minutes in severe cases. In the case of sshd, we want to always
be able to connect to the system, even if it's under heavy kcompactd
load.

Introduce an option to protect sshd and its child sessions from being
compacted by kcompactd (this works in conjunction with
compact_unevictable_allowed = 0). Note that we depend on MCL_ONFAULT
being available, which was introduced in Linux 4.4. MCL_ONFAULT allows
the system to lock pages lazily, thus drastically reducing memory usage
of a locked process (without MCL_ONFAULT, every existing mapping in the
process is instantly write-faulted).
This commit is contained in:
Daniil Tatianin
2025-02-27 11:37:13 +03:00
committed by Darren Tucker
parent fdc4853c5b
commit 6c49e5f7dc
4 changed files with 52 additions and 0 deletions

View File

@@ -928,6 +928,27 @@ int main(void) { if (NSVersionOfRunTimeLibrary("System") >= (60 << 16))
AC_DEFINE([_PATH_BTMP], ["/var/log/btmp"], [log for bad login attempts])
AC_DEFINE([USE_BTMP])
AC_DEFINE([LINUX_OOM_ADJUST], [1], [Adjust Linux out-of-memory killer])
dnl Optional --with-linux-memlock-onfault switch.
dnl When given with any value other than "no", compile a small program
dnl that calls mlockall() with MCL_ONFAULT from <sys/mman.h>.  If it
dnl compiles, LINUX_MEMLOCK_ONFAULT is defined; if it does not, configure
dnl stops with an error instead of silently disabling the feature.
dnl When the switch is absent nothing is checked or defined.
AC_ARG_WITH([linux-memlock-onfault],
[ --with-linux-memlock-onfault Enables memory locking on Linux],
[
if test "x$withval" != "xno" ; then
AC_MSG_CHECKING([for MCL_ONFAULT])
AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
[[ #include <sys/mman.h> ]],
[[ mlockall(MCL_FUTURE | MCL_ONFAULT); ]],
)],
[
AC_MSG_RESULT([supported])
AC_DEFINE([LINUX_MEMLOCK_ONFAULT], [1],
[Lock all memory to protect sshd against Linux kcompactd] )],
[
AC_MSG_RESULT([not supported])
AC_MSG_ERROR([MCL_ONFAULT is not available on your system])
])
fi
],
)
AC_DEFINE([SYSTEMD_NOTIFY], [1], [Have sshd notify systemd on start/reload])
inet6_default_4in6=yes
case `uname -r` in

View File

@@ -319,6 +319,19 @@ oom_adjust_restore(void)
}
#endif /* LINUX_OOM_ADJUST */
#ifdef LINUX_MEMLOCK_ONFAULT
#include <sys/mman.h>
/*
 * Ask the kernel to lock this process's current and future mappings,
 * using MCL_ONFAULT so that pages are locked as they are faulted in.
 * This is best-effort: a failure is only reported at verbose level and
 * the caller carries on without locked memory.
 */
void
memlock_onfault_setup(void)
{
	const int lock_flags = MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT;

	if (mlockall(lock_flags) >= 0) {
		debug("memory locked");
		return;
	}
	verbose("unable to lock memory: %s", strerror(errno));
}
#endif /* LINUX_MEMLOCK_ONFAULT */
#ifdef SYSTEMD_NOTIFY
static void ssh_systemd_notify(const char *, ...)

View File

@@ -30,6 +30,10 @@ void oom_adjust_restore(void);
void oom_adjust_setup(void);
#endif
#ifdef LINUX_MEMLOCK_ONFAULT
/*
 * Lock the calling process's memory with mlockall() using MCL_ONFAULT.
 * Returns nothing; a failure to lock is only logged.
 */
void memlock_onfault_setup(void);
#endif
#ifdef SYSTEMD_NOTIFY
void ssh_systemd_notify_ready(void);
void ssh_systemd_notify_reload(void);

View File

@@ -34,6 +34,13 @@ platform_pre_listen(void)
/* Adjust out-of-memory killer so listening process is not killed */
oom_adjust_setup();
#endif
#ifdef LINUX_MEMLOCK_ONFAULT
/*
* Protect ourselves against kcompactd so that we are able to process
* new connections while it is active and migrating pages.
*/
memlock_onfault_setup();
#endif
}
void
@@ -84,4 +91,11 @@ platform_post_fork_child(void)
void
platform_pre_session_start(void)
{
#ifdef LINUX_MEMLOCK_ONFAULT
	/*
	 * Memory locks do not survive fork, so take them again here; this
	 * keeps the child connection protected against kcompactd too.
	 */
	memlock_onfault_setup();
#endif /* LINUX_MEMLOCK_ONFAULT */
}