Skip to content

Commit 6e1aa09

Browse files
committed
syscall_user_dispatch: Add PR_SYS_DISPATCH_INCLUSIVE_ON
JIRA: https://issues.redhat.com/browse/RHEL-78200

upstream
========
commit a2fc422
Author: Dmitry Vyukov <dvyukov@google.com>
Date: Wed May 21 17:04:29 2025 +0200

description
===========
There are two possible scenarios for syscall filtering:
 - having a trusted/allowed range of PCs, and intercepting everything else
 - or the opposite: a single untrusted/intercepted range and allowing
   everything else (this is relevant for any kind of sandboxing scenario,
   or monitoring behavior of a single library)

The current API only allows the former use case due to allowed range
wrap-around check. Add PR_SYS_DISPATCH_INCLUSIVE_ON that enables the
second use case.

Add PR_SYS_DISPATCH_EXCLUSIVE_ON alias for PR_SYS_DISPATCH_ON to make it
clear how it's different from the new PR_SYS_DISPATCH_INCLUSIVE_ON.

Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/97947cc8e205ff49675826d7b0327ef2e2c66eea.1747839857.git.dvyukov@google.com
Signed-off-by: Anubhav Shelat <ashelat@redhat.com>
1 parent 6247e76 commit 6e1aa09

File tree

4 files changed

+49
-24
lines changed

4 files changed

+49
-24
lines changed

Documentation/admin-guide/syscall-user-dispatch.rst

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -53,20 +53,25 @@ following prctl:
5353

5454
prctl(PR_SET_SYSCALL_USER_DISPATCH, <op>, <offset>, <length>, [selector])
5555

56-
<op> is either PR_SYS_DISPATCH_ON or PR_SYS_DISPATCH_OFF, to enable and
57-
disable the mechanism globally for that thread. When
58-
PR_SYS_DISPATCH_OFF is used, the other fields must be zero.
59-
60-
[<offset>, <offset>+<length>) delimit a memory region interval
61-
from which syscalls are always executed directly, regardless of the
62-
userspace selector. This provides a fast path for the C library, which
63-
includes the most common syscall dispatchers in the native code
64-
applications, and also provides a way for the signal handler to return
56+
<op> is either PR_SYS_DISPATCH_EXCLUSIVE_ON/PR_SYS_DISPATCH_INCLUSIVE_ON
57+
or PR_SYS_DISPATCH_OFF, to enable and disable the mechanism globally for
58+
that thread. When PR_SYS_DISPATCH_OFF is used, the other fields must be zero.
59+
60+
For PR_SYS_DISPATCH_EXCLUSIVE_ON, [<offset>, <offset>+<length>) delimit
61+
a memory region interval from which syscalls are always executed directly,
62+
regardless of the userspace selector. This provides a fast path for the
63+
C library, which includes the most common syscall dispatchers in the native
64+
code applications, and also provides a way for the signal handler to return
6565
without triggering a nested SIGSYS on (rt\_)sigreturn. Users of this
6666
interface should make sure that at least the signal trampoline code is
6767
included in this region. In addition, for syscalls that implement the
6868
trampoline code on the vDSO, that trampoline is never intercepted.
6969

70+
For PR_SYS_DISPATCH_INCLUSIVE_ON, [<offset>, <offset>+<length>) delimit
71+
a memory region interval from which syscalls are dispatched based on
72+
the userspace selector. Syscalls from outside of the range are always
73+
executed directly.
74+
7075
[selector] is a pointer to a char-sized region in the process memory
7176
region, that provides a quick way to enable or disable syscall redirection
7277
thread-wide, without the need to invoke the kernel directly. selector

include/uapi/linux/prctl.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,12 @@ struct prctl_mm_map {
252252
/* Dispatch syscalls to a userspace handler */
253253
#define PR_SET_SYSCALL_USER_DISPATCH 59
254254
# define PR_SYS_DISPATCH_OFF 0
255-
# define PR_SYS_DISPATCH_ON 1
255+
/* Enable dispatch except for the specified range */
256+
# define PR_SYS_DISPATCH_EXCLUSIVE_ON 1
257+
/* Enable dispatch for the specified range */
258+
# define PR_SYS_DISPATCH_INCLUSIVE_ON 2
259+
/* Legacy name for backwards compatibility */
260+
# define PR_SYS_DISPATCH_ON PR_SYS_DISPATCH_EXCLUSIVE_ON
256261
/* The control values for the user space selector when dispatch is enabled */
257262
# define SYSCALL_DISPATCH_FILTER_ALLOW 0
258263
# define SYSCALL_DISPATCH_FILTER_BLOCK 1

kernel/entry/syscall_user_dispatch.c

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ static int task_set_syscall_user_dispatch(struct task_struct *task, unsigned lon
7878
if (offset || len || selector)
7979
return -EINVAL;
8080
break;
81-
case PR_SYS_DISPATCH_ON:
81+
case PR_SYS_DISPATCH_EXCLUSIVE_ON:
8282
/*
8383
* Validate the direct dispatcher region just for basic
8484
* sanity against overflow and a 0-sized dispatcher
@@ -87,30 +87,40 @@ static int task_set_syscall_user_dispatch(struct task_struct *task, unsigned lon
8787
*/
8888
if (offset && offset + len <= offset)
8989
return -EINVAL;
90-
90+
break;
91+
case PR_SYS_DISPATCH_INCLUSIVE_ON:
92+
if (len == 0 || offset + len <= offset)
93+
return -EINVAL;
9194
/*
92-
* access_ok() will clear memory tags for tagged addresses
93-
* if current has memory tagging enabled.
94-
95-
* To enable a tracer to set a tracees selector the
96-
* selector address must be untagged for access_ok(),
97-
* otherwise an untagged tracer will always fail to set a
98-
* tagged tracees selector.
95+
* Invert the range, the check in syscall_user_dispatch()
96+
* supports wrap-around.
9997
*/
100-
if (selector && !access_ok(untagged_addr(selector), sizeof(*selector)))
101-
return -EFAULT;
102-
98+
offset = offset + len;
99+
len = -len;
103100
break;
104101
default:
105102
return -EINVAL;
106103
}
107104

105+
/*
106+
* access_ok() will clear memory tags for tagged addresses
107+
* if current has memory tagging enabled.
108+
*
109+
* To enable a tracer to set a tracees selector the
110+
* selector address must be untagged for access_ok(),
111+
* otherwise an untagged tracer will always fail to set a
112+
* tagged tracees selector.
113+
*/
114+
if (mode != PR_SYS_DISPATCH_OFF && selector &&
115+
!access_ok(untagged_addr(selector), sizeof(*selector)))
116+
return -EFAULT;
117+
108118
task->syscall_dispatch.selector = selector;
109119
task->syscall_dispatch.offset = offset;
110120
task->syscall_dispatch.len = len;
111121
task->syscall_dispatch.on_dispatch = false;
112122

113-
if (mode == PR_SYS_DISPATCH_ON)
123+
if (mode != PR_SYS_DISPATCH_OFF)
114124
set_task_syscall_work(task, SYSCALL_USER_DISPATCH);
115125
else
116126
clear_task_syscall_work(task, SYSCALL_USER_DISPATCH);

tools/include/uapi/linux/prctl.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,12 @@ struct prctl_mm_map {
252252
/* Dispatch syscalls to a userspace handler */
253253
#define PR_SET_SYSCALL_USER_DISPATCH 59
254254
# define PR_SYS_DISPATCH_OFF 0
255-
# define PR_SYS_DISPATCH_ON 1
255+
/* Enable dispatch except for the specified range */
256+
# define PR_SYS_DISPATCH_EXCLUSIVE_ON 1
257+
/* Enable dispatch for the specified range */
258+
# define PR_SYS_DISPATCH_INCLUSIVE_ON 2
259+
/* Legacy name for backwards compatibility */
260+
# define PR_SYS_DISPATCH_ON PR_SYS_DISPATCH_EXCLUSIVE_ON
256261
/* The control values for the user space selector when dispatch is enabled */
257262
# define SYSCALL_DISPATCH_FILTER_ALLOW 0
258263
# define SYSCALL_DISPATCH_FILTER_BLOCK 1

0 commit comments

Comments
 (0)