@@ -16,14 +16,153 @@ package seccomp
1616
1717import (
1818 "fmt"
19+ "os"
1920 "runtime"
21+ "syscall"
2022 "unsafe"
2123
2224 "golang.org/x/sys/unix"
2325 "gvisor.dev/gvisor/pkg/abi/linux"
2426 "gvisor.dev/gvisor/pkg/bpf"
27+ "gvisor.dev/gvisor/pkg/hostsyscall"
28+ "gvisor.dev/gvisor/pkg/log"
2529)
2630
31+ // NotificationCallback is a callback which is called when a blocked syscall is triggered.
32+ type NotificationCallback func (f * os.File , req linux.SeccompNotif , ret int )
33+
34+ // SetFilterAndLogNotifications installs the given BPF program and logs user
35+ // notifications triggered by the seccomp filter. It allows the triggering
36+ // syscalls to proceed without being blocked.
37+ //
38+ // This function is intended for debugging seccomp filter violations and should
39+ // not be used in production environments.
40+ //
41+ // Note: It spawns a background goroutine to monitor a seccomp file descriptor
42+ // and log any received notifications.
43+ func SetFilterAndLogNotifications (
44+ instrs []bpf.Instruction ,
45+ options ProgramOptions ,
46+ ) error {
47+ // PR_SET_NO_NEW_PRIVS is required in order to enable seccomp. See
48+ // seccomp(2) for details.
49+ //
50+ // PR_SET_NO_NEW_PRIVS is specific to the calling thread, not the whole
51+ // thread group, so between PR_SET_NO_NEW_PRIVS and seccomp() below we must
52+ // remain on the same thread. no_new_privs will be propagated to other
53+ // threads in the thread group by seccomp(SECCOMP_FILTER_FLAG_TSYNC), in
54+ // kernel/seccomp.c:seccomp_sync_threads().
55+ runtime .LockOSThread ()
56+ defer runtime .UnlockOSThread ()
57+ if _ , _ , errno := unix .RawSyscall6 (unix .SYS_PRCTL , linux .PR_SET_NO_NEW_PRIVS , 1 , 0 , 0 , 0 , 0 ); errno != 0 {
58+ return errno
59+ }
60+
61+ sockProg := linux.SockFprog {
62+ Len : uint16 (len (instrs )),
63+ Filter : (* linux .BPFInstruction )(unsafe .Pointer (& instrs [0 ])),
64+ }
65+ flags := linux .SECCOMP_FILTER_FLAG_TSYNC |
66+ linux .SECCOMP_FILTER_FLAG_NEW_LISTENER |
67+ linux .SECCOMP_FILTER_FLAG_TSYNC_ESRCH | (1 << 5 )
68+ fd , errno := seccomp (linux .SECCOMP_SET_MODE_FILTER , uint32 (flags ), unsafe .Pointer (& sockProg ))
69+ if errno != 0 {
70+ return errno
71+ }
72+ if options .NotifyFDNum > 0 {
73+ if err := unix .Dup2 (int (fd ), options .NotifyFDNum ); err != nil {
74+ panic (fmt .Sprintf ("dup2 %d -> %d: %v" , fd , options .NotifyFDNum , err ))
75+ }
76+ unix .Close (int (fd ))
77+ fd = uintptr (options .NotifyFDNum )
78+ }
79+ f := os .NewFile (fd , "seccomp_notify" )
80+ go func () {
81+ // LockOSThread should help minimizing interactions with the scheduler.
82+ runtime .LockOSThread ()
83+ defer runtime .UnlockOSThread ()
84+ var (
85+ req linux.SeccompNotif
86+ resp linux.SeccompNotifResp
87+ )
88+ for {
89+ req = linux.SeccompNotif {}
90+ _ , _ , errno := unix .Syscall (unix .SYS_IOCTL , uintptr (f .Fd ()),
91+ uintptr (linux .SECCOMP_IOCTL_NOTIF_RECV ),
92+ uintptr (unsafe .Pointer (& req )))
93+ if errno != 0 {
94+ if errno == unix .EINTR {
95+ continue
96+ }
97+ panic (fmt .Sprintf ("SECCOMP_IOCTL_NOTIF_RECV failed with %d" , errno ))
98+ }
99+
100+ attached := true
101+ if errno := hostsyscall .RawSyscallErrno (unix .SYS_PTRACE , unix .PTRACE_ATTACH , uintptr (req .Pid ), 0 ); errno != 0 {
102+ log .Warningf ("unable to attach: %v" , errno )
103+ attached = false
104+ }
105+ resp = linux.SeccompNotifResp {
106+ ID : req .ID ,
107+ Flags : linux .SECCOMP_USER_NOTIF_FLAG_CONTINUE ,
108+ }
109+ errno = hostsyscall .RawSyscallErrno (unix .SYS_IOCTL , uintptr (f .Fd ()),
110+ uintptr (linux .SECCOMP_IOCTL_NOTIF_SEND ),
111+ uintptr (unsafe .Pointer (& resp )))
112+ if errno != 0 {
113+ panic (fmt .Sprintf ("SECCOMP_IOCTL_NOTIF_SEND failed with %d" , errno ))
114+ }
115+ if ! attached {
116+ if options .NotificationCallback != nil {
117+ options .NotificationCallback (f , req , 0 )
118+ } else {
119+ log .Warningf ("Seccomp violation: %#v" , req )
120+ }
121+ continue
122+ }
123+ for {
124+ var info unix.Siginfo
125+ errno := unix .Waitid (unix .P_PID , int (req .Pid ), & info , syscall .WALL | syscall .WEXITED , nil )
126+ if errno == syscall .EINTR {
127+ continue
128+ } else if errno != nil {
129+ log .Warningf ("failed to wait for the child process: %v" , errno )
130+ }
131+ break
132+ }
133+ ret := 0
134+ {
135+ var regs linux.PtraceRegs
136+ iovec := unix.Iovec {
137+ Base : (* byte )(unsafe .Pointer (& regs )),
138+ Len : uint64 (unsafe .Sizeof (regs )),
139+ }
140+ _ , _ , errno := unix .RawSyscall6 (
141+ unix .SYS_PTRACE ,
142+ unix .PTRACE_GETREGSET ,
143+ uintptr (req .Pid ),
144+ linux .NT_PRSTATUS ,
145+ uintptr (unsafe .Pointer (& iovec )),
146+ 0 , 0 )
147+ if errno != 0 {
148+ log .Warningf ("unable to get registers: %s" , errno )
149+ }
150+ ret = int (regs .SyscallRet ())
151+ }
152+
153+ if options .NotificationCallback != nil {
154+ options .NotificationCallback (f , req , ret )
155+ } else {
156+ log .Warningf ("Seccomp violation: %#v" , req )
157+ }
158+ if errno := hostsyscall .RawSyscallErrno (unix .SYS_PTRACE , unix .PTRACE_DETACH , uintptr (req .Pid ), 0 ); errno != 0 {
159+ panic (fmt .Sprintf ("unable to detach: %v" , errno ))
160+ }
161+ }
162+ }()
163+ return nil
164+ }
165+
27166// SetFilter installs the given BPF program.
28167func SetFilter (instrs []bpf.Instruction ) error {
29168 // PR_SET_NO_NEW_PRIVS is required in order to enable seccomp. See
0 commit comments