@@ -16,14 +16,158 @@ package seccomp
1616
1717import (
1818 "fmt"
19+ "os"
1920 "runtime"
21+ "syscall"
22+ "time"
2023 "unsafe"
2124
2225 "golang.org/x/sys/unix"
2326 "gvisor.dev/gvisor/pkg/abi/linux"
2427 "gvisor.dev/gvisor/pkg/bpf"
28+ "gvisor.dev/gvisor/pkg/hostsyscall"
29+ "gvisor.dev/gvisor/pkg/log"
2530)
2631
// NotificationCallback is a callback which is called when a blocked syscall is
// triggered.
//
// f is the file wrapping the seccomp notification descriptor, req is the
// notification that was received for the triggering syscall, and ret is the
// value reported by PtraceRegs.SyscallRet for the traced task, or 0 when the
// monitor could not ptrace-attach to that task.
type NotificationCallback func(f *os.File, req linux.SeccompNotif, ret int)
34+
35+ // SetFilterAndLogNotifications installs the given BPF program and logs user
36+ // notifications triggered by the seccomp filter. It allows the triggering
37+ // syscalls to proceed without being blocked.
38+ //
39+ // This function is intended for debugging seccomp filter violations and should
40+ // not be used in production environments.
41+ //
42+ // Note: It spawns a background goroutine to monitor a seccomp file descriptor
43+ // and log any received notifications.
44+ func SetFilterAndLogNotifications (
45+ instrs []bpf.Instruction ,
46+ options ProgramOptions ,
47+ ) error {
48+ // PR_SET_NO_NEW_PRIVS is required in order to enable seccomp. See
49+ // seccomp(2) for details.
50+ //
51+ // PR_SET_NO_NEW_PRIVS is specific to the calling thread, not the whole
52+ // thread group, so between PR_SET_NO_NEW_PRIVS and seccomp() below we must
53+ // remain on the same thread. no_new_privs will be propagated to other
54+ // threads in the thread group by seccomp(SECCOMP_FILTER_FLAG_TSYNC), in
55+ // kernel/seccomp.c:seccomp_sync_threads().
56+ runtime .LockOSThread ()
57+ defer runtime .UnlockOSThread ()
58+ if _ , _ , errno := unix .RawSyscall6 (unix .SYS_PRCTL , linux .PR_SET_NO_NEW_PRIVS , 1 , 0 , 0 , 0 , 0 ); errno != 0 {
59+ return errno
60+ }
61+
62+ sockProg := linux.SockFprog {
63+ Len : uint16 (len (instrs )),
64+ Filter : (* linux .BPFInstruction )(unsafe .Pointer (& instrs [0 ])),
65+ }
66+ flags := linux .SECCOMP_FILTER_FLAG_TSYNC |
67+ linux .SECCOMP_FILTER_FLAG_NEW_LISTENER |
68+ linux .SECCOMP_FILTER_FLAG_TSYNC_ESRCH | (1 << 5 )
69+ fd , errno := seccomp (linux .SECCOMP_SET_MODE_FILTER , uint32 (flags ), unsafe .Pointer (& sockProg ))
70+ if errno != 0 {
71+ return errno
72+ }
73+ if options .NotifyFDNum > 0 {
74+ if err := unix .Dup2 (int (fd ), options .NotifyFDNum ); err != nil {
75+ panic (fmt .Sprintf ("dup2 %d -> %d: %v" , fd , options .NotifyFDNum , err ))
76+ }
77+ unix .Close (int (fd ))
78+ fd = uintptr (options .NotifyFDNum )
79+ }
80+ f := os .NewFile (fd , "seccomp_notify" )
81+ go func () {
82+ // LockOSThread should help minimizing interactions with the scheduler.
83+ runtime .LockOSThread ()
84+ defer runtime .UnlockOSThread ()
85+ var (
86+ req linux.SeccompNotif
87+ resp linux.SeccompNotifResp
88+ )
89+ for {
90+ req = linux.SeccompNotif {}
91+ _ , _ , errno := unix .Syscall (unix .SYS_IOCTL , uintptr (f .Fd ()),
92+ uintptr (linux .SECCOMP_IOCTL_NOTIF_RECV ),
93+ uintptr (unsafe .Pointer (& req )))
94+ if errno != 0 {
95+ if errno == unix .EINTR {
96+ continue
97+ }
98+ panic (fmt .Sprintf ("SECCOMP_IOCTL_NOTIF_RECV failed with %d" , errno ))
99+ }
100+
101+ log .Warningf ("req %#v" , req )
102+ attached := true
103+ if errno := hostsyscall .RawSyscallErrno (unix .SYS_PTRACE , unix .PTRACE_ATTACH , uintptr (req .Pid ), 0 ); errno != 0 {
104+ log .Warningf ("unable to attach: %v" , errno )
105+ attached = false
106+ }
107+ resp = linux.SeccompNotifResp {
108+ ID : req .ID ,
109+ Flags : linux .SECCOMP_USER_NOTIF_FLAG_CONTINUE ,
110+ }
111+ errno = hostsyscall .RawSyscallErrno (unix .SYS_IOCTL , uintptr (f .Fd ()),
112+ uintptr (linux .SECCOMP_IOCTL_NOTIF_SEND ),
113+ uintptr (unsafe .Pointer (& resp )))
114+ if errno != 0 {
115+ panic (fmt .Sprintf ("SECCOMP_IOCTL_NOTIF_SEND failed with %d" , errno ))
116+ }
117+ if ! attached {
118+ if options .NotificationCallback != nil {
119+ options .NotificationCallback (f , req , 0 )
120+ } else {
121+ log .Warningf ("Seccomp violation: %#v" , req )
122+ }
123+ continue
124+ }
125+ time .Sleep (1 * time .Second )
126+ for {
127+ var info unix.Siginfo
128+ errno := unix .Waitid (unix .P_PID , int (req .Pid ), & info , syscall .WALL | syscall .WEXITED , nil )
129+ if errno == syscall .EINTR {
130+ continue
131+ } else if errno != nil {
132+ log .Warningf ("failed to wait for the child process: %v" , errno )
133+ }
134+ log .Warningf ("%d: stopped -> %x" , req .Pid , info .Code )
135+ break
136+ }
137+ ret := 0
138+ {
139+ var regs linux.PtraceRegs
140+ iovec := unix.Iovec {
141+ Base : (* byte )(unsafe .Pointer (& regs )),
142+ Len : uint64 (unsafe .Sizeof (regs )),
143+ }
144+ _ , _ , errno := unix .RawSyscall6 (
145+ unix .SYS_PTRACE ,
146+ unix .PTRACE_GETREGSET ,
147+ uintptr (req .Pid ),
148+ linux .NT_PRSTATUS ,
149+ uintptr (unsafe .Pointer (& iovec )),
150+ 0 , 0 )
151+ if errno != 0 {
152+ log .Warningf ("unable to get registers: %s" , errno )
153+ }
154+ ret = int (regs .SyscallRet ())
155+ }
156+
157+ if options .NotificationCallback != nil {
158+ options .NotificationCallback (f , req , ret )
159+ } else {
160+ log .Warningf ("Seccomp violation: %#v" , req )
161+ }
162+ log .Warningf ("detach from %d" , req .Pid )
163+ if errno := hostsyscall .RawSyscallErrno (unix .SYS_PTRACE , unix .PTRACE_DETACH , uintptr (req .Pid ), 0 ); errno != 0 {
164+ panic (fmt .Sprintf ("unable to detach: %v" , errno ))
165+ }
166+ }
167+ }()
168+ return nil
169+ }
170+
27171// SetFilter installs the given BPF program.
28172func SetFilter (instrs []bpf.Instruction ) error {
29173 // PR_SET_NO_NEW_PRIVS is required in order to enable seccomp. See
0 commit comments