@@ -16,14 +16,156 @@ package seccomp
1616
1717import (
1818 "fmt"
19+ "os"
1920 "runtime"
21+ "syscall"
2022 "unsafe"
2123
2224 "golang.org/x/sys/unix"
2325 "gvisor.dev/gvisor/pkg/abi/linux"
2426 "gvisor.dev/gvisor/pkg/bpf"
27+ "gvisor.dev/gvisor/pkg/hostsyscall"
28+ "gvisor.dev/gvisor/pkg/log"
2529)
2630
31+ // NotificationCallback is a callback which is called when a blocked syscall is triggered.
32+ type NotificationCallback func (f * os.File , req linux.SeccompNotif , ret int )
33+
34+ // SetFilterAndLogNotifications installs the given BPF program and logs user
35+ // notifications triggered by the seccomp filter. It allows the triggering
36+ // syscalls to proceed without being blocked.
37+ //
38+ // This function is intended for debugging seccomp filter violations and should
39+ // not be used in production environments.
40+ //
41+ // Note: It spawns a background goroutine to monitor a seccomp file descriptor
42+ // and log any received notifications.
43+ func SetFilterAndLogNotifications (
44+ instrs []bpf.Instruction ,
45+ options ProgramOptions ,
46+ ) error {
47+ // PR_SET_NO_NEW_PRIVS is required in order to enable seccomp. See
48+ // seccomp(2) for details.
49+ //
50+ // PR_SET_NO_NEW_PRIVS is specific to the calling thread, not the whole
51+ // thread group, so between PR_SET_NO_NEW_PRIVS and seccomp() below we must
52+ // remain on the same thread. no_new_privs will be propagated to other
53+ // threads in the thread group by seccomp(SECCOMP_FILTER_FLAG_TSYNC), in
54+ // kernel/seccomp.c:seccomp_sync_threads().
55+ runtime .LockOSThread ()
56+ defer runtime .UnlockOSThread ()
57+ if _ , _ , errno := unix .RawSyscall6 (unix .SYS_PRCTL , linux .PR_SET_NO_NEW_PRIVS , 1 , 0 , 0 , 0 , 0 ); errno != 0 {
58+ return errno
59+ }
60+
61+ sockProg := linux.SockFprog {
62+ Len : uint16 (len (instrs )),
63+ Filter : (* linux .BPFInstruction )(unsafe .Pointer (& instrs [0 ])),
64+ }
65+ flags := linux .SECCOMP_FILTER_FLAG_TSYNC |
66+ linux .SECCOMP_FILTER_FLAG_NEW_LISTENER |
67+ linux .SECCOMP_FILTER_FLAG_TSYNC_ESRCH | (1 << 5 )
68+ fd , errno := seccomp (linux .SECCOMP_SET_MODE_FILTER , uint32 (flags ), unsafe .Pointer (& sockProg ))
69+ if errno != 0 {
70+ return errno
71+ }
72+ if options .NotifyFDNum > 0 {
73+ if err := unix .Dup2 (int (fd ), options .NotifyFDNum ); err != nil {
74+ panic (fmt .Sprintf ("dup2 %d -> %d: %v" , fd , options .NotifyFDNum , err ))
75+ }
76+ unix .Close (int (fd ))
77+ fd = uintptr (options .NotifyFDNum )
78+ }
79+ f := os .NewFile (fd , "seccomp_notify" )
80+ go func () {
81+ // LockOSThread should help minimizing interactions with the scheduler.
82+ runtime .LockOSThread ()
83+ defer runtime .UnlockOSThread ()
84+ var (
85+ req linux.SeccompNotif
86+ resp linux.SeccompNotifResp
87+ )
88+ for {
89+ req = linux.SeccompNotif {}
90+ _ , _ , errno := unix .Syscall (unix .SYS_IOCTL , uintptr (f .Fd ()),
91+ uintptr (linux .SECCOMP_IOCTL_NOTIF_RECV ),
92+ uintptr (unsafe .Pointer (& req )))
93+ if errno != 0 {
94+ if errno == unix .EINTR {
95+ continue
96+ }
97+ panic (fmt .Sprintf ("SECCOMP_IOCTL_NOTIF_RECV failed with %d" , errno ))
98+ }
99+
100+ log .Warningf ("req %#v" , req )
101+ attached := true
102+ if errno := hostsyscall .RawSyscallErrno (unix .SYS_PTRACE , unix .PTRACE_ATTACH , uintptr (req .Pid ), 0 ); errno != 0 {
103+ log .Warningf ("unable to attach: %v" , errno )
104+ attached = false
105+ }
106+ resp = linux.SeccompNotifResp {
107+ ID : req .ID ,
108+ Flags : linux .SECCOMP_USER_NOTIF_FLAG_CONTINUE ,
109+ }
110+ errno = hostsyscall .RawSyscallErrno (unix .SYS_IOCTL , uintptr (f .Fd ()),
111+ uintptr (linux .SECCOMP_IOCTL_NOTIF_SEND ),
112+ uintptr (unsafe .Pointer (& resp )))
113+ if errno != 0 {
114+ panic (fmt .Sprintf ("SECCOMP_IOCTL_NOTIF_SEND failed with %d" , errno ))
115+ }
116+ if ! attached {
117+ if options .NotificationCallback != nil {
118+ options .NotificationCallback (f , req , 0 )
119+ } else {
120+ log .Warningf ("Seccomp violation: %#v" , req )
121+ }
122+ continue
123+ }
124+ for {
125+ var info unix.Siginfo
126+ errno := unix .Waitid (unix .P_PID , int (req .Pid ), & info , syscall .WALL | syscall .WEXITED , nil )
127+ if errno == syscall .EINTR {
128+ continue
129+ } else if errno != nil {
130+ log .Warningf ("failed to wait for the child process: %v" , errno )
131+ }
132+ log .Warningf ("%d: stopped -> %x" , req .Pid , info .Code )
133+ break
134+ }
135+ ret := 0
136+ {
137+ var regs linux.PtraceRegs
138+ iovec := unix.Iovec {
139+ Base : (* byte )(unsafe .Pointer (& regs )),
140+ Len : uint64 (unsafe .Sizeof (regs )),
141+ }
142+ _ , _ , errno := unix .RawSyscall6 (
143+ unix .SYS_PTRACE ,
144+ unix .PTRACE_GETREGSET ,
145+ uintptr (req .Pid ),
146+ linux .NT_PRSTATUS ,
147+ uintptr (unsafe .Pointer (& iovec )),
148+ 0 , 0 )
149+ if errno != 0 {
150+ log .Warningf ("unable to get registers: %s" , errno )
151+ }
152+ ret = int (regs .SyscallRet ())
153+ }
154+
155+ if options .NotificationCallback != nil {
156+ options .NotificationCallback (f , req , ret )
157+ } else {
158+ log .Warningf ("Seccomp violation: %#v" , req )
159+ }
160+ log .Warningf ("detach from %d" , req .Pid )
161+ if errno := hostsyscall .RawSyscallErrno (unix .SYS_PTRACE , unix .PTRACE_DETACH , uintptr (req .Pid ), 0 ); errno != 0 {
162+ panic (fmt .Sprintf ("unable to detach: %v" , errno ))
163+ }
164+ }
165+ }()
166+ return nil
167+ }
168+
27169// SetFilter installs the given BPF program.
28170func SetFilter (instrs []bpf.Instruction ) error {
29171 // PR_SET_NO_NEW_PRIVS is required in order to enable seccomp. See
0 commit comments