 #define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
 #define TCCNT_PTR (MAX_BPF_JIT_REG + 2)
 #define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
+#define PRIVATE_SP (MAX_BPF_JIT_REG + 4)
 #define ARENA_VM_START (MAX_BPF_JIT_REG + 5)
 
 #define check_imm(bits, imm) do { \
@@ -68,6 +69,8 @@ static const int bpf2a64[] = {
         [TCCNT_PTR] = A64_R(26),
         /* temporary register for blinding constants */
         [BPF_REG_AX] = A64_R(9),
+        /* callee saved register for private stack pointer */
+        [PRIVATE_SP] = A64_R(27),
         /* callee saved register for kern_vm_start address */
         [ARENA_VM_START] = A64_R(28),
 };
@@ -86,6 +89,7 @@ struct jit_ctx {
         u64 user_vm_start;
         u64 arena_vm_start;
         bool fp_used;
+        bool priv_sp_used;
         bool write;
 };
 
@@ -98,6 +102,10 @@ struct bpf_plt {
 #define PLT_TARGET_SIZE   sizeof_field(struct bpf_plt, target)
 #define PLT_TARGET_OFFSET offsetof(struct bpf_plt, target)
 
+/* Memory size/value to protect private stack overflow/underflow */
+#define PRIV_STACK_GUARD_SZ    16
+#define PRIV_STACK_GUARD_VAL   0xEB9F12345678eb9fULL
+
 static inline void emit(const u32 insn, struct jit_ctx *ctx)
 {
         if (ctx->image != NULL && ctx->write)
@@ -387,8 +395,11 @@ static void find_used_callee_regs(struct jit_ctx *ctx)
         if (reg_used & 8)
                 ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_9];
 
-        if (reg_used & 16)
+        if (reg_used & 16) {
                 ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_FP];
+                if (ctx->priv_sp_used)
+                        ctx->used_callee_reg[i++] = bpf2a64[PRIVATE_SP];
+        }
 
         if (ctx->arena_vm_start)
                 ctx->used_callee_reg[i++] = bpf2a64[ARENA_VM_START];
@@ -412,6 +423,7 @@ static void push_callee_regs(struct jit_ctx *ctx)
                 emit(A64_PUSH(A64_R(23), A64_R(24), A64_SP), ctx);
                 emit(A64_PUSH(A64_R(25), A64_R(26), A64_SP), ctx);
                 emit(A64_PUSH(A64_R(27), A64_R(28), A64_SP), ctx);
+                ctx->fp_used = true;
         } else {
                 find_used_callee_regs(ctx);
                 for (i = 0; i + 1 < ctx->nr_used_callee_reg; i += 2) {
@@ -461,6 +473,19 @@ static void pop_callee_regs(struct jit_ctx *ctx)
         }
 }
 
+static void emit_percpu_ptr(const u8 dst_reg, void __percpu *ptr,
+                            struct jit_ctx *ctx)
+{
+        const u8 tmp = bpf2a64[TMP_REG_1];
+
+        emit_a64_mov_i64(dst_reg, (__force const u64)ptr, ctx);
+        if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
+                emit(A64_MRS_TPIDR_EL2(tmp), ctx);
+        else
+                emit(A64_MRS_TPIDR_EL1(tmp), ctx);
+        emit(A64_ADD(1, dst_reg, dst_reg, tmp), ctx);
+}
+
 #define BTI_INSNS (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) ? 1 : 0)
 #define PAC_INSNS (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) ? 1 : 0)
 
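The emit_percpu_ptr() helper added above materializes a per-CPU address the same way this_cpu_ptr() does in C: the current CPU's per-CPU offset is kept in TPIDR_EL1 (TPIDR_EL2 when the kernel runs at EL2 with VHE) and is added to the variable's per-CPU base address, using TMP_REG_1 as scratch. A rough C-level equivalent of the emitted sequence, shown only as an illustration of what the JITed code computes at run time (not part of the patch):

/* Sketch: what the three emitted instructions compute, in kernel C. */
static void *percpu_ptr_sketch(void __percpu *ptr)
{
        u64 off;

        if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
                off = read_sysreg(tpidr_el2);   /* VHE: per-CPU offset lives here */
        else
                off = read_sysreg(tpidr_el1);
        return (void *)((__force u64)ptr + off);
}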
@@ -476,6 +501,8 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
         const bool is_main_prog = !bpf_is_subprog(prog);
         const u8 fp = bpf2a64[BPF_REG_FP];
         const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
+        const u8 priv_sp = bpf2a64[PRIVATE_SP];
+        void __percpu *priv_stack_ptr;
         const int idx0 = ctx->idx;
         int cur_offset;
 
@@ -551,15 +578,23 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
                 emit(A64_SUB_I(1, A64_SP, A64_FP, 96), ctx);
         }
 
-        if (ctx->fp_used)
-                /* Set up BPF prog stack base register */
-                emit(A64_MOV(1, fp, A64_SP), ctx);
-
         /* Stack must be multiples of 16B */
         ctx->stack_size = round_up(prog->aux->stack_depth, 16);
 
+        if (ctx->fp_used) {
+                if (ctx->priv_sp_used) {
+                        /* Set up private stack pointer */
+                        priv_stack_ptr = prog->aux->priv_stack_ptr + PRIV_STACK_GUARD_SZ;
+                        emit_percpu_ptr(priv_sp, priv_stack_ptr, ctx);
+                        emit(A64_ADD_I(1, fp, priv_sp, ctx->stack_size), ctx);
+                } else {
+                        /* Set up BPF prog stack base register */
+                        emit(A64_MOV(1, fp, A64_SP), ctx);
+                }
+        }
+
         /* Set up function call stack */
-        if (ctx->stack_size)
+        if (ctx->stack_size && !ctx->priv_sp_used)
                 emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
 
         if (ctx->arena_vm_start)
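With a private stack the prologue no longer carves the BPF stack out of the kernel stack: SP is left untouched, and BPF_REG_FP is pointed at the top of the per-CPU region instead. A sketch of the layout the code above assumes (sizes follow from PRIV_STACK_GUARD_SZ and the rounded stack_depth):

/*
 * prog->aux->priv_stack_ptr     priv_sp                 fp = priv_sp + stack_size
 *       |                          |                               |
 *       v                          v                               v
 *       +------------+-----------------------------------+------------+
 *       | 16B guard  |  BPF stack, round_up(depth, 16) B  | 16B guard  |
 *       +------------+-----------------------------------+------------+
 *
 * BPF frame-pointer accesses use negative offsets from fp, so they land in
 * the usable region; the guard words catch writes past either end.
 */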
@@ -623,7 +658,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
         emit(A64_STR64I(tcc, ptr, 0), ctx);
 
         /* restore SP */
-        if (ctx->stack_size)
+        if (ctx->stack_size && !ctx->priv_sp_used)
                 emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
 
         pop_callee_regs(ctx);
@@ -991,7 +1026,7 @@ static void build_epilogue(struct jit_ctx *ctx, bool was_classic)
         const u8 ptr = bpf2a64[TCCNT_PTR];
 
         /* We're done with BPF stack */
-        if (ctx->stack_size)
+        if (ctx->stack_size && !ctx->priv_sp_used)
                 emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
 
         pop_callee_regs(ctx);
@@ -1120,6 +1155,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
         const u8 tmp2 = bpf2a64[TMP_REG_2];
         const u8 fp = bpf2a64[BPF_REG_FP];
         const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
+        const u8 priv_sp = bpf2a64[PRIVATE_SP];
         const s16 off = insn->off;
         const s32 imm = insn->imm;
         const int i = insn - ctx->prog->insnsi;
@@ -1564,7 +1600,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
                         src = tmp2;
                 }
                 if (src == fp) {
-                        src_adj = A64_SP;
+                        src_adj = ctx->priv_sp_used ? priv_sp : A64_SP;
                         off_adj = off + ctx->stack_size;
                 } else {
                         src_adj = src;
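BPF loads relative to r10 (BPF_REG_FP) use negative offsets; the JIT rebases them by adding ctx->stack_size, and with a private stack the base register becomes priv_sp instead of A64_SP. A worked example with illustrative numbers (not taken from the patch), assuming ctx->stack_size == 32:

/*
 *   BPF:  r0 = *(u64 *)(r10 - 8)     =>  off_adj = -8 + 32 = 24
 *
 *   default stack:  ldr x7, [sp, #24]     // x7 is this JIT's mapping of r0
 *   private stack:  ldr x7, [x27, #24]    // x27 (PRIVATE_SP) holds priv_sp
 */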
@@ -1630,17 +1666,14 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
                         return ret;
                 break;
 
-        /* speculation barrier */
+        /* speculation barrier against v1 and v4 */
         case BPF_ST | BPF_NOSPEC:
-                /*
-                 * Nothing required here.
-                 *
-                 * In case of arm64, we rely on the firmware mitigation of
-                 * Speculative Store Bypass as controlled via the ssbd kernel
-                 * parameter. Whenever the mitigation is enabled, it works
-                 * for all of the kernel code with no need to provide any
-                 * additional instructions.
-                 */
+                if (alternative_has_cap_likely(ARM64_HAS_SB)) {
+                        emit(A64_SB, ctx);
+                } else {
+                        emit(A64_DSB_NSH, ctx);
+                        emit(A64_ISB, ctx);
+                }
                 break;
 
         /* ST: *(size *)(dst + off) = imm */
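For readers mapping the macros in the BPF_NOSPEC case above to instructions: A64_SB, A64_DSB_NSH and A64_ISB are the JIT's encodings of the corresponding AArch64 barriers, so a verifier-inserted BPF_NOSPEC now lowers to roughly the following (shown for illustration):

/*
 *   FEAT_SB implemented:   sb
 *   otherwise:             dsb nsh
 *                          isb
 */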
@@ -1657,7 +1690,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
                         dst = tmp2;
                 }
                 if (dst == fp) {
-                        dst_adj = A64_SP;
+                        dst_adj = ctx->priv_sp_used ? priv_sp : A64_SP;
                         off_adj = off + ctx->stack_size;
                 } else {
                         dst_adj = dst;
@@ -1719,7 +1752,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
                         dst = tmp2;
                 }
                 if (dst == fp) {
-                        dst_adj = A64_SP;
+                        dst_adj = ctx->priv_sp_used ? priv_sp : A64_SP;
                         off_adj = off + ctx->stack_size;
                 } else {
                         dst_adj = dst;
@@ -1862,6 +1895,39 @@ static inline void bpf_flush_icache(void *start, void *end)
         flush_icache_range((unsigned long)start, (unsigned long)end);
 }
 
+static void priv_stack_init_guard(void __percpu *priv_stack_ptr, int alloc_size)
+{
+        int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
+        u64 *stack_ptr;
+
+        for_each_possible_cpu(cpu) {
+                stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
+                stack_ptr[0] = PRIV_STACK_GUARD_VAL;
+                stack_ptr[1] = PRIV_STACK_GUARD_VAL;
+                stack_ptr[underflow_idx] = PRIV_STACK_GUARD_VAL;
+                stack_ptr[underflow_idx + 1] = PRIV_STACK_GUARD_VAL;
+        }
+}
+
+static void priv_stack_check_guard(void __percpu *priv_stack_ptr, int alloc_size,
+                                   struct bpf_prog *prog)
+{
+        int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
+        u64 *stack_ptr;
+
+        for_each_possible_cpu(cpu) {
+                stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
+                if (stack_ptr[0] != PRIV_STACK_GUARD_VAL ||
+                    stack_ptr[1] != PRIV_STACK_GUARD_VAL ||
+                    stack_ptr[underflow_idx] != PRIV_STACK_GUARD_VAL ||
+                    stack_ptr[underflow_idx + 1] != PRIV_STACK_GUARD_VAL) {
+                        pr_err("BPF private stack overflow/underflow detected for prog %s\n",
+                               bpf_jit_get_prog_name(prog));
+                        break;
+                }
+        }
+}
+
 struct arm64_jit_data {
         struct bpf_binary_header *header;
         u8 *ro_image;
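The guards are the first and last 16 bytes of each per-CPU allocation, written and checked as two u64 words each. A worked example with an illustrative size (not from the patch), assuming stack_depth rounds up to 512:

/*
 *   alloc_size     = 512 + 2 * PRIV_STACK_GUARD_SZ = 544 bytes = 68 u64 words
 *   underflow_idx  = (544 - 16) >> 3 = 66
 *
 *   overflow guard:   stack_ptr[0],  stack_ptr[1]     (bytes   0..15)
 *   underflow guard:  stack_ptr[66], stack_ptr[67]    (bytes 528..543)
 *
 * bpf_jit_free() runs priv_stack_check_guard(), so a clobbered guard word is
 * reported once per program at teardown.
 */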
@@ -1874,9 +1940,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
         int image_size, prog_size, extable_size, extable_align, extable_offset;
         struct bpf_prog *tmp, *orig_prog = prog;
         struct bpf_binary_header *header;
-        struct bpf_binary_header *ro_header;
+        struct bpf_binary_header *ro_header = NULL;
         struct arm64_jit_data *jit_data;
+        void __percpu *priv_stack_ptr = NULL;
         bool was_classic = bpf_prog_was_classic(prog);
+        int priv_stack_alloc_sz;
         bool tmp_blinded = false;
         bool extra_pass = false;
         struct jit_ctx ctx;
@@ -1908,6 +1976,23 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
                 }
                 prog->aux->jit_data = jit_data;
         }
+        priv_stack_ptr = prog->aux->priv_stack_ptr;
+        if (!priv_stack_ptr && prog->aux->jits_use_priv_stack) {
+                /* Allocate actual private stack size with verifier-calculated
+                 * stack size plus two memory guards to protect overflow and
+                 * underflow.
+                 */
+                priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 16) +
+                                      2 * PRIV_STACK_GUARD_SZ;
+                priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_sz, 16, GFP_KERNEL);
+                if (!priv_stack_ptr) {
+                        prog = orig_prog;
+                        goto out_priv_stack;
+                }
+
+                priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_sz);
+                prog->aux->priv_stack_ptr = priv_stack_ptr;
+        }
         if (jit_data->ctx.offset) {
                 ctx = jit_data->ctx;
                 ro_image_ptr = jit_data->ro_image;
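The per-CPU size mirrors what the prologue expects: stack_depth rounded up to 16, plus one guard at each end, allocated with 16-byte alignment. A quick illustrative sizing (numbers not from the patch):

/*
 *   stack_depth = 100  ->  round_up(100, 16) = 112
 *   priv_stack_alloc_sz = 112 + 2 * 16 = 144 bytes per CPU
 *   usable BPF stack    = bytes 16..127; the prologue points priv_sp at
 *                         byte 16 and fp at byte 16 + 112 = 128
 */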
@@ -1931,6 +2016,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
         ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena);
         ctx.arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena);
 
+        if (priv_stack_ptr)
+                ctx.priv_sp_used = true;
+
         /* Pass 1: Estimate the maximum image size.
          *
          * BPF line info needs ctx->offset[i] to be the offset of
@@ -2070,7 +2158,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
                         ctx.offset[i] *= AARCH64_INSN_SIZE;
                 bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
 out_off:
+                if (!ro_header && priv_stack_ptr) {
+                        free_percpu(priv_stack_ptr);
+                        prog->aux->priv_stack_ptr = NULL;
+                }
                 kvfree(ctx.offset);
+out_priv_stack:
                 kfree(jit_data);
                 prog->aux->jit_data = NULL;
         }
@@ -2089,6 +2182,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
         goto out_off;
 }
 
+bool bpf_jit_supports_private_stack(void)
+{
+        return true;
+}
+
 bool bpf_jit_supports_kfunc_call(void)
 {
         return true;
@@ -2243,11 +2341,6 @@ static int calc_arg_aux(const struct btf_func_model *m,
 
         /* the rest arguments are passed through stack */
         for (; i < m->nr_args; i++) {
-                /* We can not know for sure about exact alignment needs for
-                 * struct passed on stack, so deny those
-                 */
-                if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
-                        return -ENOTSUPP;
                 stack_slots = (m->arg_size[i] + 7) / 8;
                 a->bstack_for_args += stack_slots * 8;
                 a->ostack_for_args = a->ostack_for_args + stack_slots * 8;
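With the struct restriction dropped, a struct passed on the stack is accounted like any other stack argument: its size is rounded up to 8-byte slots on both the BPF stack and the origin call stack. For illustration (numbers not from the patch), a single 12-byte struct argument:

/*
 *   stack_slots          = (12 + 7) / 8 = 2
 *   a->bstack_for_args  += 2 * 8 = 16
 *   a->ostack_for_args  += 2 * 8 = 16
 */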
@@ -2911,6 +3004,17 @@ bool bpf_jit_supports_percpu_insn(void)
         return true;
 }
 
+bool bpf_jit_bypass_spec_v4(void)
+{
+        /* In case of arm64, we rely on the firmware mitigation of Speculative
+         * Store Bypass as controlled via the ssbd kernel parameter. Whenever
+         * the mitigation is enabled, it works for all of the kernel code with
+         * no need to provide any additional instructions. Therefore, skip
+         * inserting nospec insns against Spectre v4.
+         */
+        return true;
+}
+
 bool bpf_jit_inlines_helper_call(s32 imm)
 {
         switch (imm) {
@@ -2928,6 +3032,8 @@ void bpf_jit_free(struct bpf_prog *prog)
         if (prog->jited) {
                 struct arm64_jit_data *jit_data = prog->aux->jit_data;
                 struct bpf_binary_header *hdr;
+                void __percpu *priv_stack_ptr;
+                int priv_stack_alloc_sz;
 
                 /*
                  * If we fail the final pass of JIT (from jit_subprogs),
@@ -2941,6 +3047,13 @@ void bpf_jit_free(struct bpf_prog *prog)
                 }
                 hdr = bpf_jit_binary_pack_hdr(prog);
                 bpf_jit_binary_pack_free(hdr, NULL);
+                priv_stack_ptr = prog->aux->priv_stack_ptr;
+                if (priv_stack_ptr) {
+                        priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 16) +
+                                              2 * PRIV_STACK_GUARD_SZ;
+                        priv_stack_check_guard(priv_stack_ptr, priv_stack_alloc_sz, prog);
+                        free_percpu(prog->aux->priv_stack_ptr);
+                }
                 WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
         }
 