diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp
index f4f414d192df0..c3ce25237637e 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -38,6 +38,10 @@ static cl::opt<bool> BPFExpandMemcpyInOrder("bpf-expand-memcpy-in-order",
   cl::Hidden, cl::init(false),
   cl::desc("Expand memcpy into load/store pairs in order"));
 
+static cl::opt<bool> BPFAllowMisalignedMemAccess("bpf-allow-misaligned-mem-access",
+  cl::Hidden, cl::init(false),
+  cl::desc("Allow misaligned memory access"));
+
 static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg,
                  SDValue Val = {}) {
   std::string Str;
@@ -198,6 +202,26 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
   HasMovsx = STI.hasMovsx();
 }
 
+bool BPFTargetLowering::allowsMisalignedMemoryAccesses(
+    EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
+  if (!BPFAllowMisalignedMemAccess) {
+    // -bpf-allow-misaligned-mem-access is not enabled.
+    return false;
+  }
+
+  if (!VT.isSimple()) {
+    // Only allow misaligned access for simple value types.
+    return false;
+  }
+
+  if (Fast) {
+    // Always assume fast mode when BPFAllowMisalignedMemAccess is enabled.
+    *Fast = true;
+  }
+
+  return true;
+}
+
 bool BPFTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
   return false;
 }
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h
index 8f60261c10e9e..0b9ece5ab18c4 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.h
+++ b/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -46,6 +46,10 @@ class BPFTargetLowering : public TargetLowering {
   // with the given GlobalAddress is legal.
   bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
 
+  bool allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
+                                      MachineMemOperand::Flags,
+                                      unsigned *) const override;
+
   BPFTargetLowering::ConstraintType
   getConstraintType(StringRef Constraint) const override;
diff --git a/llvm/test/CodeGen/BPF/unaligned_load_store.ll b/llvm/test/CodeGen/BPF/unaligned_load_store.ll
new file mode 100644
index 0000000000000..c01de4623af97
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/unaligned_load_store.ll
@@ -0,0 +1,208 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+
+; RUN: llc -mtriple=bpfel -bpf-allow-misaligned-mem-access -verify-machineinstrs %s -o - \
+; RUN:   | FileCheck --check-prefixes=ALL,MISALIGN %s
+; RUN: llc -mtriple=bpfeb -bpf-allow-misaligned-mem-access -verify-machineinstrs %s -o - \
+; RUN:   | FileCheck --check-prefixes=ALL,MISALIGN %s
+
+; RUN: llc -mtriple=bpfel -verify-machineinstrs %s -o - \
+; RUN:   | FileCheck --check-prefixes=ALL,ALIGN %s
+; RUN: llc -mtriple=bpfeb -verify-machineinstrs %s -o - \
+; RUN:   | FileCheck --check-prefixes=ALL,ALIGN %s
+
+; NOTE: This test verifies that the new bpf-allow-misaligned-mem-access
+; option allows the BPF backend to emit direct unaligned load/store
+; instructions instead of byte-by-byte emulation sequences.
+
+; ---------------------------------------------------------------------
+; i8 load
+; ---------------------------------------------------------------------
+define i8 @test_load_i8(ptr %p) {
+; ALL-LABEL: test_load_i8:
+; ALL: # %bb.0:
+; ALL-NEXT: w{{[0-9]+}} = *(u8 *)(r1 + 0)
+; ALL-NEXT: exit
+  %v = load i8, ptr %p, align 1
+  ret i8 %v
+}
+
+; ---------------------------------------------------------------------
+; i8 store
+; ---------------------------------------------------------------------
+define void @test_store_i8(ptr %p, i8 %v) {
+; ALL-LABEL: test_store_i8:
+; ALL: # %bb.0:
+; ALL-NEXT: *(u8 *)(r1 + 0) = w2
+; ALL-NEXT: exit
+  store i8 %v, ptr %p, align 1
+  ret void
+}
+
+; ---------------------------------------------------------------------
+; i16 load
+; ---------------------------------------------------------------------
+define i16 @test_load_i16(ptr %p) {
+; MISALIGN-LABEL: test_load_i16:
+; MISALIGN: # %bb.0:
+; MISALIGN: w{{[0-9]+}} = *(u16 *)(r1 + 0)
+; MISALIGN: exit
+;
+; ALIGN-LABEL: test_load_i16:
+; ALIGN: # %bb.0:
+; ALIGN: w{{[0-9]+}} = *(u8 *)(r1 + [[OFFSET:[0-9]+]])
+; ALIGN: w{{[0-9]+}} = *(u8 *)(r1 + [[OFFSET:[0-9]+]])
+; ALIGN: w{{[0-9]+}} <<= 8
+; ALIGN: w{{[0-9]+}} |= w{{[0-9]+}}
+; ALIGN: exit
+  %v = load i16, ptr %p, align 1
+  ret i16 %v
+}
+
+; ---------------------------------------------------------------------
+; i16 store
+; ---------------------------------------------------------------------
+define void @test_store_i16(ptr %p, i16 %v) {
+; MISALIGN-LABEL: test_store_i16:
+; MISALIGN: # %bb.0:
+; MISALIGN: *(u16 *)(r1 + 0) = w2
+; MISALIGN: exit
+;
+; ALIGN-LABEL: test_store_i16:
+; ALIGN: # %bb.0:
+; ALIGN: *(u8 *)(r1 + [[OFFSET:[0-9]+]]) = w{{[0-9]+}}
+; ALIGN: w{{[0-9]+}} >>= 8
+; ALIGN: *(u8 *)(r1 + [[OFFSET:[0-9]+]]) = w{{[0-9]+}}
+; ALIGN: exit
+  store i16 %v, ptr %p, align 1
+  ret void
+}
+
+; ---------------------------------------------------------------------
+; i32 load
+; ---------------------------------------------------------------------
+
+define i32 @test_load_i32(ptr %p) {
+; MISALIGN-LABEL: test_load_i32:
+; MISALIGN: # %bb.0:
+; MISALIGN: w{{[0-9]+}} = *(u32 *)(r1 + 0)
+; MISALIGN: exit
+;
+; ALIGN-LABEL: test_load_i32:
+; ALIGN: # %bb.0:
+; ALIGN: w{{[0-9]+}} = *(u8 *)(r1 + [[OFFSET:[0-9]+]])
+; ALIGN: w{{[0-9]+}} <<= 8
+; ALIGN: w{{[0-9]+}} = *(u8 *)(r1 + [[OFFSET:[0-9]+]])
+; ALIGN: w{{[0-9]+}} |= w{{[0-9]+}}
+; ALIGN: w{{[0-9]+}} = *(u8 *)(r1 + [[OFFSET:[0-9]+]])
+; ALIGN: w{{[0-9]+}} <<= 16
+; ALIGN: w{{[0-9]+}} = *(u8 *)(r1 + [[OFFSET:[0-9]+]])
+; ALIGN: w{{[0-9]+}} <<= 24
+; ALIGN: w{{[0-9]+}} |= w{{[0-9]+}}
+; ALIGN: w{{[0-9]+}} |= w{{[0-9]+}}
+; ALIGN: exit
+  %v = load i32, ptr %p, align 1
+  ret i32 %v
+}
+
+; ---------------------------------------------------------------------
+; i32 store
+; ---------------------------------------------------------------------
+
+define void @test_store_i32(ptr %p, i32 %v) {
+; MISALIGN-LABEL: test_store_i32:
+; MISALIGN: # %bb.0:
+; MISALIGN: *(u32 *)(r1 + 0) = w{{[0-9]+}}
+; MISALIGN: exit
+;
+; ALIGN-LABEL: test_store_i32:
+; ALIGN: # %bb.0:
+; ALIGN: w{{[0-9]+}} = w{{[0-9]+}}
+; ALIGN: w{{[0-9]+}} >>= 24
+; ALIGN: *(u8 *)(r1 + [[OFFSET:[0-9]+]]) = w{{[0-9]+}}
+; ALIGN: w{{[0-9]+}} = w{{[0-9]+}}
+; ALIGN: w{{[0-9]+}} >>= 16
+; ALIGN: *(u8 *)(r1 + [[OFFSET:[0-9]+]]) = w{{[0-9]+}}
+; ALIGN: *(u8 *)(r1 + [[OFFSET:[0-9]+]]) = w{{[0-9]+}}
+; ALIGN: w{{[0-9]+}} >>= 8
+; ALIGN: *(u8 *)(r1 + [[OFFSET:[0-9]+]]) = w{{[0-9]+}}
+; ALIGN: exit
+  store i32 %v, ptr %p, align 1
+  ret void
+}
+
+; ---------------------------------------------------------------------
+; i64 load
+; ---------------------------------------------------------------------
+
+define i64 @test_load_i64(ptr %p) {
+; MISALIGN-LABEL: test_load_i64:
+; MISALIGN: # %bb.0:
+; MISALIGN: r0 = *(u64 *)(r1 + 0)
+; MISALIGN: exit
+;
+; ALIGN-LABEL: test_load_i64:
+; ALIGN: # %bb.0:
+; ALIGN: w{{[0-9]+}} = *(u8 *)(r1 + [[OFFSET:[0-9]+]])
+; ALIGN: w{{[0-9]+}} = *(u8 *)(r1 + [[OFFSET:[0-9]+]])
+; ALIGN: r3 <<= 8
+; ALIGN: r3 |= r2
+; ALIGN: w{{[0-9]+}} = *(u8 *)(r1 + [[OFFSET:[0-9]+]])
+; ALIGN: r4 <<= 16
+; ALIGN: w{{[0-9]+}} = *(u8 *)(r1 + [[OFFSET:[0-9]+]])
+; ALIGN: r2 <<= 24
+; ALIGN: r2 |= r4
+; ALIGN: r2 |= r3
+; ALIGN: w{{[0-9]+}} = *(u8 *)(r1 + [[OFFSET:[0-9]+]])
+; ALIGN: w{{[0-9]+}} <<= 8
+; ALIGN: w{{[0-9]+}} = *(u8 *)(r1 + [[OFFSET:[0-9]+]])
+; ALIGN: w{{[0-9]+}} |= w{{[0-9]+}}
+; ALIGN: w{{[0-9]+}} = *(u8 *)(r1 + [[OFFSET:[0-9]+]])
+; ALIGN: w{{[0-9]+}} <<= 16
+; ALIGN: w{{[0-9]+}} = *(u8 *)(r1 + [[OFFSET:[0-9]+]])
+; ALIGN: w{{[0-9]+}} <<= 24
+; ALIGN: w{{[0-9]+}} |= w{{[0-9]+}}
+; ALIGN: w{{[0-9]+}} |= w{{[0-9]+}}
+; ALIGN: r0 <<= 32
+; ALIGN: r0 |= r2
+; ALIGN: exit
+  %v = load i64, ptr %p, align 1
+  ret i64 %v
+}
+
+; ---------------------------------------------------------------------
+; i64 store
+; ---------------------------------------------------------------------
+
+define void @test_store_i64(ptr %p, i64 %v) {
+; MISALIGN-LABEL: test_store_i64:
+; MISALIGN: # %bb.0:
+; MISALIGN: *(u64 *)(r1 + 0) = r2
+; MISALIGN: exit
+;
+; ALIGN-LABEL: test_store_i64:
+; ALIGN: # %bb.0:
+; ALIGN: *(u8 *)(r1 + [[OFFSET:[0-9]+]]) = w{{[0-9]+}}
+; ALIGN: r3 = r2
+; ALIGN: r3 >>= 56
+; ALIGN: *(u8 *)(r1 + [[OFFSET:[0-9]+]]) = w{{[0-9]+}}
+; ALIGN: r3 = r2
+; ALIGN: r3 >>= 48
+; ALIGN: *(u8 *)(r1 + [[OFFSET:[0-9]+]]) = w{{[0-9]+}}
+; ALIGN: r3 = r2
+; ALIGN: r3 >>= 40
+; ALIGN: *(u8 *)(r1 + [[OFFSET:[0-9]+]]) = w{{[0-9]+}}
+; ALIGN: r3 = r2
+; ALIGN: r3 >>= 32
+; ALIGN: *(u8 *)(r1 + [[OFFSET:[0-9]+]]) = w{{[0-9]+}}
+; ALIGN: r3 = r2
+; ALIGN: r3 >>= 24
+; ALIGN: *(u8 *)(r1 + [[OFFSET:[0-9]+]]) = w{{[0-9]+}}
+; ALIGN: r3 = r2
+; ALIGN: r3 >>= 16
+; ALIGN: *(u8 *)(r1 + [[OFFSET:[0-9]+]]) = w{{[0-9]+}}
+; ALIGN: r2 >>= 8
+; ALIGN: *(u8 *)(r1 + [[OFFSET:[0-9]+]]) = w{{[0-9]+}}
+; ALIGN: exit
+  store i64 %v, ptr %p, align 1
+  ret void
+}
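
Usage sketch (not part of the patch): the new option is a backend cl::opt, so the
RUN lines above pass it to llc directly; from clang it would be reached through the
-mllvm passthrough. A minimal, hypothetical C example that forces an align-1 access
via a packed struct:

  /* misaligned.c -- hypothetical example, assumes the -mllvm passthrough */
  struct __attribute__((packed)) hdr {
    unsigned char tag;
    unsigned int  val; /* at offset 1: every 4-byte access has align 1 */
  };

  unsigned int read_val(struct hdr *h) {
    /* With the flag: a single *(u32 *)(rX + 1) load.
       Without it: four *(u8 *) loads combined with shifts and ors,
       as the ALIGN checks above expect. */
    return h->val;
  }

  /* clang -O2 --target=bpf -mllvm -bpf-allow-misaligned-mem-access -S misaligned.c */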