Skip to content

Commit b7b9714

Browse files
zoecarverhuixie90
authored and committed
[Builtin] Add __builtin_zero_non_value_bits.
Adds `__builtin_zero_non_value_bits`, which zeroes all padding bits of a struct. This builtin should match the behavior of the equivalent builtins in NVCC and GCC (and MSVC?). There are some tests in this patch, but hopefully we'll also get tests from other compilers (so that all of the builtins can be as similar as possible). I'm planning to add support for unions, bitfields (both as members and as members of sub-objects), and booleans in follow-up patches. Differential Revision: https://reviews.llvm.org/D87974
1 parent b3e80d8 commit b7b9714

File tree

6 files changed

+493
-0
lines changed

6 files changed

+493
-0
lines changed

clang/include/clang/Basic/Builtins.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -633,6 +633,7 @@ BUILTIN(__builtin_vsscanf, "icC*RcC*Ra", "FS:1:")
633633
BUILTIN(__builtin_thread_pointer, "v*", "nc")
634634
BUILTIN(__builtin_launder, "v*v*", "ntE")
635635
LANGBUILTIN(__builtin_is_constant_evaluated, "b", "nE", CXX_LANG)
636+
LANGBUILTIN(__builtin_zero_non_value_bits, "v.", "n", CXX_LANG)
636637

637638
// GCC exception builtins
638639
BUILTIN(__builtin_eh_return, "vzv*", "r") // FIXME: Takes intptr_t, not size_t!

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2456,6 +2456,95 @@ static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
24562456
return RValue::get(CGF->Builder.CreateCall(UBF, Args));
24572457
}
24582458

2459+
/// Emit byte stores that zero the padding of the record that \p Ptr points to.
///
/// The record is walked in layout order: non-virtual bases first, then the
/// fields. Every byte lying between two consecutive sub-objects, and every
/// byte after the last one, is overwritten with zero. Base classes, record
/// fields and constant arrays of records are processed recursively so that
/// their internal padding is cleared as well.
///
/// \param CGF the function the stores are emitted into.
/// \param Ptr pointer to the object; its pointee must be the LLVM struct type
///            that the record type \p Ty was lowered to.
/// \param Ty  the C++ record type of the object.
///
/// Not handled yet: padding bits inside bit-field storage units, and arrays
/// that are multi-dimensional or spelled through a typedef. Unions and classes
/// with virtual bases are rejected with an "unsupported" diagnostic, because
/// the walk below would otherwise overwrite live storage.
static void RecursivelyZeroNonValueBits(CodeGenFunction &CGF, Value *Ptr,
                                        QualType Ty) {
  const auto *Record = cast<CXXRecordDecl>(Ty->getAsRecordDecl());

  // All union members start at offset 0, so the "bytes after the previous
  // member" computed below may belong to a larger member. Virtual bases are
  // not part of the non-virtual base offset table queried below. Refuse both
  // rather than corrupt the object or trip an assertion.
  if (Record->isUnion() || Record->getNumVBases() != 0) {
    CGF.CGM.ErrorUnsupported(
        Record, "__builtin_zero_non_value_bits of a union or of a class with "
                "virtual bases");
    return;
  }

  ASTContext &Ctx = CGF.getContext();
  const llvm::DataLayout &DL = CGF.CGM.getModule().getDataLayout();

  auto *I8Ptr = CGF.Builder.CreateBitCast(Ptr, CGF.Int8PtrTy);
  auto *Zero = ConstantInt::get(CGF.Int8Ty, 0);

  // Offset (in bytes) of the first byte that has not been classified yet.
  size_t RunningOffset = 0;

  // Zero every byte in [RunningOffset, End) and advance RunningOffset to End.
  // Does nothing when RunningOffset is already at or past End.
  auto ZeroUpTo = [&](size_t End) {
    for (; RunningOffset < End; ++RunningOffset) {
      auto *Index = ConstantInt::get(CGF.IntTy, RunningOffset);
      auto *Element = CGF.Builder.CreateGEP(I8Ptr, Index);
      // Single-byte stores: the alignment is one at every offset.
      CGF.Builder.CreateAlignedStore(Zero, Element, CharUnits::One());
    }
  };

  auto *ST = cast<StructType>(Ptr->getType()->getPointerElementType());
  const llvm::StructLayout *SL = DL.getStructLayout(ST);
  const ASTRecordLayout &ASTLayout = Ctx.getASTRecordLayout(Record);

  // A class that introduces its own vtable pointer stores it ahead of every
  // base and field. It is not a field in the AST, so without this it would be
  // treated as padding in front of the first sub-object and zeroed.
  if (ASTLayout.hasOwnVFPtr())
    RunningOffset = CGF.getPointerSize().getQuantity();

  // Collect the bases that occupy storage. Empty bases have no struct element
  // of their own, so looking one up by offset would return an unrelated field.
  llvm::SmallVector<std::pair<size_t, QualType>, 4> Bases;
  for (const CXXBaseSpecifier &Base : Record->bases()) {
    const auto *BaseRecord =
        cast<CXXRecordDecl>(Base.getType()->getAsRecordDecl());
    if (BaseRecord->isEmpty())
      continue;
    Bases.emplace_back(
        static_cast<size_t>(
            ASTLayout.getBaseClassOffset(BaseRecord).getQuantity()),
        Base.getType());
  }
  // bases() yields declaration order, which need not be layout order (a
  // primary base is placed first wherever it was declared). The gap zeroing
  // below is only correct when offsets are visited in ascending order.
  llvm::sort(Bases, [](const auto &LHS, const auto &RHS) {
    return LHS.first < RHS.first;
  });

  for (const auto &OffsetAndType : Bases) {
    const size_t Offset = OffsetAndType.first;
    // Zero padding in front of this base.
    ZeroUpTo(Offset);

    // Recursively zero the padding inside the base sub-object.
    const unsigned Index = SL->getElementContainingOffset(Offset);
    auto *BaseElement = CGF.Builder.CreateStructGEP(Ptr, Index);
    RecursivelyZeroNonValueBits(CGF, BaseElement, OffsetAndType.second);

    // Use the LLVM StructType data layout so we pick up on packed types.
    const llvm::StructLayout *BaseSL =
        DL.getStructLayout(cast<StructType>(ST->getElementType(Index)));
    RunningOffset = Offset + BaseSL->getSizeInBytes();
  }

  for (const FieldDecl *Field : Record->fields()) {
    // The AST reports field offsets in bits; convert to bytes so they can be
    // compared with RunningOffset.
    const size_t Offset =
        Ctx.toCharUnitsFromBits(
               ASTLayout.getFieldOffset(Field->getFieldIndex()))
            .getQuantity();
    // Zero padding in front of this field.
    ZeroUpTo(Offset);

    const unsigned Index = SL->getElementContainingOffset(Offset);
    QualType FieldTy = Field->getType();

    // If this field is an object, it may have padding of its own.
    if (FieldTy->isRecordType()) {
      auto *Element = CGF.Builder.CreateStructGEP(Ptr, Index);
      RecursivelyZeroNonValueBits(CGF, Element, FieldTy);
    }

    // TODO: warn if non-constant array type.
    if (isa<ConstantArrayType>(FieldTy) &&
        FieldTy->getArrayElementTypeNoTypeQual()->isRecordType()) {
      auto *FieldElement = CGF.Builder.CreateStructGEP(Ptr, Index);
      const auto *AT = cast<ConstantArrayType>(FieldTy);
      const QualType ElementTy = AT->getElementType();
      const CharUnits ElementAlign =
          Ctx.getASTRecordLayout(ElementTy->getAsRecordDecl()).getAlignment();
      Address FieldElementAddr{FieldElement, ElementAlign};
      const uint64_t NumElements = AT->getSize().getLimitedValue();
      // Each array element is a record with its own padding.
      for (uint64_t ArrIndex = 0; ArrIndex < NumElements; ++ArrIndex) {
        Address Element =
            CGF.Builder.CreateConstArrayGEP(FieldElementAddr, ArrIndex);
        RecursivelyZeroNonValueBits(CGF, Element.getPointer(), ElementTy);
      }
    }

    // Advance by the size in bits of the LLVM element (not its allocation
    // size), so bytes the element's value does not occupy are still seen as
    // padding by the next iteration.
    const size_t Size =
        DL.getTypeSizeInBits(ST->getElementType(Index)).getKnownMinSize() / 8;
    RunningOffset = Offset + Size;
  }

  // Clear all bytes after the last field.
  ZeroUpTo(SL->getSizeInBytes());
}
2547+
24592548
RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
24602549
const CallExpr *E,
24612550
ReturnValueSlot ReturnValue) {
@@ -4315,6 +4404,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
43154404

43164405
return RValue::get(Ptr);
43174406
}
4407+
case Builtin::BI__builtin_zero_non_value_bits: {
4408+
const Expr *Op = E->getArg(0);
4409+
Value *Address = EmitScalarExpr(Op);
4410+
auto PointeeTy = Op->getType()->getPointeeType();
4411+
RecursivelyZeroNonValueBits(*this, Address, PointeeTy);
4412+
return RValue::get(nullptr);
4413+
}
43184414
case Builtin::BI__sync_fetch_and_add:
43194415
case Builtin::BI__sync_fetch_and_sub:
43204416
case Builtin::BI__sync_fetch_and_or:

clang/lib/Sema/SemaChecking.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2327,6 +2327,26 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
23272327
}
23282328
case Builtin::BI__builtin_launder:
23292329
return SemaBuiltinLaunder(*this, TheCall);
2330+
case Builtin::BI__builtin_zero_non_value_bits: {
2331+
const Expr *PtrArg = TheCall->getArg(0)->IgnoreParenImpCasts();
2332+
const QualType PtrArgType = PtrArg->getType();
2333+
if (!PtrArgType->isPointerType() ||
2334+
!PtrArgType->getPointeeType()->isRecordType()) {
2335+
Diag(PtrArg->getBeginLoc(), diag::err_typecheck_convert_incompatible)
2336+
<< PtrArgType << "structure pointer" << 1 << 0 << 3 << 1 << PtrArgType
2337+
<< "structure pointer";
2338+
return ExprError();
2339+
}
2340+
if (PtrArgType->getPointeeType().isConstQualified()) {
2341+
Diag(PtrArg->getBeginLoc(), diag::err_typecheck_assign_const)
2342+
<< TheCall->getSourceRange() << 5 /*ConstUnknown*/;
2343+
return ExprError();
2344+
}
2345+
if (RequireCompleteType(PtrArg->getBeginLoc(), PtrArgType->getPointeeType(),
2346+
diag::err_typecheck_decl_incomplete_type))
2347+
return ExprError();
2348+
break;
2349+
}
23302350
case Builtin::BI__sync_fetch_and_add:
23312351
case Builtin::BI__sync_fetch_and_add_1:
23322352
case Builtin::BI__sync_fetch_and_add_2:
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
// RUN: %clang_cc1 -triple=x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s
2+
3+
struct alignas(4) Foo {
4+
char a;
5+
alignas(2) char b;
6+
};
7+
8+
struct alignas(4) Bar {
9+
char c;
10+
alignas(2) char d;
11+
};
12+
13+
struct alignas(4) Baz : Foo {
14+
char e;
15+
Bar f;
16+
};
17+
18+
// Baz structure:
19+
// "a", PAD_1, "b", PAD_2, "e", PAD_3, PAD_4, PAD_5, "c", PAD_6, "d", PAD_7
20+
// %struct.Baz = type { %struct.Foo, i8, [3 x i8], %struct.Bar }
21+
// %struct.Foo = type { i8, i8, i8, i8 }
22+
// %struct.Bar = type { i8, i8, i8, i8 }
23+
24+
// CHECK-LABEL: define void @_Z7testBazP3Baz(%struct.Baz* %baz)
25+
// CHECK: [[ADDR:%.*]] = alloca %struct.Baz*
26+
// CHECK: store %struct.Baz* %baz, %struct.Baz** [[ADDR]]
27+
// CHECK: [[BAZ:%.*]] = load %struct.Baz*, %struct.Baz** [[ADDR]]
28+
// CHECK: [[BAZ_RAW_PTR:%.*]] = bitcast %struct.Baz* [[BAZ]] to i8*
29+
30+
// CHECK: [[FOO_BASE:%.*]] = getelementptr inbounds %struct.Baz, %struct.Baz* [[BAZ]], i32 0, i32 0
31+
// CHECK: [[FOO_RAW_PTR:%.*]] = bitcast %struct.Foo* [[FOO_BASE]] to i8*
32+
// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[FOO_RAW_PTR]], i32 1
33+
// CHECK: store i8 0, i8* [[PAD_1]]
34+
// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* [[FOO_RAW_PTR]], i32 3
35+
// CHECK: store i8 0, i8* [[PAD_2]]
36+
37+
// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 5
38+
// CHECK: store i8 0, i8* [[PAD_3]]
39+
// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 6
40+
// CHECK: store i8 0, i8* [[PAD_4]]
41+
// CHECK: [[PAD_5:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 7
42+
// CHECK: store i8 0, i8* [[PAD_5]]
43+
44+
// CHECK: [[BAR_MEMBER:%.*]] = getelementptr inbounds %struct.Baz, %struct.Baz* [[BAZ]], i32 0, i32 3
45+
// CHECK: [[BAR_RAW_PTR:%.*]] = bitcast %struct.Bar* [[BAR_MEMBER]] to i8*
46+
// CHECK: [[PAD_6:%.*]] = getelementptr i8, i8* [[BAR_RAW_PTR]], i32 1
47+
// CHECK: store i8 0, i8* [[PAD_6]]
48+
// CHECK: [[PAD_7:%.*]] = getelementptr i8, i8* [[BAR_RAW_PTR]], i32 3
49+
// CHECK: store i8 0, i8* [[PAD_7]]
50+
// CHECK: ret void
51+
void testBaz(Baz *baz) {
52+
__builtin_zero_non_value_bits(baz);
53+
}
54+
55+
struct UnsizedTail {
56+
int size;
57+
alignas(8) char buf[];
58+
59+
UnsizedTail(int size) : size(size) {}
60+
};
61+
62+
// UnsizedTail structure:
63+
// "size", PAD_1, PAD_2, PAD_3, PAD_4
64+
// %struct.UnsizedTail = type { i32, [4 x i8], [0 x i8] }
65+
66+
// CHECK-LABEL: define void @_Z15testUnsizedTailP11UnsizedTail(%struct.UnsizedTail* %u)
67+
// CHECK: [[U_ADDR:%.*]] = alloca %struct.UnsizedTail*
68+
// CHECK: store %struct.UnsizedTail* %u, %struct.UnsizedTail** [[U_ADDR]]
69+
// CHECK: [[U:%.*]] = load %struct.UnsizedTail*, %struct.UnsizedTail** [[U_ADDR]]
70+
// CHECK: [[U_RAW_PTR:%.*]] = bitcast %struct.UnsizedTail* [[U]] to i8*
71+
// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 4
72+
// CHECK: store i8 0, i8* [[PAD_1]]
73+
// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 5
74+
// CHECK: store i8 0, i8* [[PAD_2]]
75+
// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 6
76+
// CHECK: store i8 0, i8* [[PAD_3]]
77+
// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 7
78+
// CHECK: store i8 0, i8* [[PAD_4]]
79+
// CHECK: ret void
80+
void testUnsizedTail(UnsizedTail *u) {
81+
__builtin_zero_non_value_bits(u);
82+
}
83+
84+
struct ArrOfStructsWithPadding {
85+
Bar bars[2];
86+
};
87+
88+
// ArrOfStructsWithPadding structure:
89+
// "c" (1), PAD_1, "d" (1), PAD_2, "c" (2), PAD_3, "d" (2), PAD_4
90+
// %struct.ArrOfStructsWithPadding = type { [2 x %struct.Bar] }
91+
92+
// CHECK-LABEL: define void @_Z27testArrOfStructsWithPaddingP23ArrOfStructsWithPadding(%struct.ArrOfStructsWithPadding* %arr)
93+
// CHECK: [[ARR_ADDR:%.*]] = alloca %struct.ArrOfStructsWithPadding*
94+
// CHECK: store %struct.ArrOfStructsWithPadding* %arr, %struct.ArrOfStructsWithPadding** [[ARR_ADDR]]
95+
// CHECK: [[ARR:%.*]] = load %struct.ArrOfStructsWithPadding*, %struct.ArrOfStructsWithPadding** [[ARR_ADDR]]
96+
// CHECK: [[BARS:%.*]] = getelementptr inbounds %struct.ArrOfStructsWithPadding, %struct.ArrOfStructsWithPadding* [[ARR]], i32 0, i32 0
97+
// CHECK: [[FIRST:%.*]] = getelementptr inbounds [2 x %struct.Bar], [2 x %struct.Bar]* [[BARS]], i64 0, i64 0
98+
// CHECK: [[FIRST_RAW_PTR:%.*]] = bitcast %struct.Bar* [[FIRST]] to i8*
99+
// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[FIRST_RAW_PTR]], i32 1
100+
// CHECK: store i8 0, i8* [[PAD_1]]
101+
// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* [[FIRST_RAW_PTR]], i32 3
102+
// CHECK: store i8 0, i8* [[PAD_2]]
103+
// CHECK: [[SECOND:%.*]] = getelementptr inbounds [2 x %struct.Bar], [2 x %struct.Bar]* [[BARS]], i64 0, i64 1
104+
// CHECK: [[SECOND_RAW_PTR:%.*]] = bitcast %struct.Bar* [[SECOND]] to i8*
105+
// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[SECOND_RAW_PTR]], i32 1
106+
// CHECK: store i8 0, i8* [[PAD_3]]
107+
// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[SECOND_RAW_PTR]], i32 3
108+
// CHECK: store i8 0, i8* [[PAD_4]]
109+
// CHECK: ret void
110+
void testArrOfStructsWithPadding(ArrOfStructsWithPadding *arr) {
111+
__builtin_zero_non_value_bits(arr);
112+
}

0 commit comments

Comments
 (0)