Skip to content
6 changes: 6 additions & 0 deletions clang/include/clang/Basic/Builtins.td
Original file line number Diff line number Diff line change
Expand Up @@ -976,6 +976,12 @@ def IsWithinLifetime : LangBuiltin<"CXX_LANG"> {
let Prototype = "bool(void*)";
}

def ClearPadding : LangBuiltin<"CXX_LANG"> {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should expose this in C mode as well. Did you have any particular implementation-reason to restrict to C++?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for bringing this up. The reason I am creating this patch is to implement a C++ paper https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0528r3.html

I am not familiar with the C standard and not sure if this operation is needed. And the implementation is trying to access the Cxx record and I am not sure how easy to make it work for C.

let Spellings = ["__builtin_clear_padding"];
let Attributes = [NoThrow, CustomTypeChecking];
let Prototype = "void(void*)";
}

def GetVtablePointer : LangBuiltin<"CXX_LANG"> {
let Spellings = ["__builtin_get_vtable_pointer"];
let Attributes = [CustomTypeChecking, NoThrow, Const];
Expand Down
285 changes: 285 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,11 @@
#include "llvm/IR/MatrixBuilder.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/ScopedPrinter.h"
#include <algorithm>
#include <deque>
#include <optional>
#include <utility>
#include <vector>

using namespace clang;
using namespace CodeGen;
Expand Down Expand Up @@ -2592,6 +2595,281 @@ static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
return RValue::get(CGF->Builder.CreateCall(UBF, Args));
}

namespace {

// PaddingClearer is a utility class that clears padding bits in a
// c++ type. It traverses the type recursively, collecting occupied
// bit intervals, and then compute the padding intervals.
// In the end, it clears the padding bits by writing zeros
// to the padding intervals bytes-by-bytes. If a byte only contains
// some padding bits, it writes zeros to only those bits. This is
// the case for bit-fields.
struct PaddingClearer {
PaddingClearer(CodeGenFunction &F)
: CGF(F), CharWidth(CGF.getContext().getCharWidth()) {}

void run(Value *Ptr, QualType Ty) {
OccuppiedIntervals.clear();
Queue.clear();

Queue.push_back(Data{0, Ty, true});
while (!Queue.empty()) {
auto Current = Queue.back();
Queue.pop_back();
Visit(Current);
}

MergeOccuppiedIntervals();
auto PaddingIntervals =
GetPaddingIntervals(CGF.getContext().getTypeSize(Ty));
for (const auto &Interval : PaddingIntervals) {
ClearPadding(Ptr, Interval);
}
}

private:
struct BitInterval {
// [First, Last)
uint64_t First;
uint64_t Last;
};

struct Data {
uint64_t StartBitOffset;
QualType Ty;
bool VisitVirtualBase;
};

void Visit(Data const &D) {
if (auto *AT = dyn_cast<ConstantArrayType>(D.Ty)) {
VisitArray(AT, D.StartBitOffset);
return;
}

if (auto *Record = D.Ty->getAsCXXRecordDecl()) {
VisitStruct(Record, D.StartBitOffset, D.VisitVirtualBase);
return;
}

if (D.Ty->isAtomicType()) {
auto Unwrapped = D;
Unwrapped.Ty = D.Ty.getAtomicUnqualifiedType();
Queue.push_back(Unwrapped);
return;
}

if (const auto *Complex = D.Ty->getAs<ComplexType>()) {
VisitComplex(Complex, D.StartBitOffset);
return;
}

auto *Type = CGF.ConvertTypeForMem(D.Ty);
auto SizeBit = CGF.CGM.getModule()
.getDataLayout()
.getTypeSizeInBits(Type)
.getKnownMinValue();
OccuppiedIntervals.push_back(
BitInterval{D.StartBitOffset, D.StartBitOffset + SizeBit});
}

void VisitArray(const ConstantArrayType *AT, uint64_t StartBitOffset) {
for (uint64_t ArrIndex = 0; ArrIndex < AT->getSize().getLimitedValue();
++ArrIndex) {

QualType ElementQualType = AT->getElementType();
auto ElementSize = CGF.getContext().getTypeSizeInChars(ElementQualType);
auto ElementAlign = CGF.getContext().getTypeAlignInChars(ElementQualType);
auto Offset = ElementSize.alignTo(ElementAlign);

Queue.push_back(
Data{StartBitOffset + ArrIndex * Offset.getQuantity() * CharWidth,
ElementQualType, /*VisitVirtualBase*/ true});
}
}

void VisitStruct(const CXXRecordDecl *R, uint64_t StartBitOffset,
bool VisitVirtualBase) {
const auto &DL = CGF.CGM.getModule().getDataLayout();

const ASTRecordLayout &ASTLayout = CGF.getContext().getASTRecordLayout(R);
if (ASTLayout.hasOwnVFPtr()) {
OccuppiedIntervals.push_back(BitInterval{
StartBitOffset, StartBitOffset + DL.getPointerSizeInBits()});
}

const auto VisitBase = [&ASTLayout, StartBitOffset, this](
const CXXBaseSpecifier &Base, auto GetOffset) {
auto *BaseRecord = Base.getType()->getAsCXXRecordDecl();
if (!BaseRecord) {
return;
}
auto BaseOffset =
std::invoke(GetOffset, ASTLayout, BaseRecord).getQuantity();

Queue.push_back(Data{StartBitOffset + BaseOffset * CharWidth,
Base.getType(), /*VisitVirtualBase*/ false});
};

for (auto Base : R->bases()) {
if (!Base.isVirtual()) {
VisitBase(Base, &ASTRecordLayout::getBaseClassOffset);
}
}

if (VisitVirtualBase) {
for (auto VBase : R->vbases()) {
VisitBase(VBase, &ASTRecordLayout::getVBaseClassOffset);
}
}

for (auto *Field : R->fields()) {
auto FieldOffset = ASTLayout.getFieldOffset(Field->getFieldIndex());
if (Field->isBitField()) {
OccuppiedIntervals.push_back(BitInterval{
StartBitOffset + FieldOffset,
StartBitOffset + FieldOffset + Field->getBitWidthValue()});
} else {
Queue.push_back(Data{StartBitOffset + FieldOffset, Field->getType(),
/*VisitVirtualBase*/ true});
}
}
}

void VisitComplex(const ComplexType *CT, uint64_t StartBitOffset) {
QualType ElementQualType = CT->getElementType();
auto ElementSize = CGF.getContext().getTypeSizeInChars(ElementQualType);
auto ElementAlign = CGF.getContext().getTypeAlignInChars(ElementQualType);
auto ImgOffset = ElementSize.alignTo(ElementAlign);

Queue.push_back(
Data{StartBitOffset, ElementQualType, /*VisitVirtualBase*/ true});
Queue.push_back(Data{StartBitOffset + ImgOffset.getQuantity() * CharWidth,
ElementQualType, /*VisitVirtualBase*/ true});
}

void MergeOccuppiedIntervals() {
std::sort(OccuppiedIntervals.begin(), OccuppiedIntervals.end(),
[](const BitInterval &lhs, const BitInterval &rhs) {
return std::tie(lhs.First, lhs.Last) <
std::tie(rhs.First, rhs.Last);
});

std::vector<BitInterval> Merged;
Merged.reserve(OccuppiedIntervals.size());

for (const BitInterval &NextInterval : OccuppiedIntervals) {
if (Merged.empty()) {
Merged.push_back(NextInterval);
continue;
}
auto &LastInterval = Merged.back();

if (NextInterval.First > LastInterval.Last) {
Merged.push_back(NextInterval);
} else {
LastInterval.Last = std::max(LastInterval.Last, NextInterval.Last);
}
}

OccuppiedIntervals = Merged;
}

std::vector<BitInterval> GetPaddingIntervals(uint64_t SizeInBits) const {
std::vector<BitInterval> Results;
if (OccuppiedIntervals.size() == 1 &&
OccuppiedIntervals.front().First == 0 &&
OccuppiedIntervals.end()->Last == SizeInBits) {
return Results;
}
Results.reserve(OccuppiedIntervals.size() + 1);
uint64_t CurrentPos = 0;
for (const BitInterval &OccupiedInterval : OccuppiedIntervals) {
if (OccupiedInterval.First > CurrentPos) {
Results.push_back(BitInterval{CurrentPos, OccupiedInterval.First});
}
CurrentPos = OccupiedInterval.Last;
}
if (SizeInBits > CurrentPos) {
Results.push_back(BitInterval{CurrentPos, SizeInBits});
}
return Results;
}

void ClearPadding(Value *Ptr, const BitInterval &PaddingInterval) {
auto *I8Ptr = CGF.Builder.CreateBitCast(Ptr, CGF.Int8PtrTy);
auto *Zero = ConstantInt::get(CGF.Int8Ty, 0);

// Calculate byte indices and bit positions
auto StartByte = PaddingInterval.First / CharWidth;
auto StartBit = PaddingInterval.First % CharWidth;
auto EndByte = PaddingInterval.Last / CharWidth;
auto EndBit = PaddingInterval.Last % CharWidth;

if (StartByte == EndByte) {
// Interval is within a single byte
auto *Index = ConstantInt::get(CGF.IntTy, StartByte);
auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
Address ElementAddr(Element, CGF.Int8Ty, CharUnits::One());

auto *Value = CGF.Builder.CreateLoad(ElementAddr);

// Create mask to clear bits within the byte
uint8_t mask = ((1 << EndBit) - 1) & ~((1 << StartBit) - 1);
auto *MaskValue = ConstantInt::get(CGF.Int8Ty, mask);
auto *NewValue = CGF.Builder.CreateAnd(Value, MaskValue);

CGF.Builder.CreateStore(NewValue, ElementAddr);
} else {
// Handle the start byte
if (StartBit != 0) {
auto *Index = ConstantInt::get(CGF.IntTy, StartByte);
auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
Address ElementAddr(Element, CGF.Int8Ty, CharUnits::One());

auto *Value = CGF.Builder.CreateLoad(ElementAddr);

uint8_t startMask = ((1 << (CharWidth - StartBit)) - 1) << StartBit;
auto *MaskValue = ConstantInt::get(CGF.Int8Ty, ~startMask);
auto *NewValue = CGF.Builder.CreateAnd(Value, MaskValue);

CGF.Builder.CreateStore(NewValue, ElementAddr);
++StartByte;
}

// Handle full bytes in the middle
for (auto Offset = StartByte; Offset < EndByte; ++Offset) {
auto *Index = ConstantInt::get(CGF.IntTy, Offset);
auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
Address ElementAddr(Element, CGF.Int8Ty, CharUnits::One());

CGF.Builder.CreateStore(Zero, ElementAddr);
}

// Handle the end byte
if (EndBit != 0) {
auto *Index = ConstantInt::get(CGF.IntTy, EndByte);
auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
Address ElementAddr(Element, CGF.Int8Ty, CharUnits::One());

auto *Value = CGF.Builder.CreateLoad(ElementAddr);

uint8_t endMask = (1 << EndBit) - 1;
auto *MaskValue = ConstantInt::get(CGF.Int8Ty, endMask);
auto *NewValue = CGF.Builder.CreateAnd(Value, MaskValue);

CGF.Builder.CreateStore(NewValue, ElementAddr);
}
}
}

CodeGenFunction &CGF;
const uint64_t CharWidth;
std::deque<Data> Queue;
std::vector<BitInterval> OccuppiedIntervals;
};

} // namespace

RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
const CallExpr *E,
ReturnValueSlot ReturnValue) {
Expand Down Expand Up @@ -4839,6 +5117,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,

return RValue::get(Ptr);
}
case Builtin::BI__builtin_clear_padding: {
Address Src = EmitPointerWithAlignment(E->getArg(0));
auto PointeeTy = E->getArg(0)->getType()->getPointeeType();
PaddingClearer clearer{*this};
clearer.run(Src.getBasePointer(), PointeeTy);
return RValue::get(nullptr);
}
case Builtin::BI__sync_fetch_and_add:
case Builtin::BI__sync_fetch_and_sub:
case Builtin::BI__sync_fetch_and_or:
Expand Down
30 changes: 30 additions & 0 deletions clang/lib/Sema/SemaChecking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2899,7 +2899,37 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
return BuiltinIsWithinLifetime(*this, TheCall);
case Builtin::BI__builtin_trivially_relocate:
return BuiltinTriviallyRelocate(*this, TheCall);
case Builtin::BI__builtin_clear_padding: {
const auto numArgs = TheCall->getNumArgs();
if (numArgs < 1) {
Diag(TheCall->getEndLoc(), diag::err_typecheck_call_too_few_args_one)
<< 0 /*function call*/ << "T*" << 0;
return ExprError();
}
if (numArgs > 1) {
Diag(TheCall->getEndLoc(), diag::err_typecheck_call_too_many_args_one)
<< 0 /*function call*/ << "T*" << numArgs << 0;
return ExprError();
}

const Expr *PtrArg = TheCall->getArg(0);
const QualType PtrArgType = PtrArg->getType();
if (!PtrArgType->isPointerType()) {
Diag(PtrArg->getBeginLoc(), diag::err_typecheck_convert_incompatible)
<< PtrArgType << "pointer" << 1 << 0 << 3 << 1 << PtrArgType
<< "pointer";
return ExprError();
}
if (PtrArgType->getPointeeType().isConstQualified()) {
Diag(PtrArg->getBeginLoc(), diag::err_typecheck_assign_const)
<< TheCall->getSourceRange() << 5 /*ConstUnknown*/;
return ExprError();
}
if (RequireCompleteType(PtrArg->getBeginLoc(), PtrArgType->getPointeeType(),
diag::err_typecheck_decl_incomplete_type))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please define an error message that explains what's actually going wrong here, instead of reusing err_typecheck_decl_incomplete_type. (The other errors could also be improved a bit.)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please define an error message that explains what's actually going wrong here, instead of reusing err_typecheck_decl_incomplete_type. (The other errors could also be improved a bit.)

hmm. the current error message looks like this

/tmp/a.cpp:5:29: error: variable has incomplete type 'A'
    5 |     __builtin_clear_padding(a);
      |                             ^
/tmp/a.cpp:1:8: note: forward declaration of 'A'
    1 | struct A;

In my opinion, the error is quite clear. It suggested the user that it does not work with incomplete type.

return ExprError();
break;
}
case Builtin::BI__sync_fetch_and_add:
case Builtin::BI__sync_fetch_and_add_1:
case Builtin::BI__sync_fetch_and_add_2:
Expand Down
Loading