3636#include " llvm/BinaryFormat/COFF.h"
3737#include " llvm/BinaryFormat/Dwarf.h"
3838#include " llvm/BinaryFormat/ELF.h"
39+ #include " llvm/Bitcode/BitcodeWriter.h"
3940#include " llvm/CodeGen/GCMetadata.h"
4041#include " llvm/CodeGen/GCMetadataPrinter.h"
4142#include " llvm/CodeGen/MachineBasicBlock.h"
@@ -141,6 +142,10 @@ static cl::opt<bool>
141142 DisableDebugInfoPrinting (" disable-debug-info-print" , cl::Hidden,
142143 cl::desc (" Disable debug info printing" ));
143144
// Boolean command-line option registered as `embed-bitcode-final` and marked
// cl::NotHidden. AsmPrinter::doFinalization tests this flag and, when it is
// set, calls EmbedBitcodeInModule on the module being finalized (with
// EmbedBitcode = true, EmbedCmdline = false and an empty command-line vector).
145+ static cl::opt<bool >
146+ EmbedBitcodeFinal (" embed-bitcode-final" , cl::NotHidden,
147+ cl::desc (" Embed final IR as bitcode after all optimisations and transformations have run." ));
148+
144149const char DWARFGroupName[] = " dwarf" ;
145150const char DWARFGroupDescription[] = " DWARF Emission" ;
146151const char DbgTimerName[] = " emit" ;
@@ -1151,6 +1156,36 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
11511156 OutStreamer->emitSymbolValue (FunctionSymbol, getPointerSize ());
11521157 // Emit the total number of basic blocks in this function.
11531158 OutStreamer->emitULEB128IntValue (MF.size ());
1159+ const Function &F = MF.getFunction ();
1160+
1161+ // LLVM's codegen can merge multiple BasicBlocks into a single
1162+ // MachineBasicBlock. Unfortunately, MachineBasicBlock::getBasicBlock() only
1163+ // returns the first BasicBlock in the merged sequence, so we have to find
1164+ // the other corresponding BasicBlock(s) (if any) in the merged sequence
1165+ // another way. We do so in two steps:
1166+ //
1167+ // 1. We create a set, MergedBBs, which is the set of BasicBlocks that are
1168+ // *not* returned by MachineBasicBlock::getBasicBlock(MBB) for any
1169+ // MachineBasicBlock, MBB, in the parent MachineFunction -- in other words,
1170+ // it's the set of BasicBlocks that have been merged into a predecessor
1171+ // during codegen.
1172+ //
1173+ // 2. For each BasicBlock BBX returned by
1174+ // MachineBasicBlock::getBasicBlock() we check if it is terminated by an
1175+ // unconditional branch. If so and that unconditional branch transfers to a
1176+ // block BBY, and BBY is a member of MergedBBs, then we know that BBX and
1177+ // BBY were merged during codegen. [Note that we then see if another BBZ
1178+ // was also merged into BBY and so on]
1179+ std::set<const BasicBlock *> MergedBBs;
1180+ for (const BasicBlock &BB : F) {
1181+ MergedBBs.insert (&BB);
1182+ }
1183+ for (const MachineBasicBlock &MBB : MF) {
1184+ const BasicBlock *BB = MBB.getBasicBlock ();
1185+ if (BB != nullptr ) {
1186+ MergedBBs.erase (BB);
1187+ }
1188+ }
11541189 // Emit BB Information for each basic block in the function.
11551190 for (const MachineBasicBlock &MBB : MF) {
11561191 const MCSymbol *MBBSymbol =
@@ -1161,6 +1196,43 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
11611196 // always be computed from their offsets.
11621197 emitLabelDifferenceAsULEB128 (MBB.getEndSymbol (), MBBSymbol);
11631198 OutStreamer->emitULEB128IntValue (getBBAddrMapMetadata (MBB));
1199+ // Find BBs corresponding with this MBB as described above.
1200+ const BasicBlock *CorrBB = MBB.getBasicBlock ();
1201+ std::vector<const BasicBlock *> CorrBBs;
1202+ while (CorrBB != nullptr ) {
1203+ CorrBBs.push_back (CorrBB);
1204+ const Instruction *Term = CorrBB->getTerminator ();
1205+ assert (Term != nullptr );
1206+ if ((isa<BranchInst>(Term)) &&
1207+ (!(dyn_cast<const BranchInst>(Term))->isConditional ()))
1208+ {
1209+ CorrBB = CorrBB->getUniqueSuccessor ();
1210+ assert (CorrBB != nullptr );
1211+ if (MergedBBs.count (CorrBB) == 0 ) {
1212+ CorrBB = nullptr ;
1213+ }
1214+ } else {
1215+ CorrBB = nullptr ;
1216+ }
1217+ }
1218+ // Emit the number of corresponding BasicBlocks.
1219+ OutStreamer->emitULEB128IntValue (CorrBBs.size ());
1220+ // Emit the corresponding block indices.
1221+ for (auto CorrBB : CorrBBs) {
1222+ size_t I = 0 ;
1223+ bool Found = false ;
1224+ for (auto It = F.begin (); It != F.end (); It++) {
1225+ const BasicBlock *BB = &*It;
1226+ if (BB == CorrBB) {
1227+ Found = true ;
1228+ break ;
1229+ }
1230+ I++;
1231+ }
1232+ if (!Found)
1233+ OutContext.reportError (SMLoc (), " Couldn't find the block's index" );
1234+ OutStreamer->emitULEB128IntValue (I);
1235+ }
11641236 }
11651237 OutStreamer->PopSection ();
11661238}
@@ -1711,6 +1783,18 @@ void AsmPrinter::emitRemarksSection(remarks::RemarkStreamer &RS) {
17111783}
17121784
17131785bool AsmPrinter::doFinalization (Module &M) {
1786+ // The `embed-bitcode` flag serialises the IR after only architecture
1787+ // agnostic optimisations have been run, but then proceeds to apply other
1788+ // optimisations and transformations afterwards. Sometimes this final version
1789+ // is precisely what we are interested in. The `embed-bitcode-final` flag
1790+ // waits until all optimisations/transformations have been run before
1791+ // embedding the IR.
1792+ if (EmbedBitcodeFinal)
1793+ llvm::EmbedBitcodeInModule (M, llvm::MemoryBufferRef (),
1794+ /* EmbedBitcode*/ true ,
1795+ /* EmbedCmdline*/ false ,
1796+ /* CmdArgs*/ std::vector<uint8_t >());
1797+
17141798 // Set the MachineFunction to nullptr so that we can catch attempted
17151799 // accesses to MF specific features at the module level and so that
17161800 // we can conditionalize accesses based on whether or not it is nullptr.
0 commit comments