66
77#include " llvm/ADT/Triple.h"
88#include " llvm/BinaryFormat/ELF.h"
9+ #include " llvm/IR/Constant.h"
910#include " llvm/IR/Constants.h"
11+ #include " llvm/IR/InstrTypes.h"
12+ #include " llvm/IR/Instructions.h"
1013#include " llvm/IR/Module.h"
1114#include " llvm/MC/MCContext.h"
1215#include " llvm/MC/MCSectionELF.h"
1316#include " llvm/MC/MCStreamer.h"
17+ #include " llvm/Support/ErrorHandling.h"
1418
1519using namespace llvm ;
1620using namespace std ;
1721
1822namespace {
1923
// Error object that carries a human-readable message.
// NOTE(review): presumably thrown when an instruction cannot be serialised;
// no throw site is visible in this part of the file -- confirm.
class SerialiseInstructionException {
private:
  // The message text supplied at construction.
  std::string S;

public:
  // Build the exception from `Msg`. Taking a const reference avoids the
  // extra copy that a by-value parameter followed by a copy-initialisation
  // incurs; `explicit` stops accidental string -> exception conversions.
  explicit SerialiseInstructionException(const std::string &Msg) : S(Msg) {}

  // Mutable access to the message (kept for existing callers).
  std::string &what() { return S; }

  // Read-only access, usable when the exception is caught by const reference.
  const std::string &what() const { return S; }
};
32+
// Section name used for the serialised IR.
// NOTE(review): the use site is outside the visible part of the file.
const char *SectionName = ".yk_ir";

// Magic number emitted as the first field of the serialised header.
const uint32_t Magic = 0xedd5f00d;

// Format version number.
// NOTE(review): presumably written to the header after `Magic` -- confirm.
const uint32_t Version = 0;
2336
// Opcodes of the serialised IR. Each value is written to the stream as a
// single byte (see serialiseOpcode), so the numeric values are spelled out
// explicitly to make the on-disk encoding obvious.
enum OpCode {
  Nop = 0,
  Load = 1,
  Store = 2,
  Alloca = 3,
  Call = 4,
  GetElementPtr = 5,
  Branch = 6,
  ICmp = 7,
  BinaryOperator = 8,
  Ret = 9,
  UnimplementedInstruction = 255, // YKFIXME: Will eventually be deleted.
};
50+
// Discriminator byte written in front of every serialised operand.
enum OperandKind {
  Constant = 0,      // followed by an index into the constant table
  LocalVariable = 1, // followed by a block index and an instruction index
  String = 2,        // followed by a serialised string
};
56+
// Discriminator byte written in front of every serialised type.
enum TypeKind {
  Integer = 0,             // followed by the 32-bit bit width
  UnimplementedType = 255, // YKFIXME: Will eventually be deleted.
};
2761
62+ string valueToString (Value *V) {
63+ string S;
64+ raw_string_ostream SS (S);
65+ V->print (SS);
66+ return S;
67+ }
68+
// Return the zero-based position of `FindElement` within `Container`,
// comparing elements by address.
//
// The element is expected to be present: this is checked with an assertion.
// When assertions are compiled out and the element is absent, the value
// returned is the number of elements in the container.
template <class C, class E> size_t getIndex(C *Container, E *FindElement) {
  size_t Idx = 0;
  auto It = Container->begin();
  const auto End = Container->end();
  // Walk forward until we hit the element or run off the end.
  while (It != End && &*It != FindElement) {
    ++It;
    ++Idx;
  }
  bool Found = (It != End);
  assert(Found);
  return Idx;
}
83+
// If INST is an instance of the LLVM instruction class INST_TYPE, serialise
// it generically under the Yk opcode OPCODE and return from the *enclosing*
// function. Otherwise do nothing, so that the next check can run.
//
// The expansion is a bare `if` block (not a do/while wrapper) because the
// call sites invoke it without a trailing semicolon.
#define GENERIC_INST_SERIALISE(INST, INST_TYPE, OPCODE)                        \
  if (isa<INST_TYPE>(INST)) {                                                  \
    serialiseInstGeneric(INST, OPCODE);                                        \
    return;                                                                    \
  }
89+
2890class YkIRWriter {
2991private:
3092 Module &M;
3193 MCStreamer &OutStreamer;
3294
95+ vector<llvm::Type *> Types;
96+ vector<llvm::Constant *> Constants;
97+
98+ // Return the index of the LLVM type `Ty`, inserting a new entry if
99+ // necessary.
100+ size_t typeIndex (Type *Ty) {
101+ vector<Type *>::iterator Found = std::find (Types.begin (), Types.end (), Ty);
102+ if (Found != Types.end ()) {
103+ return std::distance (Types.begin (), Found);
104+ }
105+ size_t Idx = Types.size ();
106+ Types.push_back (Ty);
107+ return Idx;
108+ }
109+
110+ // Return the index of the LLVM constant `C`, inserting a new entry if
111+ // necessary.
112+ size_t constantIndex (class Constant *C) {
113+ vector<class Constant *>::iterator Found =
114+ std::find (Constants.begin (), Constants.end (), C);
115+ if (Found != Constants.end ()) {
116+ return std::distance (Constants.begin (), Found);
117+ }
118+ size_t Idx = Constants.size ();
119+ Constants.push_back (C);
120+ return Idx;
121+ }
122+
33123public:
34124 YkIRWriter (Module &M, MCStreamer &OutStreamer)
35125 : M(M), OutStreamer(OutStreamer) {}
@@ -40,13 +130,94 @@ class YkIRWriter {
40130 OutStreamer.emitInt8 (0 ); // null terminator.
41131 }
42132
133+ void serialiseOpcode (OpCode Code) { OutStreamer.emitInt8 (Code); }
134+
135+ void serialiseConstantOperand (Instruction *Parent, llvm::Constant *C) {
136+ OutStreamer.emitInt8 (OperandKind::Constant);
137+ OutStreamer.emitSizeT (constantIndex (C));
138+ }
139+
140+ void serialiseLocalVariableOperand (Instruction *I) {
141+ // For now we assume that there is a one to one relationship between LLVM
142+ // instructions and Yk IR instructions, and that the instruction
143+ // (and block) indices are the same in both IRs.
144+ BasicBlock *ParentBlock = I->getParent ();
145+ Function *ParentFunc = ParentBlock->getParent ();
146+
147+ size_t BlockIdx = getIndex (ParentFunc, ParentBlock);
148+ size_t InstIdx = getIndex (ParentBlock, I);
149+
150+ OutStreamer.emitInt8 (OperandKind::LocalVariable);
151+ OutStreamer.emitSizeT (BlockIdx);
152+ OutStreamer.emitSizeT (InstIdx);
153+ }
154+
155+ void serialiseStringOperand (const char *S) {
156+ OutStreamer.emitInt8 (OperandKind::String);
157+ serialiseString (S);
158+ }
159+
160+ // YKFIXME: This allows programs which we haven't yet defined a
161+ // lowering for to compile. For now We just emit a string operand containing
162+ // the unhandled LLVM operand in textual form.
163+ void serialiseUnimplementedOperand (Value *V) {
164+ OutStreamer.emitInt8 (OperandKind::String);
165+ OutStreamer.emitInt8 (' ?' );
166+ serialiseString (valueToString (V));
167+ }
168+
169+ void serialiseOperand (Instruction *Parent, Value *V) {
170+ if (llvm::Constant *C = dyn_cast<llvm::Constant>(V)) {
171+ serialiseConstantOperand (Parent, C);
172+ } else if (Instruction *I = dyn_cast<Instruction>(V)) {
173+ // If an instruction defines the operand, it's a local variable.
174+ serialiseLocalVariableOperand (I);
175+ } else {
176+ serialiseUnimplementedOperand (V);
177+ }
178+ }
179+
180+ // / Does a naiave serialisation of an LLVM instruction by iterating over its
181+ // / operands and serialising them in turn.
182+ void serialiseInstGeneric (Instruction *I, OpCode Opc) {
183+ serialiseOpcode (Opc);
184+ OutStreamer.emitInt32 (I->getNumOperands ());
185+ for (Value *O : I->operands ()) {
186+ serialiseOperand (I, O);
187+ }
188+ }
189+
190+ void serialiseInst (Instruction *I) {
191+ GENERIC_INST_SERIALISE (I, LoadInst, Load)
192+ GENERIC_INST_SERIALISE (I, StoreInst, Store)
193+ GENERIC_INST_SERIALISE (I, AllocaInst, Alloca)
194+ GENERIC_INST_SERIALISE (I, CallInst, Call)
195+ GENERIC_INST_SERIALISE (I, GetElementPtrInst, GetElementPtr)
196+ GENERIC_INST_SERIALISE (I, BranchInst, Branch)
197+ GENERIC_INST_SERIALISE (I, ICmpInst, ICmp)
198+ GENERIC_INST_SERIALISE (I, llvm::BinaryOperator, BinaryOperator)
199+ GENERIC_INST_SERIALISE (I, ReturnInst, Ret)
200+
201+ // GENERIC_INST_SERIALISE does an early return upon a match, so if we get
202+ // here then the instruction wasn't handled.
203+ serialiseUnimplementedInstruction (I);
204+ }
205+
206+ void serialiseUnimplementedInstruction (Instruction *I) {
207+ // opcode:
208+ serialiseOpcode (UnimplementedInstruction);
209+ // num_operands:
210+ OutStreamer.emitInt32 (1 );
211+ // problem instruction:
212+ serialiseStringOperand (valueToString (I).data ());
213+ }
214+
43215 void serialiseBlock (BasicBlock &BB) {
44216 // num_instrs:
45217 OutStreamer.emitSizeT (BB.size ());
46218 // instrs:
47219 for (Instruction &I : BB) {
48- (void )I;
49- OutStreamer.emitInt8 (OpCode::Nop);
220+ serialiseInst (&I);
50221 }
51222 }
52223
@@ -61,6 +232,41 @@ class YkIRWriter {
61232 }
62233 }
63234
235+ void serialiseType (Type *Ty) {
236+ if (IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
237+ OutStreamer.emitInt8 (TypeKind::Integer);
238+ OutStreamer.emitInt32 (ITy->getBitWidth ());
239+ } else {
240+ OutStreamer.emitInt8 (TypeKind::UnimplementedType);
241+ }
242+ }
243+
244+ void serialiseConstantInt (ConstantInt *CI) {
245+ // OutStreamer.emitInt8(OperandKind::Constant);
246+ OutStreamer.emitSizeT (typeIndex (CI->getType ()));
247+ OutStreamer.emitSizeT (CI->getBitWidth () / 8 );
248+ for (size_t I = 0 ; I < CI->getBitWidth (); I += 8 ) {
249+ uint64_t Byte = CI->getValue ().extractBitsAsZExtValue (8 , I);
250+ OutStreamer.emitInt8 (Byte);
251+ }
252+ }
253+
254+ void serialiseUnimplementedConstant (class Constant *C) {
255+ // type_index:
256+ OutStreamer.emitSizeT (typeIndex (C->getType ()));
257+ // num_bytes:
258+ // Just report zero for now.
259+ OutStreamer.emitSizeT (0 );
260+ }
261+
262+ void serialiseConstant (class Constant *C) {
263+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
264+ serialiseConstantInt (CI);
265+ } else {
266+ serialiseUnimplementedConstant (C);
267+ }
268+ }
269+
64270 void serialise () {
65271 // header:
66272 OutStreamer.emitInt32 (Magic);
@@ -72,6 +278,20 @@ class YkIRWriter {
72278 for (Function &F : M) {
73279 serialiseFunc (F);
74280 }
281+
282+ // num_types:
283+ OutStreamer.emitSizeT (Types.size ());
284+ // types:
285+ for (Type *&Ty : Types) {
286+ serialiseType (Ty);
287+ }
288+
289+ // num_constants:
290+ OutStreamer.emitSizeT (Constants.size ());
291+ // constants:
292+ for (class Constant *&C : Constants) {
293+ serialiseConstant (C);
294+ }
75295 }
76296};
77297} // anonymous namespace
0 commit comments