77// ===----------------------------------------------------------------------===//
88
99#include " AMDGPUMCExpr.h"
10+ #include " GCNSubtarget.h"
11+ #include " Utils/AMDGPUBaseInfo.h"
12+ #include " llvm/IR/Function.h"
1013#include " llvm/MC/MCContext.h"
1114#include " llvm/MC/MCStreamer.h"
1215#include " llvm/MC/MCSymbol.h"
1619#include < optional>
1720
1821using namespace llvm ;
22+ using namespace llvm ::AMDGPU;
1923
2024AMDGPUVariadicMCExpr::AMDGPUVariadicMCExpr (VariadicKind Kind,
2125 ArrayRef<const MCExpr *> Args,
@@ -61,6 +65,18 @@ void AMDGPUVariadicMCExpr::printImpl(raw_ostream &OS,
6165 case AGVK_Max:
6266 OS << " max(" ;
6367 break ;
68+ case AGVK_ExtraSGPRs:
69+ OS << " extrasgprs(" ;
70+ break ;
71+ case AGVK_TotalNumVGPRs:
72+ OS << " totalnumvgprs(" ;
73+ break ;
74+ case AGVK_AlignTo:
75+ OS << " alignto(" ;
76+ break ;
77+ case AGVK_Occupancy:
78+ OS << " occupancy(" ;
79+ break ;
6480 }
6581 for (auto It = Args.begin (); It != Args.end (); ++It) {
6682 (*It)->print (OS, MAI, /* InParens=*/ false );
@@ -82,10 +98,151 @@ static int64_t op(AMDGPUVariadicMCExpr::VariadicKind Kind, int64_t Arg1,
8298 }
8399}
84100
101+ bool AMDGPUVariadicMCExpr::evaluateExtraSGPRs (MCValue &Res,
102+ const MCAsmLayout *Layout,
103+ const MCFixup *Fixup) const {
104+ auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
105+ MCValue MCVal;
106+ if (!Arg->evaluateAsRelocatable (MCVal, Layout, Fixup) ||
107+ !MCVal.isAbsolute ())
108+ return false ;
109+
110+ ConstantValue = MCVal.getConstant ();
111+ return true ;
112+ };
113+
114+ assert (Args.size () == 3 &&
115+ " AMDGPUVariadic Argument count incorrect for ExtraSGPRs" );
116+ const MCSubtargetInfo *STI = Ctx.getSubtargetInfo ();
117+ uint64_t VCCUsed = 0 , FlatScrUsed = 0 , XNACKUsed = 0 ;
118+
119+ bool Success = TryGetMCExprValue (Args[2 ], XNACKUsed);
120+
121+ assert (Success && " Arguments 3 for ExtraSGPRs should be a known constant" );
122+ if (!Success || !TryGetMCExprValue (Args[0 ], VCCUsed) ||
123+ !TryGetMCExprValue (Args[1 ], FlatScrUsed))
124+ return false ;
125+
126+ uint64_t ExtraSGPRs = IsaInfo::getNumExtraSGPRs (
127+ STI, (bool )VCCUsed, (bool )FlatScrUsed, (bool )XNACKUsed);
128+ Res = MCValue::get (ExtraSGPRs);
129+ return true ;
130+ }
131+
132+ bool AMDGPUVariadicMCExpr::evaluateTotalNumVGPR (MCValue &Res,
133+ const MCAsmLayout *Layout,
134+ const MCFixup *Fixup) const {
135+ auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
136+ MCValue MCVal;
137+ if (!Arg->evaluateAsRelocatable (MCVal, Layout, Fixup) ||
138+ !MCVal.isAbsolute ())
139+ return false ;
140+
141+ ConstantValue = MCVal.getConstant ();
142+ return true ;
143+ };
144+ assert (Args.size () == 2 &&
145+ " AMDGPUVariadic Argument count incorrect for TotalNumVGPRs" );
146+ const MCSubtargetInfo *STI = Ctx.getSubtargetInfo ();
147+ uint64_t NumAGPR = 0 , NumVGPR = 0 ;
148+
149+ bool Has90AInsts = AMDGPU::isGFX90A (*STI);
150+
151+ if (!TryGetMCExprValue (Args[0 ], NumAGPR) ||
152+ !TryGetMCExprValue (Args[1 ], NumVGPR))
153+ return false ;
154+
155+ uint64_t TotalNum = Has90AInsts && NumAGPR ? alignTo (NumVGPR, 4 ) + NumAGPR
156+ : std::max (NumVGPR, NumAGPR);
157+ Res = MCValue::get (TotalNum);
158+ return true ;
159+ }
160+
161+ bool AMDGPUVariadicMCExpr::evaluateAlignTo (MCValue &Res,
162+ const MCAsmLayout *Layout,
163+ const MCFixup *Fixup) const {
164+ auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
165+ MCValue MCVal;
166+ if (!Arg->evaluateAsRelocatable (MCVal, Layout, Fixup) ||
167+ !MCVal.isAbsolute ())
168+ return false ;
169+
170+ ConstantValue = MCVal.getConstant ();
171+ return true ;
172+ };
173+
174+ assert (Args.size () == 2 &&
175+ " AMDGPUVariadic Argument count incorrect for AlignTo" );
176+ uint64_t Value = 0 , Align = 0 ;
177+ if (!TryGetMCExprValue (Args[0 ], Value) || !TryGetMCExprValue (Args[1 ], Align))
178+ return false ;
179+
180+ Res = MCValue::get (alignTo (Value, Align));
181+ return true ;
182+ }
183+
184+ bool AMDGPUVariadicMCExpr::evaluateOccupancy (MCValue &Res,
185+ const MCAsmLayout *Layout,
186+ const MCFixup *Fixup) const {
187+ auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
188+ MCValue MCVal;
189+ if (!Arg->evaluateAsRelocatable (MCVal, Layout, Fixup) ||
190+ !MCVal.isAbsolute ())
191+ return false ;
192+
193+ ConstantValue = MCVal.getConstant ();
194+ return true ;
195+ };
196+ assert (Args.size () == 7 &&
197+ " AMDGPUVariadic Argument count incorrect for Occupancy" );
198+ uint64_t InitOccupancy, MaxWaves, Granule, TargetTotalNumVGPRs, Generation,
199+ NumSGPRs, NumVGPRs;
200+
201+ bool Success = true ;
202+ Success &= TryGetMCExprValue (Args[0 ], MaxWaves);
203+ Success &= TryGetMCExprValue (Args[1 ], Granule);
204+ Success &= TryGetMCExprValue (Args[2 ], TargetTotalNumVGPRs);
205+ Success &= TryGetMCExprValue (Args[3 ], Generation);
206+ Success &= TryGetMCExprValue (Args[4 ], InitOccupancy);
207+
208+ assert (Success && " Arguments 1 to 5 for Occupancy should be known constants" );
209+
210+ if (!Success || !TryGetMCExprValue (Args[5 ], NumSGPRs) ||
211+ !TryGetMCExprValue (Args[6 ], NumVGPRs))
212+ return false ;
213+
214+ unsigned Occupancy = InitOccupancy;
215+ if (NumSGPRs)
216+ Occupancy = std::min (
217+ Occupancy, IsaInfo::getOccupancyWithNumSGPRs (
218+ NumSGPRs, MaxWaves,
219+ static_cast <AMDGPUSubtarget::Generation>(Generation)));
220+ if (NumVGPRs)
221+ Occupancy = std::min (Occupancy,
222+ IsaInfo::getNumWavesPerEUWithNumVGPRs (
223+ NumVGPRs, Granule, MaxWaves, TargetTotalNumVGPRs));
224+
225+ Res = MCValue::get (Occupancy);
226+ return true ;
227+ }
228+
85229bool AMDGPUVariadicMCExpr::evaluateAsRelocatableImpl (
86230 MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const {
87231 std::optional<int64_t > Total;
88232
233+ switch (Kind) {
234+ default :
235+ break ;
236+ case AGVK_ExtraSGPRs:
237+ return evaluateExtraSGPRs (Res, Layout, Fixup);
238+ case AGVK_AlignTo:
239+ return evaluateAlignTo (Res, Layout, Fixup);
240+ case AGVK_TotalNumVGPRs:
241+ return evaluateTotalNumVGPR (Res, Layout, Fixup);
242+ case AGVK_Occupancy:
243+ return evaluateOccupancy (Res, Layout, Fixup);
244+ }
245+
89246 for (const MCExpr *Arg : Args) {
90247 MCValue ArgRes;
91248 if (!Arg->evaluateAsRelocatable (ArgRes, Layout, Fixup) ||
@@ -113,3 +270,47 @@ MCFragment *AMDGPUVariadicMCExpr::findAssociatedFragment() const {
113270 }
114271 return nullptr ;
115272}
273+
274+ // / Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed
275+ // / are unresolvable but needed for further MCExprs). Derived from
276+ // / implementation of IsaInfo::getNumExtraSGPRs in AMDGPUBaseInfo.cpp.
277+ // /
278+ const AMDGPUVariadicMCExpr *
279+ AMDGPUVariadicMCExpr::createExtraSGPRs (const MCExpr *VCCUsed,
280+ const MCExpr *FlatScrUsed,
281+ bool XNACKUsed, MCContext &Ctx) {
282+
283+ return create (AGVK_ExtraSGPRs,
284+ {VCCUsed, FlatScrUsed, MCConstantExpr::create (XNACKUsed, Ctx)},
285+ Ctx);
286+ }
287+
288+ const AMDGPUVariadicMCExpr *AMDGPUVariadicMCExpr::createTotalNumVGPR (
289+ const MCExpr *NumAGPR, const MCExpr *NumVGPR, MCContext &Ctx) {
290+ return create (AGVK_TotalNumVGPRs, {NumAGPR, NumVGPR}, Ctx);
291+ }
292+
293+ // / Mimics GCNSubtarget::computeOccupancy for MCExpr.
294+ // /
295+ // / Remove dependency on GCNSubtarget and depend only only the necessary values
296+ // / for said occupancy computation. Should match computeOccupancy implementation
297+ // / without passing \p STM on.
298+ const AMDGPUVariadicMCExpr *
299+ AMDGPUVariadicMCExpr::createOccupancy (unsigned InitOcc, const MCExpr *NumSGPRs,
300+ const MCExpr *NumVGPRs,
301+ const GCNSubtarget &STM, MCContext &Ctx) {
302+ unsigned MaxWaves = IsaInfo::getMaxWavesPerEU (&STM);
303+ unsigned Granule = IsaInfo::getVGPRAllocGranule (&STM);
304+ unsigned TargetTotalNumVGPRs = IsaInfo::getTotalNumVGPRs (&STM);
305+ unsigned Generation = STM.getGeneration ();
306+
307+ auto CreateExpr = [&Ctx](unsigned Value) {
308+ return MCConstantExpr::create (Value, Ctx);
309+ };
310+
311+ return create (AGVK_Occupancy,
312+ {CreateExpr (MaxWaves), CreateExpr (Granule),
313+ CreateExpr (TargetTotalNumVGPRs), CreateExpr (Generation),
314+ CreateExpr (InitOcc), NumSGPRs, NumVGPRs},
315+ Ctx);
316+ }
0 commit comments