@@ -113,7 +113,7 @@ def FindBB(bbs, ins): # find BB of an instruction
113113
114114 return None , None # never reaches here
115115
116- def AnalyzeSbidStalls (kernel , args , df , report ):
116+ def AnalyzeStalls (kernel , args , stalldf , report ):
117117 shaders = os .listdir (args .shaderdump )
118118 files = [] # .asm files
119119
@@ -172,9 +172,6 @@ def AnalyzeSbidStalls(kernel, args, df, report):
172172
173173 asm = asm + ".ip"
174174
175- df = df [["IP[Address]" , "SbidStall[Events]" ]]
176- df = df [df ["SbidStall[Events]" ] > 0 ] # drop 0s
177- df = df .sort_values (by = ["SbidStall[Events]" ], ascending = False )
178175 source_available = False
179176 instructions = []
180177 with open (asm , "r" ) as inf :
@@ -189,9 +186,14 @@ def AnalyzeSbidStalls(kernel, args, df, report):
189186
190187 bbs = ConstructCFG (instructions )
191188
189+ df = stalldf [["IP[Address]" , "SbidStall[Events]" ]]
190+ df = df [df ["SbidStall[Events]" ] > 0 ] # drop 0s
191+ df = df .sort_values (by = ["SbidStall[Events]" ], ascending = False )
192+
192193 print ("Kernel: " + kernel , file = report )
193194 print ("Assembly with instruction addresses: " + asm , file = report )
194- print ("SBID stalls: " , file = report )
195+ print ("***********************************************************************************************" , file = report )
196+ print ("Sbid Stalls: " , file = report )
195197
196198 for index , row in df .iterrows ():
197199 ip = row ["IP[Address]" ]
@@ -200,15 +202,24 @@ def AnalyzeSbidStalls(kernel, args, df, report):
200202 if ((ins .startswith ("//" ) == False ) and ("//" in ins )):
201203 if (ins .startswith ("/* [" + str ('{:08X}' .format (pc ))+ "] */ " ) == True ): # found stalled instruction
202204 words = ins .split ("{" )
203- if (len (words ) < 2 ):
204- break # invalid
205-
206205 sbids_stalled = []
207- for token in words [1 ].split ("}" )[0 ].split ("," ):
208- if (token .startswith ("$" ) == True ):
209- subtokens = token .split ("." )
210- if (len (subtokens ) > 1 ):
211- sbids_stalled .append (subtokens [0 ])
206+ if (len (words ) >= 2 ):
207+ for token in words [1 ].split ("}" )[0 ].split ("," ):
208+ if (token .startswith ("$" ) == True ):
209+ sbids_stalled .append (token )
210+ if (len (sbids_stalled ) == 0 ):
211+ words = ins .split ("(" ) # check if SBID tokens are in (...)
212+ if (len (words ) >= 2 ):
213+ i = 1
214+ done = False
215+ while (done == False ):
216+ for token in words [i ].split (")" )[0 ].split ("," ):
217+ if (token .startswith ("$" ) == True ):
218+ sbids_stalled .append (token )
219+ done = True
220+ i = i + 1
221+ if (i == len (words )): # all words are inspected
222+ break
212223
213224 ins_stalled_not_line_resolved = addr
214225 ins_stalled_not_file_resolved = addr
@@ -229,18 +240,26 @@ def AnalyzeSbidStalls(kernel, args, df, report):
229240 bid , start , end = bbs_to_check [j ]
230241 for addr2 , ins2 in enumerate (reversed (instructions [start : end + 1 ])):
231242 if (len (sbids_stalled ) > 0 ):
232- if ((ins2 .startswith ("//" ) == False ) and ("//" in ins2 )):
243+ if ((ins2 .startswith ("//" ) == False ) and ("//" in ins2 ) and ( re . match ( "/\* *\[" , ins2 ) is not None ) ):
233244 tokens = ins2 .split ("{" )
234245 if (len (tokens ) > 1 ):
235- sbids = []
246+ sbids_stall = []
236247 for token in tokens [1 ].split ("}" )[0 ].split ("," ):
237248 if (token .startswith ("$" ) == True ):
238- sbids .append (token )
249+ sbids_stall .append (token )
239250 for sbid in sbids_stalled :
240- if (sbid in sbids ): # instruction stalled depends on ins2
241- sbids_stalled .remove (sbid ) # remove sbid from the sbids of the instruction stalled
242- ins_stall_not_line_resolved .append (end - addr2 ) # source lines/files to be resolved
243- ins_stall_not_file_resolved .append (end - addr2 )
251+ for sbid2 in sbids_stall :
252+ if (len (sbid .split ("." )) > 1 ): # .dst or .src in bid
253+ if (sbid2 == sbid .split ("." )[0 ]):
254+ sbids_stalled .remove (sbid ) # remove sbid from the sbids of the instruction stalled
255+ ins_stall_not_line_resolved .append (end - addr2 ) # source lines/files to be resolved
256+ ins_stall_not_file_resolved .append (end - addr2 )
257+ else :
258+ if (sbid == sbid2 .split ("." )[0 ]): # stalled ins depends on ins2 or dependency already resolved
259+ sbids_stalled .remove (sbid ) # remove sbid from the sbids of the instruction stalled
260+ if ((sbid == sbid2 ) and ("sync." not in ins2 )): # ins2 not a sync. ins depends on ins2
261+ ins_stall_not_line_resolved .append (end - addr2 ) # source lines/files to be resolved
262+ ins_stall_not_file_resolved .append (end - addr2 )
244263
245264 if (re .match ("// *Line" , ins2 ) is not None ):
246265 if (ins_stalled_not_line_resolved != None ): # source line of stalled instruction
@@ -300,6 +319,58 @@ def AnalyzeSbidStalls(kernel, args, df, report):
300319 print ("is stalled" , file = report )
301320
302321 break
322+
323+
324+ # analyze stalls of other types
325+
326+ type = ["ControlStall[Events]" , "PipeStall[Events]" , "SendStall[Events]" , "DistStall[Events]" , "SyncStall[Events]" , "InstrFetchStall[Events]" , "OtherStall[Events]" ]
327+ for t in type :
328+ df = stalldf [["IP[Address]" , t ]]
329+ df = df [df [t ] > 0 ] # drop 0s
330+ if (df .shape [0 ] == 0 ): # zero stalls. move to the next type
331+ continue
332+ df = df .sort_values (by = [t ], ascending = False )
333+
334+ print ("***********************************************************************************************" , file = report )
335+ print (t .split ("Stall" )[0 ] + " Stalls: " , file = report )
336+
337+ for index , row in df .iterrows ():
338+ ip = row ["IP[Address]" ]
339+ pc = int (ip , 16 )
340+ for addr , ins in enumerate (instructions ):
341+ if ((ins .startswith ("//" ) == False ) and ("//" in ins )):
342+ if (ins .startswith ("/* [" + str ('{:08X}' .format (pc ))+ "] */ " ) == True ): # found stalled instruction
343+ if (source_available == True ):
344+ ins_stalled_not_line_resolved = addr
345+ ins_stalled_not_file_resolved = addr
346+ source_line_stalled = None
347+ source_file_stalled = None
348+
349+ for addr2 , ins2 in enumerate (reversed (instructions [0 : addr ])):
350+ if (re .match ("// *Line" , ins2 ) is not None ):
351+ if (ins_stalled_not_line_resolved != None ): # source line of stalled instruction
352+ source_line_stalled = addr - 1 - addr2
353+ ins_stalled_not_line_resolved = None
354+
355+ if (re .match ("// *File" , ins2 ) is not None ):
356+ if (ins_stalled_not_file_resolved != None ): # source file of stalled instruction
357+ source_file_stalled = addr - 1 - addr2
358+ ins_stalled_not_file_resolved = None
359+
360+ if ((ins_stalled_not_line_resolved == None ) and (ins_stalled_not_file_resolved == None )):
361+ break # we are done
362+
363+ print ("\n Instruction" , file = report )
364+ print (" " + ins , file = report )
365+ if (source_line_stalled != None ):
366+ print (" " + instructions [source_line_stalled ][3 :], file = report )
367+ if (source_file_stalled != None ):
368+ print (" " + instructions [source_file_stalled ][3 :], file = report )
369+
370+ print ("is stalled" , file = report )
371+
372+ break
373+
303374 print ("===============================================================================================" , file = report )
304375
305376def AnalyzeStallMetrics (args , header , last ):
@@ -357,7 +428,7 @@ def AnalyzeStallMetrics(args, header, last):
357428 plt .close (fig ) # close figure to save memory
358429
359430 if (args .shaderdump is not None ):
360- AnalyzeSbidStalls (kernel , args , df2 , report_out )
431+ AnalyzeStalls (kernel , args , df2 , report_out )
361432
362433 print ("\n Analyzed kernel " + kernel )
363434
@@ -396,7 +467,7 @@ def AnalyzeStallMetrics(args, header, last):
396467 plt .close (fig ) # close figure to save memory
397468
398469 if (args .shaderdump is not None ):
399- AnalyzeSbidStalls (kernel , args , df2 , report_out )
470+ AnalyzeStalls (kernel , args , df2 , report_out )
400471
401472 print ("\n Analyzed kernel " + kernel )
402473 if (p != None ):
@@ -405,7 +476,7 @@ def AnalyzeStallMetrics(args, header, last):
405476
406477 if ((args .shaderdump is not None ) and (args .report is not None )):
407478 report_out .close ()
408- print ("SBID stall report is in file " + args .report )
479+ print ("Stall report is in file " + args .report )
409480
410481 else :
411482 counting = True
@@ -455,12 +526,12 @@ def AnalyzeStallMetrics(args, header, last):
455526 plt .savefig (args .output )
456527
457528 if (args .shaderdump is not None ):
458- AnalyzeSbidStalls (kernel , args , df2 , report_out )
529+ AnalyzeStalls (kernel , args , df2 , report_out )
459530
460531 print ("\n Stall metric chart in file " + args .output + " has been successfully generated." )
461532 if ((args .shaderdump is not None ) and (args .report is not None )):
462533 report_out .close ()
463- print ("SBID stall report is in file " + args .report )
534+ print ("Stall report is in file " + args .report )
464535
465536def PlotKernelInstancePerfMetrics (args , kernel , df , metrics ):
466537 k = 0
@@ -536,7 +607,7 @@ def AnalyzePerfMetrics(args, header, last):
536607 ax = df3 .plot (y = metrics_cleansed , kind = 'line' , xlabel = args .xlabel , ylabel = args .ylabel )
537608 else :
538609 ax = df3 .plot (y = metrics_cleansed , kind = 'bar' , xlabel = args .xlabel , ylabel = args .ylabel )
539-
610+
540611 plt .grid (visible = True , which = 'both' , axis = 'y' )
541612 plt .legend (loc = 'best' , fontsize = 4 )
542613 plt .title (label = args .title + "\n (" + kernel + ")" , loc = 'center' , fontsize = 8 , wrap = True )
0 commit comments