@@ -55,7 +55,11 @@ void GpuTrace::PrintRunningContexts() const
5555
5656 printf (" SequenceId=%u" , entry.mSequenceId );
5757 if (entry.mPacketTrace == nullptr ) {
58- printf (" WAIT" );
58+ if (entry.mCompleted ) {
59+ printf (" DONE" );
60+ } else {
61+ printf (" WAIT" );
62+ }
5963 } else {
6064 printf (" ProcessId=%u" , LookupPacketTraceProcessId (entry.mPacketTrace ));
6165 }
@@ -295,6 +299,7 @@ void GpuTrace::EnqueueWork(Context* context, uint32_t sequenceId, uint64_t times
295299 auto entry = &node->mQueue [queueIndex];
296300 entry->mPacketTrace = packetTrace;
297301 entry->mSequenceId = sequenceId;
302+ entry->mCompleted = false ;
298303 node->mQueueCount += 1 ;
299304
300305 // If the queue was empty, the packet starts running right away, otherwise
@@ -311,7 +316,6 @@ void GpuTrace::EnqueueWork(Context* context, uint32_t sequenceId, uint64_t times
311316
312317bool GpuTrace::CompleteWork (Context* context, uint32_t sequenceId, uint64_t timestamp)
313318{
314- auto packetTrace = context->mPacketTrace ;
315319 auto node = context->mNode ;
316320
317321 // It's possible to miss DmaPacket events during realtime analysis, so try
@@ -329,17 +333,17 @@ bool GpuTrace::CompleteWork(Context* context, uint32_t sequenceId, uint64_t time
329333 // actual: [-----] [-----] [-----] [-----]-----]-------]
330334 // ^ ^ x ^ ^ ^ x ^ ^
331335 // s1 i1 s2 i2 s3 i3 s2 i1 s3
332- if (node->mQueueCount == 0 ) {
336+ if (context-> mPacketTrace == nullptr || node->mQueueCount == 0 ) {
333337 return false ;
334338 }
335339
336340 auto runningSequenceId = node->mQueue [node->mQueueIndex ].mSequenceId ;
337- if (packetTrace == nullptr || sequenceId < runningSequenceId) {
341+ if (sequenceId < runningSequenceId) {
338342 return false ;
339343 }
340344
341345 // If we get a DmaPacket_Start event with no corresponding DmaPacket_Info,
342- // then sequenceId will be larger than expected. If this happens, we seach
346+ // then sequenceId will be larger than expected. If this happens, we search
343347 // through the queue for a match and if no match was found then we ignore
344348 // this event (we missed both the DmaPacket_Start and DmaPacket_Info for
345349 // the packet). In this case, both the missing packet's execution time as
@@ -366,37 +370,54 @@ bool GpuTrace::CompleteWork(Context* context, uint32_t sequenceId, uint64_t time
366370 }
367371
368372 uint32_t queueIndex = (node->mQueueIndex + missingCount) % (uint32_t ) node->mQueue .size ();
369- if (node->mQueue [queueIndex].mSequenceId == sequenceId) {
370- // Move current packet into this slot
371- node->mQueue [queueIndex] = node->mQueue [node->mQueueIndex ];
373+ auto entry = &node->mQueue [queueIndex];
374+ if (entry->mSequenceId == sequenceId) {
375+
376+ // On some 3000-series NVIDIA cards using hardware scheduling, we sometimes get
377+ // QueuePacket_Stop events for monitored fence packets out of order (too early).
378+ // This is NOT due to missed events, and any previous render packets should still be
379+ // considered running/enqueued. So, we flag such a packet as completed so that it
380+ // is immediately completed once it reaches the front of the queue.
381+ if (entry->mPacketTrace == nullptr ) {
382+ entry->mCompleted = true ;
383+ return true ;
384+ }
385+
386+ // Otherwise, move current packet into this slot
387+ *entry = node->mQueue [node->mQueueIndex ];
372388 node->mQueueIndex = queueIndex;
373389 node->mQueueCount -= missingCount;
374390 break ;
375391 }
376392 }
377393 }
378394
379- // Pop the completed packet from the queue.
380- //
381395 // If this was the process' last executing packet, accumulate the execution
382396 // duration into the process' count.
383- node->mQueueCount -= 1 ;
384-
385- packetTrace = node->mQueue [node->mQueueIndex ].mPacketTrace ;
386- if (packetTrace != nullptr ) {
387- packetTrace->mRunningPacketCount -= 1 ;
388- if (packetTrace->mRunningPacketCount == 0 ) {
389- CompletePacket (packetTrace, timestamp);
397+ auto entry = &node->mQueue [node->mQueueIndex ];
398+ if (entry->mPacketTrace != nullptr ) {
399+ entry->mPacketTrace ->mRunningPacketCount -= 1 ;
400+ if (entry->mPacketTrace ->mRunningPacketCount == 0 ) {
401+ CompletePacket (entry->mPacketTrace , timestamp);
390402 }
391403 }
392404
393- // If there was another queued packet, start it
394- if (node-> mQueueCount > 0 ) {
405+ // Pop the completed packet from the queue, and start the next one.
406+ for (;; ) {
395407 node->mQueueIndex = (node->mQueueIndex + 1 ) % (uint32_t ) node->mQueue .size ();
408+ node->mQueueCount -= 1 ;
409+ if (node->mQueueCount == 0 ) {
410+ break ;
411+ }
412+
413+ entry = &node->mQueue [node->mQueueIndex ];
414+ if (entry->mPacketTrace != nullptr ) {
415+ StartPacket (entry->mPacketTrace , timestamp);
416+ break ;
417+ }
396418
397- packetTrace = node->mQueue [node->mQueueIndex ].mPacketTrace ;
398- if (packetTrace != nullptr ) {
399- StartPacket (packetTrace, timestamp);
419+ if (!entry->mCompleted ) {
420+ break ;
400421 }
401422 }
402423
0 commit comments