Skip to content

Commit e06ecd3

Browse files
Add support for out-of-order monitored fence completion
1 parent 8707447 commit e06ecd3

File tree

2 files changed

+44
-22
lines changed

2 files changed

+44
-22
lines changed

PresentData/GpuTrace.cpp

Lines changed: 43 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,11 @@ void GpuTrace::PrintRunningContexts() const
5555

5656
printf(" SequenceId=%u", entry.mSequenceId);
5757
if (entry.mPacketTrace == nullptr) {
58-
printf(" WAIT");
58+
if (entry.mCompleted) {
59+
printf(" DONE");
60+
} else {
61+
printf(" WAIT");
62+
}
5963
} else {
6064
printf(" ProcessId=%u", LookupPacketTraceProcessId(entry.mPacketTrace));
6165
}
@@ -295,6 +299,7 @@ void GpuTrace::EnqueueWork(Context* context, uint32_t sequenceId, uint64_t times
295299
auto entry = &node->mQueue[queueIndex];
296300
entry->mPacketTrace = packetTrace;
297301
entry->mSequenceId = sequenceId;
302+
entry->mCompleted = false;
298303
node->mQueueCount += 1;
299304

300305
// If the queue was empty, the packet starts running right away, otherwise
@@ -311,7 +316,6 @@ void GpuTrace::EnqueueWork(Context* context, uint32_t sequenceId, uint64_t times
311316

312317
bool GpuTrace::CompleteWork(Context* context, uint32_t sequenceId, uint64_t timestamp)
313318
{
314-
auto packetTrace = context->mPacketTrace;
315319
auto node = context->mNode;
316320

317321
// It's possible to miss DmaPacket events during realtime analysis, so try
@@ -329,17 +333,17 @@ bool GpuTrace::CompleteWork(Context* context, uint32_t sequenceId, uint64_t time
329333
// actual: [-----] [-----] [-----] [-----]-----]-------]
330334
// ^ ^ x ^ ^ ^ x ^ ^
331335
// s1 i1 s2 i2 s3 i3 s2 i1 s3
332-
if (node->mQueueCount == 0) {
336+
if (context->mPacketTrace == nullptr || node->mQueueCount == 0) {
333337
return false;
334338
}
335339

336340
auto runningSequenceId = node->mQueue[node->mQueueIndex].mSequenceId;
337-
if (packetTrace == nullptr || sequenceId < runningSequenceId) {
341+
if (sequenceId < runningSequenceId) {
338342
return false;
339343
}
340344

341345
// If we get a DmaPacket_Start event with no corresponding DmaPacket_Info,
342-
// then sequenceId will be larger than expected. If this happens, we seach
346+
// then sequenceId will be larger than expected. If this happens, we search
343347
// through the queue for a match and if no match was found then we ignore
344348
// this event (we missed both the DmaPacket_Start and DmaPacket_Info for
345349
// the packet). In this case, both the missing packet's execution time as
@@ -366,37 +370,54 @@ bool GpuTrace::CompleteWork(Context* context, uint32_t sequenceId, uint64_t time
366370
}
367371

368372
uint32_t queueIndex = (node->mQueueIndex + missingCount) % (uint32_t) node->mQueue.size();
369-
if (node->mQueue[queueIndex].mSequenceId == sequenceId) {
370-
// Move current packet into this slot
371-
node->mQueue[queueIndex] = node->mQueue[node->mQueueIndex];
373+
auto entry = &node->mQueue[queueIndex];
374+
if (entry->mSequenceId == sequenceId) {
375+
376+
// On some 3000-series NVIDIA cards using hardware scheduling, we sometimes get
377+
// QueuePacket_Stop events for monitored fence packets out of order (too early).
378+
// This is NOT due to missed events, and any previous render packets should still be
379+
// considered running/enqueued. So, we flag such a packet as completed so that it
380+
// is immediately completed once it reaches the front of the queue.
381+
if (entry->mPacketTrace == nullptr) {
382+
entry->mCompleted = true;
383+
return true;
384+
}
385+
386+
// Otherwise, move current packet into this slot
387+
*entry = node->mQueue[node->mQueueIndex];
372388
node->mQueueIndex = queueIndex;
373389
node->mQueueCount -= missingCount;
374390
break;
375391
}
376392
}
377393
}
378394

379-
// Pop the completed packet from the queue.
380-
//
381395
// If this was the process' last executing packet, accumulate the execution
382396
// duration into the process' count.
383-
node->mQueueCount -= 1;
384-
385-
packetTrace = node->mQueue[node->mQueueIndex].mPacketTrace;
386-
if (packetTrace != nullptr) {
387-
packetTrace->mRunningPacketCount -= 1;
388-
if (packetTrace->mRunningPacketCount == 0) {
389-
CompletePacket(packetTrace, timestamp);
397+
auto entry = &node->mQueue[node->mQueueIndex];
398+
if (entry->mPacketTrace != nullptr) {
399+
entry->mPacketTrace->mRunningPacketCount -= 1;
400+
if (entry->mPacketTrace->mRunningPacketCount == 0) {
401+
CompletePacket(entry->mPacketTrace, timestamp);
390402
}
391403
}
392404

393-
// If there was another queued packet, start it
394-
if (node->mQueueCount > 0) {
405+
// Pop the completed packet from the queue, and start the next one.
406+
for (;;) {
395407
node->mQueueIndex = (node->mQueueIndex + 1) % (uint32_t) node->mQueue.size();
408+
node->mQueueCount -= 1;
409+
if (node->mQueueCount == 0) {
410+
break;
411+
}
412+
413+
entry = &node->mQueue[node->mQueueIndex];
414+
if (entry->mPacketTrace != nullptr) {
415+
StartPacket(entry->mPacketTrace, timestamp);
416+
break;
417+
}
396418

397-
packetTrace = node->mQueue[node->mQueueIndex].mPacketTrace;
398-
if (packetTrace != nullptr) {
399-
StartPacket(packetTrace, timestamp);
419+
if (!entry->mCompleted) {
420+
break;
400421
}
401422
}
402423

PresentData/GpuTrace.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ class GpuTrace {
2828
struct EnqueuedPacket {
2929
PacketTrace* mPacketTrace; // Frame trace for this packet
3030
uint32_t mSequenceId; // Sequence ID for this packet
31+
bool mCompleted; // Flag to signal that the packet completed out-of-order
3132
};
3233
std::vector<EnqueuedPacket> mQueue; // Ring buffer of current enqueued packets
3334
uint32_t mQueueIndex; // Index into mQueue for currently-running packet

0 commit comments

Comments
 (0)