@@ -19,6 +19,7 @@ public class Engine : IEngine
1919
2020 [ PublicAPI ] public IHost Host { get ; }
2121 [ PublicAPI ] public Action < long > WorkloadAction { get ; }
22+ [ PublicAPI ] public Action < long > WorkloadActionNoUnroll { get ; }
2223 [ PublicAPI ] public Action Dummy1Action { get ; }
2324 [ PublicAPI ] public Action Dummy2Action { get ; }
2425 [ PublicAPI ] public Action Dummy3Action { get ; }
@@ -44,19 +45,22 @@ public class Engine : IEngine
4445 private readonly EnginePilotStage pilotStage ;
4546 private readonly EngineWarmupStage warmupStage ;
4647 private readonly EngineActualStage actualStage ;
47- private readonly bool includeExtraStats ;
4848 private readonly Random random ;
49+ private readonly bool includeExtraStats , includeSurvivedMemory ;
50+
51+ private long ? survivedBytes ;
52+ private bool survivedBytesMeasured ;
4953
5054 internal Engine (
5155 IHost host ,
5256 IResolver resolver ,
53- Action dummy1Action , Action dummy2Action , Action dummy3Action , Action < long > overheadAction , Action < long > workloadAction , Job targetJob ,
57+ Action dummy1Action , Action dummy2Action , Action dummy3Action , Action < long > overheadAction , Action < long > workloadAction , Action < long > workloadActionNoUnroll , Job targetJob ,
5458 Action globalSetupAction , Action globalCleanupAction , Action iterationSetupAction , Action iterationCleanupAction , long operationsPerInvoke ,
55- bool includeExtraStats , string benchmarkName )
59+ bool includeExtraStats , bool includeSurvivedMemory , string benchmarkName )
5660 {
57-
5861 Host = host ;
5962 OverheadAction = overheadAction ;
63+ WorkloadActionNoUnroll = workloadActionNoUnroll ;
6064 Dummy1Action = dummy1Action ;
6165 Dummy2Action = dummy2Action ;
6266 Dummy3Action = dummy3Action ;
@@ -69,6 +73,7 @@ internal Engine(
6973 OperationsPerInvoke = operationsPerInvoke ;
7074 this . includeExtraStats = includeExtraStats ;
7175 BenchmarkName = benchmarkName ;
76+ this . includeSurvivedMemory = includeSurvivedMemory ;
7277
7378 Resolver = resolver ;
7479
@@ -84,6 +89,23 @@ internal Engine(
8489 actualStage = new EngineActualStage ( this ) ;
8590
8691 random = new Random ( 12345 ) ; // we are using constant seed to try to get repeatable results
92+
93+ if ( includeSurvivedMemory && GcStats . InitSurvivedBytes ( ) )
94+ {
95+ // Measure bytes to allow GC monitor to make its allocations.
96+ GcStats . GetTotalBytes ( ) ;
97+ // Run the clock once to allow it to make its allocations.
98+ MeasureAction ( _ => { } , 0 ) ;
99+ GcStats . GetTotalBytes ( ) ;
100+ }
101+ }
102+
/// <summary>
/// Seeds this engine with the survived-memory state held by <paramref name="other"/>.
/// </summary>
/// <param name="other">Engine whose <c>survivedBytes</c> and <c>survivedBytesMeasured</c> values are copied.</param>
/// <returns>This engine instance, so the call can be chained.</returns>
internal Engine WithInitialData(Engine other)
{
    // Reuse the other engine's values so the survived bytes only have to be measured once.
    (survivedBytes, survivedBytesMeasured) = (other.survivedBytes, other.survivedBytesMeasured);
    return this;
}
88110
89111 public void Dispose ( )
@@ -168,10 +190,36 @@ public Measurement RunIteration(IterationData data)
168190
169191 Span < byte > stackMemory = randomizeMemory ? stackalloc byte [ random . Next ( 32 ) ] : Span < byte > . Empty ;
170192
171- // Measure
172- var clock = Clock . Start ( ) ;
173- action ( invokeCount / unrollFactor ) ;
174- var clockSpan = clock . GetElapsed ( ) ;
193+ bool needsSurvivedMeasurement = includeSurvivedMemory && ! isOverhead && ! survivedBytesMeasured ;
194+ double nanoseconds ;
195+ if ( needsSurvivedMeasurement )
196+ {
197+ // Measure survived bytes for only the first invocation.
198+ survivedBytesMeasured = true ;
199+ if ( totalOperations == 1 )
200+ {
201+ // Measure normal invocation for both survived memory and time.
202+ long ? beforeBytes = GcStats . GetTotalBytes ( ) ;
203+ nanoseconds = MeasureAction ( action , invokeCount / unrollFactor ) ;
204+ long ? afterBytes = GcStats . GetTotalBytes ( ) ;
205+ survivedBytes = afterBytes - beforeBytes ;
206+ }
207+ else
208+ {
209+ // Measure a single invocation for survived memory, plus normal invocations for time.
210+ ++ totalOperations ;
211+ long ? beforeBytes = GcStats . GetTotalBytes ( ) ;
212+ nanoseconds = MeasureAction ( WorkloadActionNoUnroll , 1 ) ;
213+ long ? afterBytes = GcStats . GetTotalBytes ( ) ;
214+ survivedBytes = afterBytes - beforeBytes ;
215+ nanoseconds += MeasureAction ( action , invokeCount / unrollFactor ) ;
216+ }
217+ }
218+ else
219+ {
220+ // Measure time normally.
221+ nanoseconds = MeasureAction ( action , invokeCount / unrollFactor ) ;
222+ }
175223
176224 if ( EngineEventSource . Log . IsEnabled ( ) )
177225 EngineEventSource . Log . IterationStop ( data . IterationMode , data . IterationStage , totalOperations ) ;
@@ -185,7 +233,7 @@ public Measurement RunIteration(IterationData data)
185233 GcCollect ( ) ;
186234
187235 // Results
188- var measurement = new Measurement ( 0 , data . IterationMode , data . IterationStage , data . Index , totalOperations , clockSpan . GetNanoseconds ( ) ) ;
236+ var measurement = new Measurement ( 0 , data . IterationMode , data . IterationStage , data . Index , totalOperations , nanoseconds ) ;
189237 WriteLine ( measurement . ToString ( ) ) ;
190238 if ( measurement . IterationStage == IterationStage . Jitting )
191239 jittingMeasurements . Add ( measurement ) ;
@@ -195,6 +243,15 @@ public Measurement RunIteration(IterationData data)
195243 return measurement ;
196244 }
197245
/// <summary>
/// Invokes <paramref name="action"/> once with <paramref name="arg"/> and returns the elapsed time in nanoseconds.
/// </summary>
/// <param name="action">Delegate to time.</param>
/// <param name="arg">Value passed straight through to <paramref name="action"/>.</param>
/// <returns>Elapsed time of the single call, in nanoseconds, as reported by the clock.</returns>
/// <remarks>
/// Kept as a separate, non-inlined method: this is necessary for the CORE runtime to clean up the memory from the clock.
/// </remarks>
[MethodImpl(MethodImplOptions.NoInlining)]
private double MeasureAction(Action<long> action, long arg)
{
    var startedClock = Clock.Start();
    action(arg);
    var elapsed = startedClock.GetElapsed();
    return elapsed.GetNanoseconds();
}
254+
198255 private ( GcStats , ThreadingStats , double ) GetExtraStats ( IterationData data )
199256 {
200257 // we enable monitoring after main target run, for this single iteration which is executed at the end
@@ -218,8 +275,8 @@ public Measurement RunIteration(IterationData data)
218275 IterationCleanupAction ( ) ; // we run iteration cleanup after collecting GC stats
219276
220277 var totalOperationsCount = data . InvokeCount * OperationsPerInvoke ;
221- GcStats gcStats = ( finalGcStats - initialGcStats ) . WithTotalOperations ( totalOperationsCount ) ;
222- ThreadingStats threadingStats = ( finalThreadingStats - initialThreadingStats ) . WithTotalOperations ( data . InvokeCount * OperationsPerInvoke ) ;
278+ GcStats gcStats = ( finalGcStats - initialGcStats ) . WithTotalOperationsAndSurvivedBytes ( totalOperationsCount , survivedBytes ) ;
279+ ThreadingStats threadingStats = ( finalThreadingStats - initialThreadingStats ) . WithTotalOperations ( totalOperationsCount ) ;
223280
224281 return ( gcStats , threadingStats , exceptionsStats . ExceptionsCount / ( double ) totalOperationsCount ) ;
225282 }
@@ -253,7 +310,7 @@ private void GcCollect()
253310 ForceGcCollect ( ) ;
254311 }
255312
256- private static void ForceGcCollect ( )
313+ internal static void ForceGcCollect ( )
257314 {
258315 GC . Collect ( ) ;
259316 GC . WaitForPendingFinalizers ( ) ;
0 commit comments