@@ -20,6 +20,7 @@ public class Engine : IEngine
2020
2121 [ PublicAPI ] public IHost Host { get ; }
2222 [ PublicAPI ] public Action < long > WorkloadAction { get ; }
23+ [ PublicAPI ] public Action < long > WorkloadActionNoUnroll { get ; }
2324 [ PublicAPI ] public Action Dummy1Action { get ; }
2425 [ PublicAPI ] public Action Dummy2Action { get ; }
2526 [ PublicAPI ] public Action Dummy3Action { get ; }
@@ -44,19 +45,24 @@ public class Engine : IEngine
4445 private readonly EnginePilotStage pilotStage ;
4546 private readonly EngineWarmupStage warmupStage ;
4647 private readonly EngineActualStage actualStage ;
47- private readonly bool includeExtraStats ;
4848 private readonly Random random ;
49+ private readonly bool includeExtraStats , includeSurvivedMemory ;
50+
51+ // These must be static since more than one Engine is used.
52+ private static long survivedBytes ;
53+ private static bool survivedBytesMeasured ;
54+ private static Func < long > GetTotalBytes { get ; set ; }
4955
5056 internal Engine (
5157 IHost host ,
5258 IResolver resolver ,
53- Action dummy1Action , Action dummy2Action , Action dummy3Action , Action < long > overheadAction , Action < long > workloadAction , Job targetJob ,
59+ Action dummy1Action , Action dummy2Action , Action dummy3Action , Action < long > overheadAction , Action < long > workloadAction , Action < long > workloadActionNoUnroll , Job targetJob ,
5460 Action globalSetupAction , Action globalCleanupAction , Action iterationSetupAction , Action iterationCleanupAction , long operationsPerInvoke ,
55- bool includeExtraStats , string benchmarkName )
61+ bool includeExtraStats , bool includeSurvivedMemory , string benchmarkName )
5662 {
57-
5863 Host = host ;
5964 OverheadAction = overheadAction ;
65+ WorkloadActionNoUnroll = workloadActionNoUnroll ;
6066 Dummy1Action = dummy1Action ;
6167 Dummy2Action = dummy2Action ;
6268 Dummy3Action = dummy3Action ;
@@ -69,6 +75,7 @@ internal Engine(
6975 OperationsPerInvoke = operationsPerInvoke ;
7076 this . includeExtraStats = includeExtraStats ;
7177 BenchmarkName = benchmarkName ;
78+ this . includeSurvivedMemory = includeSurvivedMemory ;
7279
7380 Resolver = resolver ;
7481
@@ -84,6 +91,53 @@ internal Engine(
8491 actualStage = new EngineActualStage ( this ) ;
8592
8693 random = new Random ( 12345 ) ; // we are using constant seed to try to get repeatable results
94+
95+ if ( includeSurvivedMemory && GetTotalBytes is null )
96+ {
97+ // CreateGetTotalBytesFunc enables monitoring, so we only call it if we need to measure survived memory.
98+ GetTotalBytes = CreateGetTotalBytesFunc ( ) ;
99+
100+ // Necessary for CORE runtimes.
101+ // Measure bytes to allow GC monitor to make its allocations.
102+ GetTotalBytes ( ) ;
103+ // Run the clock once to allow it to make its allocations.
104+ MeasureAction ( _ => { } , 0 ) ;
105+ GetTotalBytes ( ) ;
106+ }
107+ }
108+
/// <summary>
/// Builds the delegate used to sample the "total bytes" figure for survived-memory measurement.
/// </summary>
/// <returns>
/// A delegate that always yields 0 on Mono; otherwise a delegate that forces a GC and reads
/// <see cref="AppDomain.MonitoringSurvivedMemorySize"/>, or, if enabling monitoring throws,
/// a delegate that forces a GC and reads <see cref="GC.GetTotalMemory(bool)"/>.
/// </returns>
private static Func<long> CreateGetTotalBytesFunc()
{
    // Don't try to measure in Mono: monitoring is not available there, and GC.GetTotalMemory is very inaccurate.
    if (RuntimeInformation.IsMono)
    {
        return () => 0;
    }

    // Docs say monitoring should be available in .NET Core 2.1, but enabling it throws an exception.
    // So simply attempt it on every non-Mono runtime and remember whether it worked.
    bool monitoringAvailable;
    try
    {
        AppDomain.MonitoringIsEnabled = true;
        monitoringAvailable = true;
    }
    catch
    {
        // Deliberate best-effort: any failure means we fall back to GC.GetTotalMemory.
        monitoringAvailable = false;
    }

    if (monitoringAvailable)
    {
        return ReadSurvivedMemory;
    }

    return ReadTotalMemory;

    long ReadSurvivedMemory()
    {
        // Enforce GC.Collect here to make sure we get accurate results.
        ForceGcCollect();
        return AppDomain.CurrentDomain.MonitoringSurvivedMemorySize;
    }

    long ReadTotalMemory()
    {
        // Enforce GC.Collect here to make sure we get accurate results.
        ForceGcCollect();
        return GC.GetTotalMemory(true);
    }
}
136+
/// <summary>
/// Clears the shared survived-memory state: marks the measurement as not yet taken
/// and zeroes the recorded byte count.
/// </summary>
private static void ResetSurvived()
{
    // The two fields are independent; both are returned to their initial values.
    survivedBytesMeasured = false;
    survivedBytes = 0L;
}
88142
89143 public void Dispose ( )
@@ -155,7 +209,9 @@ public Measurement RunIteration(IterationData data)
155209 var action = isOverhead ? OverheadAction : WorkloadAction ;
156210
157211 if ( ! isOverhead )
212+ {
158213 IterationSetupAction ( ) ;
214+ }
159215
160216 GcCollect ( ) ;
161217
@@ -164,10 +220,36 @@ public Measurement RunIteration(IterationData data)
164220
165221 Span < byte > stackMemory = randomizeMemory ? stackalloc byte [ random . Next ( 32 ) ] : Span < byte > . Empty ;
166222
167- // Measure
168- var clock = Clock . Start ( ) ;
169- action ( invokeCount / unrollFactor ) ;
170- var clockSpan = clock . GetElapsed ( ) ;
223+ bool needsSurvivedMeasurement = includeSurvivedMemory && ! isOverhead && ! survivedBytesMeasured ;
224+ double nanoseconds ;
225+ if ( needsSurvivedMeasurement )
226+ {
227+ // Measure survived bytes for only the first invocation.
228+ survivedBytesMeasured = true ;
229+ if ( totalOperations == 1 )
230+ {
231+ // Measure normal invocation for both survived memory and time.
232+ long beforeBytes = GetTotalBytes ( ) ;
233+ nanoseconds = MeasureAction ( action , invokeCount / unrollFactor ) ;
234+ long afterBytes = GetTotalBytes ( ) ;
235+ survivedBytes = afterBytes - beforeBytes ;
236+ }
237+ else
238+ {
239+ // Measure a single invocation for survived memory, plus normal invocations for time.
240+ ++ totalOperations ;
241+ long beforeBytes = GetTotalBytes ( ) ;
242+ nanoseconds = MeasureAction ( WorkloadActionNoUnroll , 1 ) ;
243+ long afterBytes = GetTotalBytes ( ) ;
244+ survivedBytes = afterBytes - beforeBytes ;
245+ nanoseconds += MeasureAction ( action , invokeCount / unrollFactor ) ;
246+ }
247+ }
248+ else
249+ {
250+ // Measure time normally.
251+ nanoseconds = MeasureAction ( action , invokeCount / unrollFactor ) ;
252+ }
171253
172254 if ( EngineEventSource . Log . IsEnabled ( ) )
173255 EngineEventSource . Log . IterationStop ( data . IterationMode , data . IterationStage , totalOperations ) ;
@@ -181,14 +263,23 @@ public Measurement RunIteration(IterationData data)
181263 GcCollect ( ) ;
182264
183265 // Results
184- var measurement = new Measurement ( 0 , data . IterationMode , data . IterationStage , data . Index , totalOperations , clockSpan . GetNanoseconds ( ) ) ;
266+ var measurement = new Measurement ( 0 , data . IterationMode , data . IterationStage , data . Index , totalOperations , nanoseconds ) ;
185267 WriteLine ( measurement . ToString ( ) ) ;
186268
187269 Consume ( stackMemory ) ;
188270
189271 return measurement ;
190272 }
191273
/// <summary>
/// Invokes <paramref name="action"/> once with <paramref name="arg"/> while timing it with the engine clock.
/// </summary>
/// <param name="action">The delegate to time.</param>
/// <param name="arg">The value passed to <paramref name="action"/>.</param>
/// <returns>The elapsed time of the call, in nanoseconds.</returns>
// Kept out-of-line on purpose: this is necessary for the CORE runtime to clean up the memory from the clock.
[MethodImpl(MethodImplOptions.NoInlining)]
private double MeasureAction(Action<long> action, long arg)
{
    var runningClock = Clock.Start();
    action.Invoke(arg);
    var elapsed = runningClock.GetElapsed();
    return elapsed.GetNanoseconds();
}
282+
192283 private ( GcStats , ThreadingStats , double ) GetExtraStats ( IterationData data )
193284 {
194285 // we enable monitoring after main target run, for this single iteration which is executed at the end
@@ -212,9 +303,10 @@ public Measurement RunIteration(IterationData data)
212303 IterationCleanupAction ( ) ; // we run iteration cleanup after collecting GC stats
213304
214305 var totalOperationsCount = data . InvokeCount * OperationsPerInvoke ;
215- GcStats gcStats = ( finalGcStats - initialGcStats ) . WithTotalOperations ( totalOperationsCount ) ;
306+ GcStats gcStats = ( finalGcStats - initialGcStats ) . WithTotalOperationsAndSurvivedBytes ( data . InvokeCount * OperationsPerInvoke , survivedBytes ) ;
216307 ThreadingStats threadingStats = ( finalThreadingStats - initialThreadingStats ) . WithTotalOperations ( data . InvokeCount * OperationsPerInvoke ) ;
217308
309+ ResetSurvived ( ) ;
218310 return ( gcStats , threadingStats , exceptionsStats . ExceptionsCount / ( double ) totalOperationsCount ) ;
219311 }
220312
0 commit comments