1+ import { createHash } from "crypto" ;
12import type { SummaryEvent } from "./log-summary" ;
23
34export interface PipelineSummary {
45 steps : string [ ] ;
56 /** Total counts for each step in the RA array, across all iterations */
67 counts : number [ ] ;
8+ hash : string ;
79}
810
911/**
@@ -26,6 +28,9 @@ export interface PerformanceComparisonDataFromLog {
2628 */
2729 names : string [ ] ;
2830
31+ /** RA hash of the `i`th predicate event */
32+ raHashes : string [ ] ;
33+
2934 /** Number of milliseconds spent evaluating the `i`th predicate from the `names` array. */
3035 timeCosts : number [ ] ;
3136
@@ -52,41 +57,51 @@ export interface PerformanceComparisonDataFromLog {
5257 * All the pipeline runs seen for the `i`th predicate from the `names` array.
5358 */
5459 pipelineSummaryList : Array < Record < string , PipelineSummary > > ;
60+
61+ /** All dependencies of the `i`th predicate from the `names` array, encoded as a list of indices in `names`. */
62+ dependencyLists : number [ ] [ ] ;
5563}
5664
5765export class PerformanceOverviewScanner {
58- private readonly nameToIndex = new Map < string , number > ( ) ;
5966 private readonly data : PerformanceComparisonDataFromLog = {
6067 names : [ ] ,
68+ raHashes : [ ] ,
6169 timeCosts : [ ] ,
6270 tupleCosts : [ ] ,
6371 cacheHitIndices : [ ] ,
6472 sentinelEmptyIndices : [ ] ,
6573 pipelineSummaryList : [ ] ,
6674 evaluationCounts : [ ] ,
6775 iterationCounts : [ ] ,
76+ dependencyLists : [ ] ,
6877 } ;
78+ private readonly raToIndex = new Map < string , number > ( ) ;
79+ private readonly mainHashToRepr = new Map < string , number > ( ) ;
80+ private readonly nameToIndex = new Map < string , number > ( ) ;
6981
70- private getPredicateIndex ( name : string ) : number {
71- const { nameToIndex } = this ;
72- let index = nameToIndex . get ( name ) ;
82+ private getPredicateIndex ( name : string , ra : string ) : number {
83+ let index = this . raToIndex . get ( ra ) ;
7384 if ( index === undefined ) {
74- index = nameToIndex . size ;
75- nameToIndex . set ( name , index ) ;
85+ index = this . raToIndex . size ;
86+ this . raToIndex . set ( ra , index ) ;
7687 const {
7788 names,
89+ raHashes,
7890 timeCosts,
7991 tupleCosts,
8092 iterationCounts,
8193 evaluationCounts,
8294 pipelineSummaryList,
95+ dependencyLists,
8396 } = this . data ;
8497 names . push ( name ) ;
98+ raHashes . push ( ra ) ;
8599 timeCosts . push ( 0 ) ;
86100 tupleCosts . push ( 0 ) ;
87101 iterationCounts . push ( 0 ) ;
88102 evaluationCounts . push ( 0 ) ;
89103 pipelineSummaryList . push ( { } ) ;
104+ dependencyLists . push ( [ ] ) ;
90105 }
91106 return index ;
92107 }
@@ -96,46 +111,63 @@ export class PerformanceOverviewScanner {
96111 }
97112
98113 onEvent ( event : SummaryEvent ) : void {
99- if (
100- event . completionType !== undefined &&
101- event . completionType !== "SUCCESS"
102- ) {
114+ const { completionType, evaluationStrategy, predicateName, raHash } = event ;
115+ if ( completionType !== undefined && completionType !== "SUCCESS" ) {
103116 return ; // Skip any evaluation that wasn't successful
104117 }
105118
106- switch ( event . evaluationStrategy ) {
107- case "EXTENSIONAL" :
119+ switch ( evaluationStrategy ) {
120+ case "EXTENSIONAL" : {
121+ break ;
122+ }
108123 case "COMPUTED_EXTENSIONAL" : {
124+ if ( predicateName . startsWith ( "cached_" ) ) {
125+ // Add a dependency from a cached COMPUTED_EXTENSIONAL to the predicate with the actual contents.
126+ // The raHash of the this event may appear in a CACHE_HIT event in the other event log. The dependency
127+ // we're adding here is needed in order to associate the original predicate with such a cache hit.
128+ const originalName = predicateName . substring ( "cached_" . length ) ;
129+ const originalIndex = this . nameToIndex . get ( originalName ) ;
130+ if ( originalIndex != null ) {
131+ const index = this . getPredicateIndex ( predicateName , raHash ) ;
132+ this . data . dependencyLists [ index ] . push ( originalIndex ) ;
133+ }
134+ }
109135 break ;
110136 }
111137 case "CACHE_HIT" :
112138 case "CACHACA" : {
113139 // Record a cache hit, but only if the predicate has not been seen before.
114140 // We're mainly interested in the reuse of caches from an earlier query run as they can distort comparisons.
115- if ( ! this . nameToIndex . has ( event . predicateName ) ) {
141+ if ( ! this . raToIndex . has ( raHash ) ) {
116142 this . data . cacheHitIndices . push (
117- this . getPredicateIndex ( event . predicateName ) ,
143+ this . getPredicateIndex ( predicateName , raHash ) ,
118144 ) ;
119145 }
120146 break ;
121147 }
122148 case "SENTINEL_EMPTY" : {
123- this . data . sentinelEmptyIndices . push (
124- this . getPredicateIndex ( event . predicateName ) ,
125- ) ;
149+ const index = this . getPredicateIndex ( predicateName , raHash ) ;
150+ this . data . sentinelEmptyIndices . push ( index ) ;
151+ const sentinelIndex = this . raToIndex . get ( event . sentinelRaHash ) ;
152+ if ( sentinelIndex != null ) {
153+ this . data . dependencyLists [ index ] . push ( sentinelIndex ) ; // needed for matching up cache hits
154+ }
126155 break ;
127156 }
128157 case "COMPUTE_RECURSIVE" :
129158 case "COMPUTE_SIMPLE" :
159+ case "NAMED_LOCAL" :
130160 case "IN_LAYER" : {
131- const index = this . getPredicateIndex ( event . predicateName ) ;
161+ const index = this . getPredicateIndex ( predicateName , raHash ) ;
162+ this . nameToIndex . set ( predicateName , index ) ;
132163 let totalTime = 0 ;
133164 let totalTuples = 0 ;
134- if ( event . evaluationStrategy !== "IN_LAYER ") {
165+ if ( evaluationStrategy === "COMPUTE_SIMPLE ") {
135166 totalTime += event . millis ;
136167 } else {
137- // IN_LAYER events do no record of their total time.
138- // Make a best-effort estimate by adding up the positive iteration times (they can be negative).
168+ // Make a best-effort estimate of the total time by adding up the positive iteration times (they can be negative).
169+ // Note that for COMPUTE_RECURSIVE the "millis" field contain the total time of the SCC, not just that predicate,
170+ // but we don't have a good way to show that in the UI, so we rely on the accumulated iteration times.
139171 for ( const millis of event . predicateIterationMillis ?? [ ] ) {
140172 if ( millis > 0 ) {
141173 totalTime += millis ;
@@ -148,13 +180,16 @@ export class PerformanceOverviewScanner {
148180 iterationCounts,
149181 evaluationCounts,
150182 pipelineSummaryList,
183+ dependencyLists,
151184 } = this . data ;
152185 const pipelineSummaries = pipelineSummaryList [ index ] ;
186+ const dependencyList = dependencyLists [ index ] ;
153187 for ( const { counts, raReference } of event . pipelineRuns ?? [ ] ) {
154188 // Get or create the pipeline summary for this RA
155189 const pipelineSummary = ( pipelineSummaries [ raReference ] ??= {
156190 steps : event . ra [ raReference ] ,
157191 counts : counts . map ( ( ) => 0 ) ,
192+ hash : getPipelineHash ( event . ra [ raReference ] ) ,
158193 } ) ;
159194 const { counts : totalTuplesPerStep } = pipelineSummary ;
160195 for ( let i = 0 , length = counts . length ; i < length ; ++ i ) {
@@ -169,6 +204,25 @@ export class PerformanceOverviewScanner {
169204 totalTuplesPerStep [ i ] += count ;
170205 }
171206 }
207+ for ( const dependencyHash of Object . values ( event . dependencies ?? { } ) ) {
208+ const dependencyIndex = this . raToIndex . get ( dependencyHash ) ;
209+ if ( dependencyIndex != null ) {
210+ dependencyList . push ( dependencyIndex ) ;
211+ }
212+ }
213+ // For predicates in the same SCC, add two-way dependencies with an arbitrary SCC member
214+ const sccHash =
215+ event . mainHash ??
216+ ( evaluationStrategy === "COMPUTE_RECURSIVE" ? raHash : null ) ;
217+ if ( sccHash != null ) {
218+ const mainIndex = this . mainHashToRepr . get ( sccHash ) ;
219+ if ( mainIndex == null ) {
220+ this . mainHashToRepr . set ( sccHash , index ) ;
221+ } else {
222+ dependencyLists [ index ] . push ( mainIndex ) ;
223+ dependencyLists [ mainIndex ] . push ( index ) ;
224+ }
225+ }
172226 timeCosts [ index ] += totalTime ;
173227 tupleCosts [ index ] += totalTuples ;
174228 iterationCounts [ index ] += event . pipelineRuns ?. length ?? 0 ;
@@ -178,3 +232,11 @@ export class PerformanceOverviewScanner {
178232 }
179233 }
180234}
235+
236+ function getPipelineHash ( steps : string [ ] ) {
237+ const md5 = createHash ( "md5" ) ;
238+ for ( const step of steps ) {
239+ md5 . write ( step ) ;
240+ }
241+ return md5 . digest ( "base64" ) ;
242+ }
0 commit comments