1- import type { Location , Result } from "sarif" ;
1+ import type { Location , Result , ThreadFlowLocation } from "sarif" ;
22
33function toCanonicalLocation ( location : Location ) : Location {
44 if ( location . physicalLocation ?. artifactLocation ?. index !== undefined ) {
@@ -25,6 +25,19 @@ function toCanonicalLocation(location: Location): Location {
2525 return location ;
2626}
2727
28+ function toCanonicalThreadFlowLocation (
29+ threadFlowLocation : ThreadFlowLocation ,
30+ ) : ThreadFlowLocation {
31+ if ( threadFlowLocation . location ) {
32+ return {
33+ ...threadFlowLocation ,
34+ location : toCanonicalLocation ( threadFlowLocation . location ) ,
35+ } ;
36+ }
37+
38+ return threadFlowLocation ;
39+ }
40+
2841function toCanonicalResult ( result : Result ) : Result {
2942 const canonicalResult = {
3043 ...result ,
@@ -40,37 +53,30 @@ function toCanonicalResult(result: Result): Result {
4053 canonicalResult . relatedLocations . map ( toCanonicalLocation ) ;
4154 }
4255
43- if ( canonicalResult . codeFlows ) {
44- canonicalResult . codeFlows = canonicalResult . codeFlows . map ( ( codeFlow ) => {
45- if ( codeFlow . threadFlows ) {
46- return {
47- ...codeFlow ,
48- threadFlows : codeFlow . threadFlows . map ( ( threadFlow ) => {
49- if ( threadFlow . locations ) {
50- return {
51- ...threadFlow ,
52- locations : threadFlow . locations . map ( ( threadFlowLocation ) => {
53- if ( threadFlowLocation . location ) {
54- return {
55- ...threadFlowLocation ,
56- location : toCanonicalLocation (
57- threadFlowLocation . location ,
58- ) ,
59- } ;
60- }
61-
62- return threadFlowLocation ;
63- } ) ,
64- } ;
65- }
66-
67- return threadFlow ;
68- } ) ,
69- } ;
70- }
71-
72- return codeFlow ;
73- } ) ;
56+ if ( canonicalResult . codeFlows && canonicalResult . codeFlows . length > 0 ) {
57+ // If there are codeFlows, we don't want to compare the full codeFlows. Instead, we just want to compare the
58+ // source and the sink (i.e. the first and last item). CodeQL should guarantee that the first and last threadFlow
59+ // of every codeFlow is the same (i.e. every codeFlow has the same source and sink). Therefore, we just compare the
60+ // first codeFlow and ignore the other codeFlows completely.
61+ // If the codeFlow has a length of 1, this doesn't change the result.
62+
63+ const source = {
64+ ...canonicalResult . codeFlows [ 0 ] . threadFlows [ 0 ] ,
65+ } ;
66+ const sink = {
67+ ...canonicalResult . codeFlows [ 0 ] . threadFlows [
68+ canonicalResult . codeFlows [ 0 ] . threadFlows . length - 1
69+ ] ,
70+ } ;
71+ source . locations = source . locations . map ( toCanonicalThreadFlowLocation ) ;
72+ sink . locations = sink . locations . map ( toCanonicalThreadFlowLocation ) ;
73+
74+ canonicalResult . codeFlows = [
75+ {
76+ ...canonicalResult . codeFlows [ 0 ] ,
77+ threadFlows : [ source , sink ] ,
78+ } ,
79+ ] ;
7480 }
7581
7682 return canonicalResult ;
@@ -79,11 +85,9 @@ function toCanonicalResult(result: Result): Result {
7985/**
8086 * Compare the alerts of two queries. Use deep equality to determine if
8187 * results have been added or removed across two invocations of a query.
82- *
83- * Assumptions:
84- *
85- * 1. Queries have the same sort order
86- * 2. Results are not changed or re-ordered, they are only added or removed
88+ * It first canonicalizes the results by to ensure that when small changes
89+ * to the query are made, the results are still considered the same. This
90+ * includes the removal of all paths except for the source and sink.
8791 *
8892 * @param fromResults the source query
8993 * @param toResults the target query
@@ -104,19 +108,30 @@ export function sarifDiff(fromResults: Result[], toResults: Result[]) {
104108 const canonicalFromResults = fromResults . map ( toCanonicalResult ) ;
105109 const canonicalToResults = toResults . map ( toCanonicalResult ) ;
106110
107- const results = {
111+ const diffResults = {
108112 from : arrayDiff ( canonicalFromResults , canonicalToResults ) ,
109113 to : arrayDiff ( canonicalToResults , canonicalFromResults ) ,
110114 } ;
111115
112116 if (
113- fromResults . length === results . from . length &&
114- toResults . length === results . to . length
117+ fromResults . length === diffResults . from . length &&
118+ toResults . length === diffResults . to . length
115119 ) {
116120 throw new Error ( "CodeQL Compare: No overlap between the selected queries." ) ;
117121 }
118122
119- return results ;
123+ // We don't want to return the canonical results, we want to return the original results.
124+ // We can retrieve this by finding the index of the canonical result in the canonical results
125+ // and then using that index to find the original result. This is possible because we know that
126+ // we did a 1-to-1 map between the canonical results and the original results.
127+ return {
128+ from : diffResults . from . map (
129+ ( result ) => fromResults [ canonicalFromResults . indexOf ( result ) ] ,
130+ ) ,
131+ to : diffResults . to . map (
132+ ( result ) => toResults [ canonicalToResults . indexOf ( result ) ] ,
133+ ) ,
134+ } ;
120135}
121136
122137function arrayDiff < T > ( source : readonly T [ ] , toRemove : readonly T [ ] ) : T [ ] {
0 commit comments