@@ -60,7 +60,7 @@ private module SensitiveDataModeling {
6060 ) {
6161 t .start ( ) and
6262 exists ( Function f |
63- nameIndicatesSensitiveData ( f .getName ( ) , classification ) and
63+ f .getName ( ) = sensitiveString ( classification ) and
6464 result .asExpr ( ) = f .getDefinition ( )
6565 )
6666 or
@@ -83,7 +83,7 @@ private module SensitiveDataModeling {
8383 // Note: If this is implemented with type-tracking, we will get cross-talk as
8484 // illustrated in python/ql/test/experimental/dataflow/sensitive-data/test.py
8585 exists ( DataFlow:: LocalSourceNode source |
86- nameIndicatesSensitiveData ( source .asExpr ( ) .( StrConst ) .getText ( ) , classification ) and
86+ source .asExpr ( ) .( StrConst ) .getText ( ) = sensitiveString ( classification ) and
8787 source .flowsTo ( result )
8888 )
8989 }
@@ -97,7 +97,7 @@ private module SensitiveDataModeling {
9797 or
9898 // to cover functions that we don't have the definition for, and where the
9999 // reference to the function has not already been marked as being sensitive
100- nameIndicatesSensitiveData ( this .getFunction ( ) .asCfgNode ( ) .( NameNode ) .getId ( ) , classification )
100+ this .getFunction ( ) .asCfgNode ( ) .( NameNode ) .getId ( ) = sensitiveString ( classification )
101101 }
102102
103103 override SensitiveDataClassification getClassification ( ) { result = classification }
@@ -164,6 +164,68 @@ private module SensitiveDataModeling {
164164 nodeFrom = possibleSensitiveCallable ( )
165165 }
166166
167+ pragma [ nomagic]
168+ private string sensitiveStrConstCandidate ( ) {
169+ result = any ( StrConst s | not s .isDocString ( ) ) .getText ( ) and
170+ not result .regexpMatch ( notSensitiveRegexp ( ) )
171+ }
172+
173+ pragma [ nomagic]
174+ private string sensitiveAttributeNameCandidate ( ) {
175+ result = any ( DataFlow:: AttrRead a ) .getAttributeName ( ) and
176+ not result .regexpMatch ( notSensitiveRegexp ( ) )
177+ }
178+
179+ pragma [ nomagic]
180+ private string sensitiveParameterNameCandidate ( ) {
181+ result = any ( Parameter p ) .getName ( ) and
182+ not result .regexpMatch ( notSensitiveRegexp ( ) )
183+ }
184+
185+ pragma [ nomagic]
186+ private string sensitiveFunctionNameCandidate ( ) {
187+ result = any ( Function f ) .getName ( ) and
188+ not result .regexpMatch ( notSensitiveRegexp ( ) )
189+ }
190+
191+ pragma [ nomagic]
192+ private string sensitiveNameCandidate ( ) {
193+ result = any ( Name n ) .getId ( ) and
194+ not result .regexpMatch ( notSensitiveRegexp ( ) )
195+ }
196+
197+ /**
198+ * This helper predicate serves to deduplicate the results of the preceding predicates. This
199+ * means that if, say, an attribute and a function parameter have the same name, then that name will
200+ * only be matched once, which greatly cuts down on the number of regexp matches that have to be
201+ * performed.
202+ *
203+ * Under normal circumstances, deduplication is only performed when a predicate is materialized, and
204+ * so to see the effect of this we must create a separate predicate that calculates the union of the
205+ * preceding predicates.
206+ */
207+ pragma [ nomagic]
208+ private string sensitiveStringCandidate ( ) {
209+ result in [
210+ sensitiveNameCandidate ( ) , sensitiveAttributeNameCandidate ( ) ,
211+ sensitiveParameterNameCandidate ( ) , sensitiveFunctionNameCandidate ( ) ,
212+ sensitiveStrConstCandidate ( )
213+ ]
214+ }
215+
216+ /**
217+ * Returns strings (primarily the names of various program entities) that may contain sensitive data
218+ * with the classification `classification`.
219+ *
220+ * This helper predicate ends up being very similar to `nameIndicatesSensitiveData`,
221+ * but is performance optimized to limit the number of regexp matches that have to be performed.
222+ */
223+ pragma [ nomagic]
224+ private string sensitiveString ( SensitiveDataClassification classification ) {
225+ result = sensitiveStringCandidate ( ) and
226+ result .regexpMatch ( maybeSensitiveRegexp ( classification ) )
227+ }
228+
167229 /**
168230 * Any kind of variable assignment (also including with/for) where the name indicates
169231 * it contains sensitive data.
@@ -182,7 +244,7 @@ private module SensitiveDataModeling {
182244
183245 SensitiveVariableAssignment ( ) {
184246 exists ( DefinitionNode def |
185- nameIndicatesSensitiveData ( def .( NameNode ) .getId ( ) , classification ) and
247+ def .( NameNode ) .getId ( ) = sensitiveString ( classification ) and
186248 (
187249 this .asCfgNode ( ) = def .getValue ( )
188250 or
@@ -193,7 +255,7 @@ private module SensitiveDataModeling {
193255 )
194256 or
195257 exists ( With with |
196- nameIndicatesSensitiveData ( with .getOptionalVars ( ) .( Name ) .getId ( ) , classification ) and
258+ with .getOptionalVars ( ) .( Name ) .getId ( ) = sensitiveString ( classification ) and
197259 this .asExpr ( ) = with .getContextExpr ( )
198260 )
199261 }
@@ -209,7 +271,7 @@ private module SensitiveDataModeling {
209271 // Things like `foo.<sensitive-name>` or `from <module> import <sensitive-name>`
210272 // I considered excluding any `from ... import something_sensitive`, but then realized that
211273 // we should flag up `form ... import password as ...` as a password
212- nameIndicatesSensitiveData ( this .( DataFlow:: AttrRead ) .getAttributeName ( ) , classification )
274+ this .( DataFlow:: AttrRead ) .getAttributeName ( ) = sensitiveString ( classification )
213275 or
214276 // Things like `getattr(foo, <reference-to-string>)`
215277 this .( DataFlow:: AttrRead ) .getAttributeNameExpr ( ) = sensitiveLookupStringConst ( classification )
@@ -246,9 +308,7 @@ private module SensitiveDataModeling {
246308 class SensitiveParameter extends SensitiveDataSource:: Range , DataFlow:: ParameterNode {
247309 SensitiveDataClassification classification ;
248310
249- SensitiveParameter ( ) {
250- nameIndicatesSensitiveData ( this .getParameter ( ) .getName ( ) , classification )
251- }
311+ SensitiveParameter ( ) { this .getParameter ( ) .getName ( ) = sensitiveString ( classification ) }
252312
253313 override SensitiveDataClassification getClassification ( ) { result = classification }
254314 }
0 commit comments