@@ -127,7 +127,7 @@ string captureQualifierFlow(DataFlowSummaryTargetApi api) {
127127 api = returnNodeEnclosingCallable ( ret ) and
128128 isOwnInstanceAccessNode ( ret )
129129 ) and
130- result = Printing:: asValueModel ( api , qualifierString ( ) , "ReturnValue" )
130+ result = Printing:: asLiftedValueModel ( api , qualifierString ( ) , "ReturnValue" )
131131}
132132
133133private int accessPathLimit0 ( ) { result = 2 }
@@ -237,7 +237,7 @@ string captureThroughFlow0(
237237 input = parameterNodeAsInput ( p ) and
238238 output = getOutput ( returnNodeExt ) and
239239 input != output and
240- result = Printing:: asTaintModel ( api , input , output )
240+ result = Printing:: asLiftedTaintModel ( api , input , output )
241241 )
242242}
243243
@@ -291,26 +291,257 @@ private string getContent(PropagateContentFlow::AccessPath ap, int i) {
291291 )
292292}
293293
294+ /**
295+ * Gets the MaD string representation of a store step access path.
296+ */
294297private string printStoreAccessPath ( PropagateContentFlow:: AccessPath ap ) {
295298 result = concat ( int i | | getContent ( ap , i ) , "" order by i )
296299}
297300
301+ /**
302+ * Gets the MaD string representation of a read step access path.
303+ */
298304private string printReadAccessPath ( PropagateContentFlow:: AccessPath ap ) {
299305 result = concat ( int i | | getContent ( ap , i ) , "" order by i desc )
300306}
301307
302- string captureContentFlow ( DataFlowSummaryTargetApi api ) {
308+ /**
309+ * Holds if the access path `ap` contains a field or synthetic field access.
310+ */
311+ private predicate mentionsField ( PropagateContentFlow:: AccessPath ap ) {
312+ exists ( ContentSet head , PropagateContentFlow:: AccessPath tail |
313+ head = ap .getHead ( ) and
314+ tail = ap .getTail ( )
315+ |
316+ mentionsField ( tail ) or isField ( head )
317+ )
318+ }
319+
320+ private predicate apiFlow (
321+ DataFlowSummaryTargetApi api , DataFlow:: ParameterNode p , PropagateContentFlow:: AccessPath reads ,
322+ ReturnNodeExt returnNodeExt , PropagateContentFlow:: AccessPath stores , boolean preservesValue
323+ ) {
324+ PropagateContentFlow:: flow ( p , reads , returnNodeExt , stores , preservesValue ) and
325+ returnNodeExt .getEnclosingCallable ( ) = api and
326+ p .getEnclosingCallable ( ) = api
327+ }
328+
329+ /**
330+ * A class of APIs relevant for modeling using content flow.
331+ * The following heuristic is applied:
332+ * Content flow is only relevant for an API, if
333+ * #content flow <= 2 * #parameters + 3
334+ * If an API produces more content flow, it is likely that
335+ * 1. Types are not sufficiently constrained leading to a combinatorial
336+ * explosion in dispatch and thus in the generated summaries.
337+ * 2. It is a reasonable approximation to use the non-content based flow
338+ * detection instead, as reads and stores would use a significant
339+ * part of an objects internal state.
340+ */
341+ private class ContentDataFlowSummaryTargetApi extends DataFlowSummaryTargetApi {
342+ ContentDataFlowSummaryTargetApi ( ) {
343+ count ( string input , string output |
344+ exists (
345+ DataFlow:: ParameterNode p , PropagateContentFlow:: AccessPath reads ,
346+ ReturnNodeExt returnNodeExt , PropagateContentFlow:: AccessPath stores
347+ |
348+ apiFlow ( this , p , reads , returnNodeExt , stores , _) and
349+ input = parameterNodeAsContentInput ( p ) + printReadAccessPath ( reads ) and
350+ output = getContentOutput ( returnNodeExt ) + printStoreAccessPath ( stores )
351+ )
352+ ) <= 2 * this .getNumberOfParameters ( ) + 3
353+ }
354+ }
355+
356+ pragma [ nomagic]
357+ private predicate apiContentFlow (
358+ ContentDataFlowSummaryTargetApi api , DataFlow:: ParameterNode p ,
359+ PropagateContentFlow:: AccessPath reads , ReturnNodeExt returnNodeExt ,
360+ PropagateContentFlow:: AccessPath stores , boolean preservesValue
361+ ) {
362+ PropagateContentFlow:: flow ( p , reads , returnNodeExt , stores , preservesValue ) and
363+ returnNodeExt .getEnclosingCallable ( ) = api and
364+ p .getEnclosingCallable ( ) = api
365+ }
366+
367+ /**
368+ * Holds if any of the content sets in `path` translates into a synthetic field.
369+ */
370+ private predicate hasSyntheticContent ( PropagateContentFlow:: AccessPath path ) {
371+ exists ( PropagateContentFlow:: AccessPath tail , ContentSet head |
372+ head = path .getHead ( ) and
373+ tail = path .getTail ( )
374+ |
375+ exists ( getSyntheticName ( head ) ) or
376+ hasSyntheticContent ( tail )
377+ )
378+ }
379+
380+ /**
381+ * A module containing predicates for validating access paths containing content sets
382+ * that translates into synthetic fields, when used for generated summary models.
383+ */
384+ private module AccessPathSyntheticValidation {
385+ /**
386+ * Holds if there exists an API that has content flow from `read` (on type `t1`)
387+ * to `store` (on type `t2`).
388+ */
389+ private predicate step (
390+ Type t1 , PropagateContentFlow:: AccessPath read , Type t2 , PropagateContentFlow:: AccessPath store
391+ ) {
392+ exists ( DataFlow:: ParameterNode p , ReturnNodeExt returnNodeExt |
393+ p .getType ( ) = t1 and
394+ returnNodeExt .getType ( ) = t2 and
395+ apiContentFlow ( _, p , read , returnNodeExt , store , _)
396+ )
397+ }
398+
399+ /**
400+ * Holds if there exists an API that has content flow from `read` (on type `t1`)
401+ * to `store` (on type `t2`), where `read` does not have synthetic content and `store` does.
402+ *
403+ * Step A -> Synth.
404+ */
405+ private predicate synthPathEntry (
406+ Type t1 , PropagateContentFlow:: AccessPath read , Type t2 , PropagateContentFlow:: AccessPath store
407+ ) {
408+ not hasSyntheticContent ( read ) and
409+ hasSyntheticContent ( store ) and
410+ step ( t1 , read , t2 , store )
411+ }
412+
413+ /**
414+ * Holds if there exists an API that has content flow from `read` (on type `t1`)
415+ * to `store` (on type `t2`), where `read` has synthetic content
416+ * and `store` does not.
417+ *
418+ * Step Synth -> A.
419+ */
420+ private predicate synthPathExit (
421+ Type t1 , PropagateContentFlow:: AccessPath read , Type t2 , PropagateContentFlow:: AccessPath store
422+ ) {
423+ hasSyntheticContent ( read ) and
424+ not hasSyntheticContent ( store ) and
425+ step ( t1 , read , t2 , store )
426+ }
427+
428+ /**
429+ * Holds if there exists a path of steps from `read` to an exit.
430+ *
431+ * read ->* Synth -> A
432+ */
433+ private predicate reachesSynthExit ( Type t , PropagateContentFlow:: AccessPath read ) {
434+ synthPathExit ( t , read , _, _)
435+ or
436+ hasSyntheticContent ( read ) and
437+ exists ( PropagateContentFlow:: AccessPath mid , Type midType |
438+ hasSyntheticContent ( mid ) and
439+ step ( t , read , midType , mid ) and
440+ reachesSynthExit ( midType , mid .reverse ( ) )
441+ )
442+ }
443+
444+ /**
445+ * Holds if there exists a path of steps from an entry to `store`.
446+ *
447+ * A -> Synth ->* store
448+ */
449+ private predicate synthEntryReaches ( Type t , PropagateContentFlow:: AccessPath store ) {
450+ synthPathEntry ( _, _, t , store )
451+ or
452+ hasSyntheticContent ( store ) and
453+ exists ( PropagateContentFlow:: AccessPath mid , Type midType |
454+ hasSyntheticContent ( mid ) and
455+ step ( midType , mid , t , store ) and
456+ synthEntryReaches ( midType , mid .reverse ( ) )
457+ )
458+ }
459+
460+ /**
461+ * Holds if at least one of the access paths `read` (on type `t1`) and `store` (on type `t2`)
462+ * contain content that will be translated into a synthetic field, when being used in
463+ * a MaD summary model, and if there is a range of APIs, such that
464+ * when chaining their flow access paths, there exists access paths `A` and `B` where
465+ * A ->* read -> store ->* B and where `A` and `B` do not contain content that will
466+ * be translated into a synthetic field.
467+ *
468+ * This is needed because we don't want to include summaries that reads from or
469+ * stores into a "dead" synthetic field.
470+ *
471+ * Example:
472+ * Assume we have a type `t` (in this case `t1` = `t2`) with methods `getX` and
473+ * `setX`, which gets and sets a private field `X` on `t`.
474+ * This would lead to the following content flows
475+ * getX : Argument[this].SyntheticField[t.X] -> ReturnValue.
476+ * setX : Argument[0] -> Argument[this].SyntheticField[t.X]
477+ * As the reads and stores are on synthetic fields we should only make summaries
478+ * if both of these methods exist.
479+ */
480+ pragma [ nomagic]
481+ predicate acceptReadStore (
482+ Type t1 , PropagateContentFlow:: AccessPath read , Type t2 , PropagateContentFlow:: AccessPath store
483+ ) {
484+ synthPathEntry ( t1 , read , t2 , store ) and reachesSynthExit ( t2 , store .reverse ( ) )
485+ or
486+ exists ( PropagateContentFlow:: AccessPath store0 | store0 .reverse ( ) = read |
487+ synthEntryReaches ( t1 , store0 ) and synthPathExit ( t1 , read , t2 , store )
488+ or
489+ synthEntryReaches ( t1 , store0 ) and
490+ step ( t1 , read , t2 , store ) and
491+ reachesSynthExit ( t2 , store .reverse ( ) )
492+ )
493+ }
494+ }
495+
496+ /**
497+ * Holds, if the API `api` has relevant flow from `read` on `p` to `store` on `returnNodeExt`.
498+ * Flow is considered relevant,
499+ * 1. If `read` or `store` do not contain a content set that translates into a synthetic field.
500+ * 2. If `read` or `store` contain a content set that translates into a synthetic field, and if
501+ * the synthetic content is "live" on the relevant declaring type.
502+ */
503+ private predicate apiRelevantContentFlow (
504+ ContentDataFlowSummaryTargetApi api , DataFlow:: ParameterNode p ,
505+ PropagateContentFlow:: AccessPath read , ReturnNodeExt returnNodeExt ,
506+ PropagateContentFlow:: AccessPath store , boolean preservesValue
507+ ) {
508+ apiContentFlow ( api , p , read , returnNodeExt , store , preservesValue ) and
509+ (
510+ not hasSyntheticContent ( read ) and not hasSyntheticContent ( store )
511+ or
512+ AccessPathSyntheticValidation:: acceptReadStore ( p .getType ( ) , read , returnNodeExt .getType ( ) , store )
513+ )
514+ }
515+
516+ pragma [ nomagic]
517+ private predicate captureContentFlow0 (
518+ ContentDataFlowSummaryTargetApi api , string input , string output , boolean preservesValue ,
519+ boolean lift
520+ ) {
303521 exists (
304- DataFlow:: ParameterNode p , ReturnNodeExt returnNodeExt , string input , string output ,
305- PropagateContentFlow:: AccessPath reads , PropagateContentFlow:: AccessPath stores ,
306- boolean preservesValue
522+ DataFlow:: ParameterNode p , ReturnNodeExt returnNodeExt , PropagateContentFlow:: AccessPath reads ,
523+ PropagateContentFlow:: AccessPath stores
307524 |
308- PropagateContentFlow:: flow ( p , reads , returnNodeExt , stores , preservesValue ) and
309- returnNodeExt .getEnclosingCallable ( ) = api and
525+ apiRelevantContentFlow ( api , p , reads , returnNodeExt , stores , preservesValue ) and
310526 input = parameterNodeAsContentInput ( p ) + printReadAccessPath ( reads ) and
311527 output = getContentOutput ( returnNodeExt ) + printStoreAccessPath ( stores ) and
312528 input != output and
313- result = Printing:: asModel ( api , input , output , preservesValue )
529+ ( if mentionsField ( reads ) or mentionsField ( stores ) then lift = false else lift = true )
530+ )
531+ }
532+
533+ /**
534+ * Gets the content based summary model(s) of the API `api` (if there is flow from a parameter to
535+ * the return value or a parameter).
536+ *
537+ * Models are lifted to the best type in case the read and store access paths do not
538+ * contain a field or synthetic field access.
539+ */
540+ string captureContentFlow ( ContentDataFlowSummaryTargetApi api ) {
541+ exists ( string input , string output , boolean lift , boolean preservesValue |
542+ captureContentFlow0 ( api , input , output , _, lift ) and
543+ preservesValue = max ( boolean p | captureContentFlow0 ( api , input , output , p , lift ) ) and
544+ result = Printing:: asModel ( api , input , output , preservesValue , lift )
314545 )
315546}
316547
0 commit comments