@@ -23,6 +23,7 @@ import semmle.code.cpp.ir.ValueNumbering
2323import semmle.code.cpp.controlflow.IRGuards
2424import semmle.code.cpp.ir.IR
2525import codeql.util.Unit
26+ import FinalFlow:: PathGraph
2627
2728pragma [ nomagic]
2829Instruction getABoundIn ( SemBound b , IRFunction func ) {
@@ -318,7 +319,7 @@ module InvalidPointerToDerefBarrier {
318319 private module BarrierConfig implements DataFlow:: ConfigSig {
319320 predicate isSource ( DataFlow:: Node source ) {
320321 // The sources is the same as in the sources for `InvalidPointerToDerefConfig`.
321- invalidPointerToDerefSource ( _, source , _)
322+ invalidPointerToDerefSource ( _, _ , source , _)
322323 }
323324
324325 additional predicate isSink (
@@ -336,7 +337,7 @@ module InvalidPointerToDerefBarrier {
336337 private int getInvalidPointerToDerefSourceDelta ( DataFlow:: Node node ) {
337338 exists ( DataFlow:: Node source |
338339 flow ( source , node ) and
339- invalidPointerToDerefSource ( _, source , result )
340+ invalidPointerToDerefSource ( _, _ , source , result )
340341 )
341342 }
342343
@@ -373,7 +374,7 @@ module InvalidPointerToDerefBarrier {
373374 * by `AllocToInvalidPointerConfig` to a dereference of the pointer.
374375 */
375376module InvalidPointerToDerefConfig implements DataFlow:: ConfigSig {
376- predicate isSource ( DataFlow:: Node source ) { invalidPointerToDerefSource ( _, source , _) }
377+ predicate isSource ( DataFlow:: Node source ) { invalidPointerToDerefSource ( _, _ , source , _) }
377378
378379 pragma [ inline]
379380 predicate isSink ( DataFlow:: Node sink ) { isInvalidPointerDerefSink ( sink , _, _, _) }
@@ -388,189 +389,155 @@ module InvalidPointerToDerefConfig implements DataFlow::ConfigSig {
388389module InvalidPointerToDerefFlow = DataFlow:: Global< InvalidPointerToDerefConfig > ;
389390
390391/**
391- * Holds if `pai` is a pointer-arithmetic operation and `source` is a dataflow node with a
392- * pointer-value that is non-strictly upper bounded by `pai + delta`.
392+ * Holds if `source1` is dataflow node that represents an allocation that flows to the
393+ * left-hand side of the pointer-arithmetic `pai`, and `derefSource` is a dataflow node with
394+ * a pointer-value that is non-strictly upper bounded by `pai + delta`.
393395 *
394396 * For example, if `pai` is a pointer-arithmetic operation `p + size` in an expression such
395- * as `(p + size) + 1` and `source ` is the node representing `(p + size) + 1`. In this
397+ * as `(p + size) + 1` and `derefSource ` is the node representing `(p + size) + 1`. In this
396398 * case `delta` is 1.
397399 */
398400predicate invalidPointerToDerefSource (
399- PointerArithmeticInstruction pai , DataFlow:: Node source , int delta
401+ DataFlow :: Node source1 , PointerArithmeticInstruction pai , DataFlow:: Node derefSource , int delta
400402) {
401403 exists (
402- AllocToInvalidPointerFlow:: PathNode1 p1 , AllocToInvalidPointerFlow:: PathNode2 p2 ,
403- DataFlow:: Node sink1 , DataFlow:: Node sink2 , int delta0
404+ AllocToInvalidPointerFlow:: PathNode1 pSource1 , AllocToInvalidPointerFlow:: PathNode1 pSink1 ,
405+ AllocToInvalidPointerFlow:: PathNode2 pSink2 , DataFlow:: Node sink1 , DataFlow:: Node sink2 ,
406+ int delta0
404407 |
405- pragma [ only_bind_out ] ( p1 .getNode ( ) ) = sink1 and
406- pragma [ only_bind_out ] ( p2 .getNode ( ) ) = sink2 and
407- AllocToInvalidPointerFlow:: flowPath ( _, _, pragma [ only_bind_into ] ( p1 ) , pragma [ only_bind_into ] ( p2 ) ) and
408+ pragma [ only_bind_out ] ( pSource1 .getNode ( ) ) = source1 and
409+ pragma [ only_bind_out ] ( pSink1 .getNode ( ) ) = sink1 and
410+ pragma [ only_bind_out ] ( pSink2 .getNode ( ) ) = sink2 and
411+ AllocToInvalidPointerFlow:: flowPath ( pSource1 , _, pragma [ only_bind_into ] ( pSink1 ) ,
412+ pragma [ only_bind_into ] ( pSink2 ) ) and
408413 // Note that `delta` is not necessarily equal to `delta0`:
409414 // `delta0` is the constant offset added to the size of the allocation, and
410415 // delta is the constant difference between the pointer-arithmetic instruction
411416 // and the instruction computing the address for which we will search for a dereference.
412417 isSinkImpl ( pai , sink1 , sink2 , delta0 ) and
413- bounded2 ( source .asInstruction ( ) , pai , delta ) and
418+ bounded2 ( derefSource .asInstruction ( ) , pai , delta ) and
414419 delta >= 0 and
415- not source .getBasicBlock ( ) = Barrier2:: getABarrierBlock ( delta0 )
420+ not derefSource .getBasicBlock ( ) = Barrier2:: getABarrierBlock ( delta0 )
416421 )
417422}
418423
419- newtype TMergedPathNode =
420- // The path nodes computed by the first projection of `AllocToInvalidPointerConfig`
421- TPathNode1 ( AllocToInvalidPointerFlow:: PathNode1 p ) or
422- // The path nodes computed by `InvalidPointerToDerefConfig`
423- TPathNode3 ( InvalidPointerToDerefFlow:: PathNode p ) or
424- // The read/write that uses the invalid pointer identified by `InvalidPointerToDerefConfig`.
425- // This one is needed because the sink identified by `InvalidPointerToDerefConfig` is the
426- // pointer, but we want to raise an alert at the dereference.
427- TPathNodeSink ( Instruction i ) {
428- exists ( DataFlow:: Node n |
429- InvalidPointerToDerefFlow:: flowTo ( pragma [ only_bind_into ] ( n ) ) and
430- isInvalidPointerDerefSink ( n , i , _, _) and
431- i = getASuccessor ( n .asInstruction ( ) )
432- )
433- }
434-
435- class MergedPathNode extends TMergedPathNode {
436- string toString ( ) { none ( ) }
437-
438- final AllocToInvalidPointerFlow:: PathNode1 asPathNode1 ( ) { this = TPathNode1 ( result ) }
439-
440- final InvalidPointerToDerefFlow:: PathNode asPathNode3 ( ) { this = TPathNode3 ( result ) }
441-
442- final Instruction asSinkNode ( ) { this = TPathNodeSink ( result ) }
443-
444- predicate hasLocationInfo (
445- string filepath , int startline , int startcolumn , int endline , int endcolumn
446- ) {
447- none ( )
448- }
424+ /**
425+ * Holds if `derefSink` is a dataflow node that represents an out-of-bounds address that is about to
426+ * be dereferenced by `operation` (which is either a `StoreInstruction` or `LoadInstruction`), and
427+ * `pai` is the pointer-arithmetic operation that caused the `derefSink` to be out-of-bounds.
428+ */
429+ predicate derefSinkToOperation (
430+ DataFlow:: Node derefSink , PointerArithmeticInstruction pai , DataFlow:: Node operation
431+ ) {
432+ exists ( DataFlow:: Node source , Instruction i |
433+ InvalidPointerToDerefFlow:: flow ( pragma [ only_bind_into ] ( source ) ,
434+ pragma [ only_bind_into ] ( derefSink ) ) and
435+ invalidPointerToDerefSource ( _, pai , source , _) and
436+ isInvalidPointerDerefSink ( derefSink , i , _, _) and
437+ i = getASuccessor ( derefSink .asInstruction ( ) ) and
438+ operation .asInstruction ( ) = i
439+ )
449440}
450441
451- class PathNode1 extends MergedPathNode , TPathNode1 {
452- override string toString ( ) {
453- exists ( AllocToInvalidPointerFlow :: PathNode1 p |
454- this = TPathNode1 ( p ) and
455- result = p . toString ( )
456- )
457- }
458-
459- override predicate hasLocationInfo (
460- string filepath , int startline , int startcolumn , int endline , int endcolumn
461- ) {
462- this . asPathNode1 ( ) . hasLocationInfo ( filepath , startline , startcolumn , endline , endcolumn )
463- }
464- }
442+ /**
443+ * A configuration that represents the full dataflow path all the way from
444+ * the allocation to the dereference. We need this final dataflow traversal
445+ * to ensure that the transition from the sink in `AllocToInvalidPointerConfig`
446+ * to the source in `InvalidPointerToDerefFlow` did not make us construct an
447+ * infeasible path (which can happen since the transition from one configuration
448+ * to the next does not preserve information about call contexts).
449+ */
450+ module FinalConfig implements DataFlow :: StateConfigSig {
451+ newtype FlowState =
452+ additional TInitial ( ) or
453+ additional TPointerArith ( PointerArithmeticInstruction pai ) {
454+ invalidPointerToDerefSource ( _ , pai , _ , _ )
455+ }
465456
466- class PathNode3 extends MergedPathNode , TPathNode3 {
467- override string toString ( ) {
468- exists ( InvalidPointerToDerefFlow :: PathNode p |
469- this = TPathNode3 ( p ) and
470- result = p . toString ( )
457+ predicate isSource ( DataFlow :: Node source , FlowState state ) {
458+ state = TInitial ( ) and
459+ exists ( DataFlow :: Node derefSource |
460+ invalidPointerToDerefSource ( source , _ , derefSource , _ ) and
461+ InvalidPointerToDerefFlow :: flow ( derefSource , _ )
471462 )
472463 }
473464
474- override predicate hasLocationInfo (
475- string filepath , int startline , int startcolumn , int endline , int endcolumn
476- ) {
477- this .asPathNode3 ( ) .hasLocationInfo ( filepath , startline , startcolumn , endline , endcolumn )
478- }
479- }
480-
481- class PathSinkNode extends MergedPathNode , TPathNodeSink {
482- override string toString ( ) {
483- exists ( Instruction i |
484- this = TPathNodeSink ( i ) and
485- result = i .toString ( )
465+ predicate isSink ( DataFlow:: Node sink , FlowState state ) {
466+ exists ( PointerArithmeticInstruction pai |
467+ derefSinkToOperation ( _, pai , sink ) and state = TPointerArith ( pai )
486468 )
487469 }
488470
489- override predicate hasLocationInfo (
490- string filepath , int startline , int startcolumn , int endline , int endcolumn
471+ predicate isAdditionalFlowStep (
472+ DataFlow :: Node node1 , FlowState state1 , DataFlow :: Node node2 , FlowState state2
491473 ) {
492- this .asSinkNode ( )
493- .getLocation ( )
494- .hasLocationInfo ( filepath , startline , startcolumn , endline , endcolumn )
474+ // A step from the left-hand side of a pointer-arithmetic operation that has been
475+ // identified as creating an out-of-bounds pointer to the result of the pointer-arithmetic
476+ // operation.
477+ exists (
478+ PointerArithmeticInstruction pai , AllocToInvalidPointerFlow:: PathNode1 p1 ,
479+ InvalidPointerToDerefFlow:: PathNode p2
480+ |
481+ isSinkImpl ( pai , node1 , _, _) and
482+ invalidPointerToDerefSource ( _, pai , node2 , _) and
483+ node1 = p1 .getNode ( ) and
484+ node2 = p2 .getNode ( ) and
485+ state1 = TInitial ( ) and
486+ state2 = TPointerArith ( pai )
487+ )
488+ or
489+ // A step from an out-of-bounds address to the operation (which is either a `StoreInstruction`
490+ // or a `LoadInstruction`) that dereferences the address.
491+ // This step exists purely for aesthetic reasons: we want the alert to be placed at the operation
492+ // that causes the dereference, and not at the address that flows into the operation.
493+ state1 = state2 and
494+ exists ( DataFlow:: Node derefSource , PointerArithmeticInstruction pai |
495+ InvalidPointerToDerefFlow:: flow ( derefSource , node1 ) and
496+ invalidPointerToDerefSource ( _, pai , derefSource , _) and
497+ state1 = TPointerArith ( pai ) and
498+ derefSinkToOperation ( node1 , pai , node2 )
499+ )
495500 }
496501}
497502
498- query predicate edges ( MergedPathNode node1 , MergedPathNode node2 ) {
499- node1 .asPathNode1 ( ) .getASuccessor ( ) = node2 .asPathNode1 ( )
500- or
501- joinOn1 ( _, node1 .asPathNode1 ( ) , node2 .asPathNode3 ( ) )
502- or
503- node1 .asPathNode3 ( ) .getASuccessor ( ) = node2 .asPathNode3 ( )
504- or
505- joinOn2 ( node1 .asPathNode3 ( ) , node2 .asSinkNode ( ) , _, _)
506- }
507-
508- query predicate nodes ( MergedPathNode n , string key , string val ) {
509- AllocToInvalidPointerFlow:: PathGraph1:: nodes ( n .asPathNode1 ( ) , key , val )
510- or
511- InvalidPointerToDerefFlow:: PathGraph:: nodes ( n .asPathNode3 ( ) , key , val )
512- or
513- key = "semmle.label" and val = n .asSinkNode ( ) .toString ( )
514- }
515-
516- query predicate subpaths (
517- MergedPathNode arg , MergedPathNode par , MergedPathNode ret , MergedPathNode out
518- ) {
519- AllocToInvalidPointerFlow:: PathGraph1:: subpaths ( arg .asPathNode1 ( ) , par .asPathNode1 ( ) ,
520- ret .asPathNode1 ( ) , out .asPathNode1 ( ) )
521- or
522- InvalidPointerToDerefFlow:: PathGraph:: subpaths ( arg .asPathNode3 ( ) , par .asPathNode3 ( ) ,
523- ret .asPathNode3 ( ) , out .asPathNode3 ( ) )
524- }
503+ module FinalFlow = DataFlow:: GlobalWithState< FinalConfig > ;
525504
526505/**
527- * Holds if `p1` is a sink of `AllocToInvalidPointerConfig` and `p2` is a source
528- * of `InvalidPointerToDerefConfig`, and they are connected through `pai`.
529- */
530- predicate joinOn1 (
531- PointerArithmeticInstruction pai , AllocToInvalidPointerFlow:: PathNode1 p1 ,
532- InvalidPointerToDerefFlow:: PathNode p2
533- ) {
534- isSinkImpl ( pai , p1 .getNode ( ) , _, _) and
535- invalidPointerToDerefSource ( pai , p2 .getNode ( ) , _)
536- }
537-
538- /**
539- * Holds if `p1` is a sink of `InvalidPointerToDerefConfig` and `i` is the instruction
540- * that dereferences `p1`. The string `operation` describes whether the `i` is
541- * a `StoreInstruction` or `LoadInstruction`.
506+ * Holds if `source` is an allocation that flows into the left-hand side of `pai`, which produces an out-of-bounds
507+ * pointer that flows into an address that is dereferenced by `sink` (which is either a `LoadInstruction` or a
508+ * `StoreInstruction`). The end result is that `sink` writes to an address that is off-by-`delta` from the end of
509+ * the allocation. The string `operation` describes whether the `sink` is a load or a store (which is then used
510+ * to produce the alert message).
511+ *
512+ * Note that multiple `delta`s can exist for a given `(source, pai, sink)` triplet.
542513 */
543- pragma [ inline]
544- predicate joinOn2 ( InvalidPointerToDerefFlow:: PathNode p1 , Instruction i , string operation , int delta ) {
545- isInvalidPointerDerefSink ( p1 .getNode ( ) , i , operation , delta )
546- }
547-
548514predicate hasFlowPath (
549- MergedPathNode source1 , MergedPathNode sink , InvalidPointerToDerefFlow :: PathNode source3 ,
550- PointerArithmeticInstruction pai , string operation , int delta
515+ FinalFlow :: PathNode source , FinalFlow :: PathNode sink , PointerArithmeticInstruction pai ,
516+ string operation , int delta
551517) {
552- exists ( InvalidPointerToDerefFlow:: PathNode sink3 , AllocToInvalidPointerFlow:: PathNode1 sink1 |
553- AllocToInvalidPointerFlow:: flowPath ( source1 .asPathNode1 ( ) , _, sink1 , _) and
554- joinOn1 ( pai , sink1 , source3 ) and
555- InvalidPointerToDerefFlow:: flowPath ( source3 , sink3 ) and
556- joinOn2 ( sink3 , sink .asSinkNode ( ) , operation , delta )
518+ exists (
519+ DataFlow:: Node derefSink , DataFlow:: Node derefSource , int deltaDerefSourceAndPai ,
520+ int deltaDerefSinkAndDerefAddress
521+ |
522+ FinalFlow:: flowPath ( source , sink ) and
523+ sink .getState ( ) = FinalConfig:: TPointerArith ( pai ) and
524+ invalidPointerToDerefSource ( source .getNode ( ) , pai , derefSource , deltaDerefSourceAndPai ) and
525+ InvalidPointerToDerefFlow:: flow ( derefSource , derefSink ) and
526+ derefSinkToOperation ( derefSink , pai , sink .getNode ( ) ) and
527+ isInvalidPointerDerefSink ( derefSink , sink .getNode ( ) .asInstruction ( ) , operation ,
528+ deltaDerefSinkAndDerefAddress ) and
529+ delta = deltaDerefSourceAndPai + deltaDerefSinkAndDerefAddress
557530 )
558531}
559532
560533from
561- MergedPathNode source , MergedPathNode sink , int k , string kstr , PointerArithmeticInstruction pai ,
562- string operation , Expr offset , DataFlow:: Node n
534+ FinalFlow :: PathNode source , FinalFlow :: PathNode sink , int k , string kstr ,
535+ PointerArithmeticInstruction pai , string operation , Expr offset , DataFlow:: Node n
563536where
564- k =
565- min ( int k2 , int k3 , InvalidPointerToDerefFlow:: PathNode source3 |
566- hasFlowPath ( source , sink , source3 , pai , operation , k3 ) and
567- invalidPointerToDerefSource ( pai , source3 .getNode ( ) , k2 )
568- |
569- k2 + k3
570- ) and
537+ k = min ( int cand | hasFlowPath ( source , sink , pai , operation , cand ) ) and
571538 offset = pai .getRight ( ) .getUnconvertedResultExpression ( ) and
572- n = source .asPathNode1 ( ) . getNode ( ) and
539+ n = source .getNode ( ) and
573540 if k = 0 then kstr = "" else kstr = " + " + k
574- select sink , source , sink ,
541+ select sink . getNode ( ) , source , sink ,
575542 "This " + operation + " might be out of bounds, as the pointer might be equal to $@ + $@" + kstr +
576543 "." , n , n .toString ( ) , offset , offset .toString ( )
0 commit comments