Skip to content

Commit 7eacd87

Browse files
committed
C++: Apply widening based on number of bounds measure
1 parent 70a8c4f commit 7eacd87

File tree

5 files changed

+1443
-392
lines changed

5 files changed

+1443
-392
lines changed

cpp/ql/lib/semmle/code/cpp/rangeanalysis/SimpleRangeAnalysis.qll

Lines changed: 295 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,297 @@ private predicate isRecursiveExpr(Expr e) {
517517
)
518518
}
519519

520+
/**
521+
* Provides predicates that estimate the number of bounds that the range
522+
* analysis might produce.
523+
*/
524+
private module BoundsEstimate {
525+
/**
526+
* Gets the limit beyond which we enable widening. I.e., if the estimated
527+
* number of bounds exceeds this limit, we enable widening such that the limit
528+
* will not be reached.
529+
*/
530+
float getBoundsLimit() {
531+
// This limit is arbitrary, but low enough that it prevents timeouts on
532+
// specific observed customer databases (and the in the tests).
533+
result = 2.0.pow(40)
534+
}
535+
536+
/** Gets the maximum number of bounds possible when widening is used. */
537+
private int getNrOfWideningBounds() {
538+
result =
539+
max(ArithmeticType t | | count(wideningLowerBounds(t)).maximum(count(wideningUpperBounds(t))))
540+
}
541+
542+
/**
543+
* Holds if `boundFromGuard(guard, v, _, branch)` holds, but without
544+
* relying on range analysis (which would cause non-monotonic recursion
545+
* elsewhere).
546+
*/
547+
private predicate hasBoundFromGuard(Expr guard, VariableAccess v, boolean branch) {
548+
exists(Expr lhs | linearAccess(lhs, v, _, _) |
549+
relOpWithSwapAndNegate(guard, lhs, _, _, _, branch)
550+
or
551+
eqOpWithSwapAndNegate(guard, lhs, _, true, branch)
552+
or
553+
eqZeroWithNegate(guard, lhs, true, branch)
554+
)
555+
}
556+
557+
/** Holds if `def` and `v` is a guard phi node with a bound from a guard. */
558+
predicate isGuardPhiWithBound(RangeSsaDefinition def, StackVariable v, VariableAccess access) {
559+
exists(Expr guard, boolean branch |
560+
def.isGuardPhi(v, access, guard, branch) and
561+
hasBoundFromGuard(guard, access, branch)
562+
)
563+
}
564+
565+
/** Gets the number of bounds for `def` and `v` as guard phi node. */
566+
language[monotonicAggregates]
567+
private float nrOfBoundsPhiGuard(RangeSsaDefinition def, StackVariable v) {
568+
// If there's different `access`es, then they refer to the same variable
569+
// with the same lower bounds. Hence adding these guards make no sense (the
570+
// implementation will take the union but they'll be removed by
571+
// deduplication). Hence we use `max` as an approximation.
572+
result =
573+
max(VariableAccess access | isGuardPhiWithBound(def, v, access) | nrOfBoundsExpr(access))
574+
or
575+
def.isPhiNode(v) and
576+
not isGuardPhiWithBound(def, v, _) and
577+
result = 0
578+
}
579+
580+
/** Gets the number of bounds for `def` and `v` as normal phi node. */
581+
language[monotonicAggregates]
582+
private float nrOfBoundsPhiNormal(RangeSsaDefinition def, StackVariable v) {
583+
// The implementation
584+
result =
585+
strictsum(RangeSsaDefinition inputDef |
586+
inputDef = def.getAPhiInput(v)
587+
|
588+
nrOfBoundsDef(inputDef, v)
589+
)
590+
or
591+
def.isPhiNode(v) and
592+
not exists(def.getAPhiInput(v)) and
593+
result = 0
594+
}
595+
596+
/** Gets the number of bounds for `def` and `v` as an NE phi node. */
597+
private float nrOfBoundsNEPhi(RangeSsaDefinition def, StackVariable v) {
598+
exists(VariableAccess access | isNEPhi(v, def, access, _) and result = nrOfBoundsExpr(access))
599+
or
600+
def.isPhiNode(v) and
601+
not isNEPhi(v, def, _, _) and
602+
result = 0
603+
}
604+
605+
/** Gets the number of bounds for `def` and `v` as an unsupported guard phi node. */
606+
private float nrOfBoundsUnsupportedGuardPhi(RangeSsaDefinition def, StackVariable v) {
607+
exists(VariableAccess access |
608+
isUnsupportedGuardPhi(v, def, access) and
609+
result = nrOfBoundsExpr(access)
610+
)
611+
or
612+
def.isPhiNode(v) and
613+
not isUnsupportedGuardPhi(v, def, _) and
614+
result = 0
615+
}
616+
617+
private float nrOfBoundsPhi(RangeSsaDefinition def, StackVariable v) {
618+
// The cases for phi nodes are not mutually exclusive. For instance a phi
619+
// node can be both a guard phi node and a normal phi node. To handle this
620+
// we sum the contributions from the different cases.
621+
result =
622+
nrOfBoundsPhiGuard(def, v) + nrOfBoundsPhiNormal(def, v) + nrOfBoundsNEPhi(def, v) +
623+
nrOfBoundsUnsupportedGuardPhi(def, v) and
624+
result != 0
625+
}
626+
627+
/** Gets the estimated number of bounds for `def` and `v`. */
628+
float nrOfBoundsDef(RangeSsaDefinition def, StackVariable v) {
629+
// Recursive definitions are already widened, so we simply estimate them as
630+
// having the number of widening bounds available. This is crucial as it
631+
// ensures that we don't follow recursive cycles when calculating the
632+
// estimate. Had that not been the case the estimate itself would be at risk
633+
// of causing performance issues and being non-functional.
634+
if isRecursiveDef(def, v)
635+
then result = getNrOfWideningBounds()
636+
else (
637+
// Definitions with a defining value
638+
exists(Expr defExpr | assignmentDef(def, v, defExpr) and result = nrOfBoundsExpr(defExpr))
639+
or
640+
// Assignment operations with a defining value
641+
exists(AssignOperation assignOp |
642+
def = assignOp and
643+
assignOp.getLValue() = v.getAnAccess() and
644+
result = nrOfBoundsExpr(assignOp)
645+
)
646+
or
647+
// Phi nodes
648+
result = nrOfBoundsPhi(def, v)
649+
or
650+
unanalyzableDefBounds(def, v, _, _) and result = 1
651+
)
652+
}
653+
654+
/**
655+
* Gets a naive estimate of the number of bounds for `e`.
656+
*
657+
* The estimate is like an abstract interpretation of the range analysis,
658+
* where the abstract value is the number of bounds. For instance,
659+
* `nrOfBoundsExpr(12) = 1` and `nrOfBoundsExpr(x + y) = nrOfBoundsExpr(x) *
660+
* nrOfBoundsExpr(y)`.
661+
*
662+
* The estimated number of bounds will usually be greater than the actual
663+
* number of bounds, as the estimate can not detect cases where bounds are cut
664+
* down when tracked precisely. For instance, in
665+
* ```c
666+
* int x = 1;
667+
* if (cond) { x = 1; }
668+
* int y = x + x;
669+
* ```
670+
* the actual number of bounds for `y` is 1. However, the estimate will be 4
671+
* as the conditional assignment to `x` gives two bounds for `x` on the last
672+
* line and the addition gives 2 * 2 bounds. There are two sources of anncuracies:
673+
*
674+
* 1. Without tracking the lower bounds we can't see that `x` is assigned a
675+
* value that is equal to its lower bound.
676+
* 2. Had the conditional assignment been `x = 2` then the estimate of two
677+
* bounds for `x` would have been correct. However, the estimate of 4 for `y`
678+
* would still be incorrect. Summing the actual bounds `{1,2}` with itself
679+
* gives `{2,3,4}` which is only three bounds. Again, we can't realise this
680+
* without tracking the bounds.
681+
*
682+
* Since these inaccuracies compound the estimated number of bounds can often
683+
* be _much_ greater than the actual number of bounds. Do note though that the
684+
* estimate is not _guaranteed_ to be an upper bound. In some cases the
685+
* approximations might underestimate the number of bounds.
686+
*
687+
* This predicate is functional. This is crucial as:
688+
*
689+
* - It ensures that the computing the estimate itself is fast.
690+
* - Our use of monotonic aggregates assumes functionality.
691+
*
692+
* Any non-functional case should be considered a bug.
693+
*/
694+
float nrOfBoundsExpr(Expr e) {
695+
// Similarly to what we do for definitions, we do not attempt to measure the
696+
// number of bounds for recursive expressions.
697+
if isRecursiveExpr(e)
698+
then result = getNrOfWideningBounds()
699+
else
700+
if analyzableExpr(e)
701+
then
702+
// The cases here are an abstraction of and mirrors the cases inside
703+
// `getLowerBoundsImpl`/`getUpperBoundsImpl`.
704+
result = 1 and exists(getValue(e).toFloat())
705+
or
706+
exists(Expr operand | result = nrOfBoundsExpr(operand) |
707+
effectivelyMultipliesByPositive(e, operand, _)
708+
or
709+
effectivelyMultipliesByNegative(e, operand, _)
710+
)
711+
or
712+
exists(ConditionalExpr condExpr |
713+
e = condExpr and
714+
result = nrOfBoundsExpr(condExpr.getThen()) * nrOfBoundsExpr(condExpr.getElse())
715+
)
716+
or
717+
exists(BinaryArithmeticOperation binop |
718+
e = binop and
719+
result = nrOfBoundsExpr(binop.getLeftOperand()) * nrOfBoundsExpr(binop.getRightOperand())
720+
|
721+
e instanceof MaxExpr or
722+
e instanceof MinExpr or
723+
e instanceof AddExpr or
724+
e instanceof SubExpr or
725+
e instanceof UnsignedMulExpr
726+
)
727+
or
728+
exists(AssignExpr assign | e = assign and result = nrOfBoundsExpr(assign.getRValue()))
729+
or
730+
exists(AssignArithmeticOperation assignOp |
731+
e = assignOp and
732+
result = nrOfBoundsExpr(assignOp.getLValue()) * nrOfBoundsExpr(assignOp.getRValue())
733+
|
734+
e instanceof AssignAddExpr or
735+
e instanceof AssignSubExpr or
736+
e instanceof UnsignedAssignMulExpr
737+
)
738+
or
739+
// Handles `AssignMulByPositiveConstantExpr` and `AssignMulByNegativeConstantExpr`
740+
exists(AssignMulByConstantExpr mulExpr |
741+
e = mulExpr and
742+
result = nrOfBoundsExpr(mulExpr.getLValue())
743+
)
744+
or
745+
// Handles the prefix and postfix increment and decrement operators.
746+
exists(CrementOperation crementOp |
747+
e = crementOp and result = nrOfBoundsExpr(crementOp.getOperand())
748+
)
749+
or
750+
exists(RemExpr remExpr | e = remExpr | result = nrOfBoundsExpr(remExpr.getLeftOperand()))
751+
or
752+
exists(Conversion convExpr |
753+
e = convExpr and
754+
if convExpr.getUnspecifiedType() instanceof BoolType
755+
then result = 1
756+
else result = nrOfBoundsExpr(convExpr.getExpr())
757+
)
758+
or
759+
exists(RangeSsaDefinition def, StackVariable v |
760+
e = def.getAUse(v) and
761+
result = nrOfBoundsDef(def, v) and
762+
// Avoid returning two numbers when `e` is a use with a constant value.
763+
not exists(getValue(e).toFloat())
764+
)
765+
or
766+
e instanceof UnsignedBitwiseAndExpr and
767+
result = 1
768+
or
769+
exists(RShiftExpr rsExpr |
770+
e = rsExpr and
771+
exists(getValue(rsExpr.getRightOperand().getFullyConverted()).toInt()) and
772+
result = nrOfBoundsExpr(rsExpr.getLeftOperand())
773+
)
774+
else (
775+
exists(exprMinVal(e)) and result = 1
776+
)
777+
}
778+
}
779+
780+
/**
781+
* Holds if `v` is a variable for which widening should be used, as otherwise a
782+
* very large number of bounds might be generated during the range analysis for
783+
* `v`.
784+
*/
785+
private predicate varHasTooManyBounds(StackVariable v) {
786+
exists(RangeSsaDefinition def |
787+
def.getAVariable() = v and
788+
BoundsEstimate::nrOfBoundsDef(def, v) > BoundsEstimate::getBoundsLimit()
789+
)
790+
}
791+
792+
/**
793+
* Holds if `e` is an expression for which widening should be used, as otherwise
794+
* a very large number of bounds might be generated during the range analysis
795+
* for `e`.
796+
*/
797+
private predicate exprHasTooManyBounds(Expr e) {
798+
BoundsEstimate::nrOfBoundsExpr(e) > BoundsEstimate::getBoundsLimit()
799+
or
800+
// A subexpressions of an expression with too many bounds may itself not have
801+
// to many bounds. For instance, `x + y` can have too many bounds without `x`
802+
// having as well. But in these cases, still want to consider `e` as having
803+
// too many bounds since:
804+
// - The overall result is widened anyway, so widening `e` as well is unlikely
805+
// to cause further precision loss.
806+
// - The number of bounds could be very large but still below the arbitrary
807+
// limit. Hence widening `e` can improve performance.
808+
exists(Expr pe | exprHasTooManyBounds(pe) and e.getParent() = pe)
809+
}
810+
520811
/**
521812
* Holds if `binop` is a binary operation that's likely to be assigned a
522813
* quadratic (or more) number of candidate bounds during the analysis. This can
@@ -667,7 +958,7 @@ private float getTruncatedLowerBounds(Expr expr) {
667958
if exprMinVal(expr) <= newLB and newLB <= exprMaxVal(expr)
668959
then
669960
// Apply widening where we might get a combinatorial explosion.
670-
if isRecursiveBinary(expr)
961+
if isRecursiveBinary(expr) or exprHasTooManyBounds(expr)
671962
then result = widenLowerBound(expr.getUnspecifiedType(), newLB)
672963
else result = newLB
673964
else result = exprMinVal(expr)
@@ -721,7 +1012,7 @@ private float getTruncatedUpperBounds(Expr expr) {
7211012
if exprMinVal(expr) <= newUB and newUB <= exprMaxVal(expr)
7221013
then
7231014
// Apply widening where we might get a combinatorial explosion.
724-
if isRecursiveBinary(expr)
1015+
if isRecursiveBinary(expr) or exprHasTooManyBounds(expr)
7251016
then result = widenUpperBound(expr.getUnspecifiedType(), newUB)
7261017
else result = newUB
7271018
else result = exprMaxVal(expr)
@@ -1812,7 +2103,7 @@ module SimpleRangeAnalysisInternal {
18122103
|
18132104
// Widening: check whether the new lower bound is from a source which
18142105
// depends recursively on the current definition.
1815-
if isRecursiveDef(def, v)
2106+
if isRecursiveDef(def, v) or varHasTooManyBounds(v)
18162107
then
18172108
// The new lower bound is from a recursive source, so we round
18182109
// down to one of a limited set of values to prevent the
@@ -1836,7 +2127,7 @@ module SimpleRangeAnalysisInternal {
18362127
|
18372128
// Widening: check whether the new upper bound is from a source which
18382129
// depends recursively on the current definition.
1839-
if isRecursiveDef(def, v)
2130+
if isRecursiveDef(def, v) or varHasTooManyBounds(v)
18402131
then
18412132
// The new upper bound is from a recursive source, so we round
18422133
// up to one of a fixed set of values to prevent the recursion

0 commit comments

Comments
 (0)