@@ -6,6 +6,8 @@ import semmle.code.cpp.Type
66import semmle.code.cpp.commons.CommonType
77import semmle.code.cpp.commons.StringAnalysis
88import semmle.code.cpp.models.interfaces.FormattingFunction
9+ private import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
10+ private import semmle.code.cpp.rangeanalysis.RangeAnalysisUtils
911
1012class PrintfFormatAttribute extends FormatAttribute {
1113 PrintfFormatAttribute ( ) { this .getArchetype ( ) = [ "printf" , "__printf__" ] }
@@ -268,6 +270,18 @@ class FormattingFunctionCall extends Expr {
268270 }
269271}
270272
273+ /**
274+ * Gets the number of digits required to represent the integer represented by `f`.
275+ *
276+ * `f` is assumed to be nonnegative.
277+ */
278+ bindingset [ f]
279+ private int lengthInBase10 ( float f ) {
280+ f = 0 and result = 1
281+ or
282+ result = f .log10 ( ) .floor ( ) + 1
283+ }
284+
271285/**
272286 * A class to represent format strings that occur as arguments to invocations of formatting functions.
273287 */
@@ -1046,39 +1060,63 @@ class FormatLiteral extends Literal {
10461060 or
10471061 this .getConversionChar ( n ) .toLowerCase ( ) = [ "d" , "i" ] and
10481062 // e.g. -2^31 = "-2147483648"
1049- exists ( int sizeBits |
1050- sizeBits =
1051- min ( int bits |
1052- bits = this .getIntegralDisplayType ( n ) .getSize ( ) * 8
1053- or
1054- exists ( IntegralType t |
1055- t = this .getUse ( ) .getConversionArgument ( n ) .getType ( ) .getUnderlyingType ( )
1056- |
1057- t .isSigned ( ) and bits = t .getSize ( ) * 8
1058- )
1059- ) and
1060- len = 1 + ( ( sizeBits - 1 ) / 10.0 .log2 ( ) ) .ceil ( )
1061- // this calculation is as %u (below) only we take out the sign bit (- 1) and allow a whole
1062- // character for it to be expressed as '-'.
1063- )
1063+ len =
1064+ min ( float cand |
1065+ // The first case handles length sub-specifiers
1066+ // Subtract one in the exponent because one bit is for the sign.
1067+ // Add 1 to account for the possible sign in the output.
1068+ cand = 1 + lengthInBase10 ( 2 .pow ( this .getIntegralDisplayType ( n ) .getSize ( ) * 8 - 1 ) )
1069+ or
1070+ // The second case uses range analysis to deduce a length that's shorter than the length
1071+ // of the number -2^31.
1072+ exists ( Expr arg , float lower , float upper |
1073+ arg = this .getUse ( ) .getConversionArgument ( n ) and
1074+ lower = lowerBound ( arg .getFullyConverted ( ) ) and
1075+ upper = upperBound ( arg .getFullyConverted ( ) )
1076+ |
1077+ cand =
1078+ max ( int cand0 |
1079+ // Include the sign bit in the length if it can be negative
1080+ (
1081+ if lower < 0
1082+ then cand0 = 1 + lengthInBase10 ( lower .abs ( ) )
1083+ else cand0 = lengthInBase10 ( lower )
1084+ )
1085+ or
1086+ (
1087+ if upper < 0
1088+ then cand0 = 1 + lengthInBase10 ( upper .abs ( ) )
1089+ else cand0 = lengthInBase10 ( upper )
1090+ )
1091+ )
1092+ )
1093+ )
10641094 or
10651095 this .getConversionChar ( n ) .toLowerCase ( ) = "u" and
10661096 // e.g. 2^32 - 1 = "4294967295"
1067- exists ( int sizeBits |
1068- sizeBits =
1069- min ( int bits |
1070- bits = this .getIntegralDisplayType ( n ) .getSize ( ) * 8
1071- or
1072- exists ( IntegralType t |
1073- t = this .getUse ( ) .getConversionArgument ( n ) .getType ( ) .getUnderlyingType ( )
1074- |
1075- t .isUnsigned ( ) and bits = t .getSize ( ) * 8
1076- )
1077- ) and
1078- len = ( sizeBits / 10.0 .log2 ( ) ) .ceil ( )
1079- // convert the size from bits to decimal characters, and round up as you can't have
1080- // fractional characters (10.0.log2() is the number of bits expressed per decimal character)
1081- )
1097+ len =
1098+ min ( float cand |
1099+ // The first case handles length sub-specifiers
1100+ cand = 2 .pow ( this .getIntegralDisplayType ( n ) .getSize ( ) * 8 )
1101+ or
1102+ // The second case uses range analysis to deduce a length that's shorter than
1103+ // the length of the number 2^31 - 1.
1104+ exists ( Expr arg , float lower |
1105+ arg = this .getUse ( ) .getConversionArgument ( n ) and
1106+ lower = lowerBound ( arg .getFullyConverted ( ) )
1107+ |
1108+ cand =
1109+ max ( float cand0 |
1110+ // If lower can be negative we use `(unsigned)-1` as the candidate value.
1111+ lower < 0 and
1112+ cand0 = 2 .pow ( any ( IntType t | t .isUnsigned ( ) ) .getSize ( ) * 8 )
1113+ or
1114+ cand0 = upperBound ( arg .getFullyConverted ( ) )
1115+ )
1116+ )
1117+ |
1118+ lengthInBase10 ( cand )
1119+ )
10821120 or
10831121 this .getConversionChar ( n ) .toLowerCase ( ) = "x" and
10841122 // e.g. "12345678"
0 commit comments