@@ -9,6 +9,83 @@ import semmle.code.cpp.models.interfaces.FormattingFunction
99private import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
1010private import semmle.code.cpp.rangeanalysis.RangeAnalysisUtils
1111
12+ private newtype TBufferWriteEstimationReason =
13+ TNoSpecifiedEstimateReason ( ) or
14+ TTypeBoundsAnalysis ( ) or
15+ TValueFlowAnalysis ( )
16+
17+ /**
18+ * A reason for a specific buffer write size estimate.
19+ */
20+ abstract class BufferWriteEstimationReason extends TBufferWriteEstimationReason {
21+ /**
22+ * Returns the name of the concrete class.
23+ */
24+ abstract string toString ( ) ;
25+
26+ /**
27+ * Returns a human readable representation of this reason.
28+ */
29+ abstract string getDescription ( ) ;
30+
31+ /**
32+ * Combine estimate reasons. Used to give a reason for the size of a format string
33+ * conversion given reasons coming from its individual specifiers.
34+ */
35+ abstract BufferWriteEstimationReason combineWith ( BufferWriteEstimationReason other ) ;
36+ }
37+
38+ /**
39+ * No particular reason given. This is currently used for backward compatibility so that
40+ * classes derived from BufferWrite and overriding `getMaxData/0` still work with the
41+ * queries as intended.
42+ */
43+ class NoSpecifiedEstimateReason extends BufferWriteEstimationReason , TNoSpecifiedEstimateReason {
44+ override string toString ( ) { result = "NoSpecifiedEstimateReason" }
45+
46+ override string getDescription ( ) { result = "no reason specified" }
47+
48+ override BufferWriteEstimationReason combineWith ( BufferWriteEstimationReason other ) {
49+ // this reason should not be used in format specifiers, so it should not be combined
50+ // with other reasons
51+ none ( )
52+ }
53+ }
54+
55+ /**
56+ * The estimation comes from rough bounds just based on the type (e.g.
57+ * `0 <= x < 2^32` for an unsigned 32 bit integer).
58+ */
59+ class TypeBoundsAnalysis extends BufferWriteEstimationReason , TTypeBoundsAnalysis {
60+ override string toString ( ) { result = "TypeBoundsAnalysis" }
61+
62+ override string getDescription ( ) { result = "based on type bounds" }
63+
64+ override BufferWriteEstimationReason combineWith ( BufferWriteEstimationReason other ) {
65+ other != TNoSpecifiedEstimateReason ( ) and result = TTypeBoundsAnalysis ( )
66+ }
67+ }
68+
69+ /**
70+ * The estimation comes from non trivial bounds found via actual flow analysis.
71+ * For example
72+ * ```
73+ * unsigned u = x;
74+ * if (u < 1000) {
75+ * //... <- estimation done here based on u
76+ * }
77+ * ```
78+ */
79+ class ValueFlowAnalysis extends BufferWriteEstimationReason , TValueFlowAnalysis {
80+ override string toString ( ) { result = "ValueFlowAnalysis" }
81+
82+ override string getDescription ( ) { result = "based on flow analysis of value bounds" }
83+
84+ override BufferWriteEstimationReason combineWith ( BufferWriteEstimationReason other ) {
85+ other != TNoSpecifiedEstimateReason ( ) and result = other
86+ }
87+ }
88+
1289class PrintfFormatAttribute extends FormatAttribute {
1390 PrintfFormatAttribute ( ) { this .getArchetype ( ) = [ "printf" , "__printf__" ] }
1491}
@@ -990,7 +1067,14 @@ class FormatLiteral extends Literal {
9901067 * conversion specifier of this format string; has no result if this cannot
9911068 * be determined.
9921069 */
993- int getMaxConvertedLength ( int n ) {
1070+ int getMaxConvertedLength ( int n ) { result = max ( getMaxConvertedLength ( n , _) ) }
1071+
1072+ /**
1073+ * Gets the maximum length of the string that can be produced by the nth
1074+ * conversion specifier of this format string, specifying the estimation reason;
1075+ * has no result if this cannot be determined.
1076+ */
1077+ int getMaxConvertedLength ( int n , BufferWriteEstimationReason reason ) {
9941078 exists ( int len |
9951079 (
9961080 (
@@ -1002,10 +1086,12 @@ class FormatLiteral extends Literal {
10021086 ) and
10031087 (
10041088 this .getConversionChar ( n ) = "%" and
1005- len = 1
1089+ len = 1 and
1090+ reason = TValueFlowAnalysis ( )
10061091 or
10071092 this .getConversionChar ( n ) .toLowerCase ( ) = "c" and
1008- len = 1 // e.g. 'a'
1093+ len = 1 and
1094+ reason = TValueFlowAnalysis ( ) // e.g. 'a'
10091095 or
10101096 this .getConversionChar ( n ) .toLowerCase ( ) = "f" and
10111097 exists ( int dot , int afterdot |
@@ -1019,7 +1105,8 @@ class FormatLiteral extends Literal {
10191105 afterdot = 6
10201106 ) and
10211107 len = 1 + 309 + dot + afterdot
1022- ) // e.g. -1e308="-100000"...
1108+ ) and
1109+ reason = TTypeBoundsAnalysis ( ) // e.g. -1e308="-100000"...
10231110 or
10241111 this .getConversionChar ( n ) .toLowerCase ( ) = "e" and
10251112 exists ( int dot , int afterdot |
@@ -1033,7 +1120,8 @@ class FormatLiteral extends Literal {
10331120 afterdot = 6
10341121 ) and
10351122 len = 1 + 1 + dot + afterdot + 1 + 1 + 3
1036- ) // -1e308="-1.000000e+308"
1123+ ) and
1124+ reason = TTypeBoundsAnalysis ( ) // -1e308="-1.000000e+308"
10371125 or
10381126 this .getConversionChar ( n ) .toLowerCase ( ) = "g" and
10391127 exists ( int dot , int afterdot |
@@ -1056,67 +1144,80 @@ class FormatLiteral extends Literal {
10561144 // (e.g. 123456, 0.000123456 are just OK)
10571145 // so case %f can be at most P characters + 4 zeroes, sign, dot = P + 6
10581146 len = ( afterdot .maximum ( 1 ) + 6 ) .maximum ( 1 + 1 + dot + afterdot + 1 + 1 + 3 )
1059- ) // (e.g. "-1.59203e-319")
1147+ ) and
1148+ reason = TTypeBoundsAnalysis ( ) // (e.g. "-1.59203e-319")
10601149 or
10611150 this .getConversionChar ( n ) .toLowerCase ( ) = [ "d" , "i" ] and
10621151 // e.g. -2^31 = "-2147483648"
1063- len =
1064- min ( float cand |
1065- // The first case handles length sub-specifiers
1066- // Subtract one in the exponent because one bit is for the sign .
1067- // Add 1 to account for the possible sign in the output.
1068- cand = 1 + lengthInBase10 ( 2 .pow ( this .getIntegralDisplayType ( n ) .getSize ( ) * 8 - 1 ) )
1069- or
1070- // The second case uses range analysis to deduce a length that's shorter than the length
1071- // of the number -2^31.
1072- exists ( Expr arg , float lower , float upper |
1073- arg = this . getUse ( ) . getConversionArgument ( n ) and
1074- lower = lowerBound ( arg .getFullyConverted ( ) ) and
1075- upper = upperBound ( arg .getFullyConverted ( ) )
1076- |
1077- cand =
1078- max ( int cand0 |
1079- // Include the sign bit in the length if it can be negative
1080- (
1081- if lower < 0
1082- then cand0 = 1 + lengthInBase10 ( lower . abs ( ) )
1083- else cand0 = lengthInBase10 ( lower )
1084- )
1085- or
1086- (
1087- if upper < 0
1088- then cand0 = 1 + lengthInBase10 ( upper . abs ( ) )
1089- else cand0 = lengthInBase10 ( upper )
1090- )
1152+ exists ( float typeBasedBound , float valueBasedBound |
1153+ // The first case handles length sub-specifiers
1154+ // Subtract one in the exponent because one bit is for the sign.
1155+ // Add 1 to account for the possible sign in the output .
1156+ typeBasedBound =
1157+ 1 + lengthInBase10 ( 2 .pow ( this .getIntegralDisplayType ( n ) .getSize ( ) * 8 - 1 ) ) and
1158+ // The second case uses range analysis to deduce a length that's shorter than the length
1159+ // of the number -2^31.
1160+ exists ( Expr arg , float lower , float upper , float typeLower , float typeUpper |
1161+ arg = this . getUse ( ) . getConversionArgument ( n ) and
1162+ lower = lowerBound ( arg . getFullyConverted ( ) ) and
1163+ upper = upperBound ( arg .getFullyConverted ( ) ) and
1164+ typeLower = exprMinVal ( arg .getFullyConverted ( ) ) and
1165+ typeUpper = exprMaxVal ( arg . getFullyConverted ( ) )
1166+ |
1167+ valueBasedBound =
1168+ max ( int cand |
1169+ // Include the sign bit in the length if it can be negative
1170+ (
1171+ if lower < 0
1172+ then cand = 1 + lengthInBase10 ( lower . abs ( ) )
1173+ else cand = lengthInBase10 ( lower )
1174+ )
1175+ or
1176+ (
1177+ if upper < 0
1178+ then cand = 1 + lengthInBase10 ( upper . abs ( ) )
1179+ else cand = lengthInBase10 ( upper )
10911180 )
1181+ ) and
1182+ (
1183+ if lower > typeLower or upper < typeUpper
1184+ then reason = TValueFlowAnalysis ( )
1185+ else reason = TTypeBoundsAnalysis ( )
10921186 )
1093- )
1187+ ) and
1188+ len = valueBasedBound .minimum ( typeBasedBound )
1189+ )
10941190 or
10951191 this .getConversionChar ( n ) .toLowerCase ( ) = "u" and
10961192 // e.g. 2^32 - 1 = "4294967295"
1097- len =
1098- min ( float cand |
1099- // The first case handles length sub-specifiers
1100- cand = 2 .pow ( this .getIntegralDisplayType ( n ) .getSize ( ) * 8 )
1101- or
1102- // The second case uses range analysis to deduce a length that's shorter than
1103- // the length of the number 2^31 - 1.
1104- exists ( Expr arg , float lower |
1105- arg = this .getUse ( ) .getConversionArgument ( n ) and
1106- lower = lowerBound ( arg .getFullyConverted ( ) )
1107- |
1108- cand =
1109- max ( float cand0 |
1193+ exists ( float typeBasedBound , float valueBasedBound |
1194+ // The first case handles length sub-specifiers
1195+ typeBasedBound = lengthInBase10 ( 2 .pow ( this .getIntegralDisplayType ( n ) .getSize ( ) * 8 ) - 1 ) and
1196+ // The second case uses range analysis to deduce a length that's shorter than
1197+ // the length of the number 2^31 - 1.
1198+ exists ( Expr arg , float lower , float upper , float typeLower , float typeUpper |
1199+ arg = this .getUse ( ) .getConversionArgument ( n ) and
1200+ lower = lowerBound ( arg .getFullyConverted ( ) ) and
1201+ upper = upperBound ( arg .getFullyConverted ( ) ) and
1202+ typeLower = exprMinVal ( arg .getFullyConverted ( ) ) and
1203+ typeUpper = exprMaxVal ( arg .getFullyConverted ( ) )
1204+ |
1205+ valueBasedBound =
1206+ lengthInBase10 ( max ( float cand |
11101207 // If lower can be negative we use `(unsigned)-1` as the candidate value.
11111208 lower < 0 and
1112- cand0 = 2 .pow ( any ( IntType t | t .isUnsigned ( ) ) .getSize ( ) * 8 )
1209+ cand = 2 .pow ( any ( IntType t | t .isUnsigned ( ) ) .getSize ( ) * 8 )
11131210 or
1114- cand0 = upperBound ( arg .getFullyConverted ( ) )
1115- )
1211+ cand = upper
1212+ ) ) and
1213+ (
1214+ if lower > typeLower or upper < typeUpper
1215+ then reason = TValueFlowAnalysis ( )
1216+ else reason = TTypeBoundsAnalysis ( )
11161217 )
1117- |
1118- lengthInBase10 ( cand )
1119- )
1218+ ) and
1219+ len = valueBasedBound . minimum ( typeBasedBound )
1220+ )
11201221 or
11211222 this .getConversionChar ( n ) .toLowerCase ( ) = "x" and
11221223 // e.g. "12345678"
@@ -1135,7 +1236,8 @@ class FormatLiteral extends Literal {
11351236 (
11361237 if this .hasAlternateFlag ( n ) then len = 2 + baseLen else len = baseLen // "0x"
11371238 )
1138- )
1239+ ) and
1240+ reason = TTypeBoundsAnalysis ( )
11391241 or
11401242 this .getConversionChar ( n ) .toLowerCase ( ) = "p" and
11411243 exists ( PointerType ptrType , int baseLen |
@@ -1144,7 +1246,8 @@ class FormatLiteral extends Literal {
11441246 (
11451247 if this .hasAlternateFlag ( n ) then len = 2 + baseLen else len = baseLen // "0x"
11461248 )
1147- )
1249+ ) and
1250+ reason = TValueFlowAnalysis ( )
11481251 or
11491252 this .getConversionChar ( n ) .toLowerCase ( ) = "o" and
11501253 // e.g. 2^32 - 1 = "37777777777"
@@ -1163,14 +1266,16 @@ class FormatLiteral extends Literal {
11631266 (
11641267 if this .hasAlternateFlag ( n ) then len = 1 + baseLen else len = baseLen // "0"
11651268 )
1166- )
1269+ ) and
1270+ reason = TTypeBoundsAnalysis ( )
11671271 or
11681272 this .getConversionChar ( n ) .toLowerCase ( ) = "s" and
11691273 len =
11701274 min ( int v |
11711275 v = this .getPrecision ( n ) or
11721276 v = this .getUse ( ) .getFormatArgument ( n ) .( AnalysedString ) .getMaxLength ( ) - 1 // (don't count null terminator)
1173- )
1277+ ) and
1278+ reason = TValueFlowAnalysis ( )
11741279 )
11751280 )
11761281 }
@@ -1182,10 +1287,19 @@ class FormatLiteral extends Literal {
11821287 * determining whether a buffer overflow is caused by long float to string
11831288 * conversions.
11841289 */
1185- int getMaxConvertedLengthLimited ( int n ) {
1290+ int getMaxConvertedLengthLimited ( int n ) { result = max ( getMaxConvertedLengthLimited ( n , _) ) }
1291+
1292+ /**
1293+ * Gets the maximum length of the string that can be produced by the nth
1294+ * conversion specifier of this format string, specifying the reason for the
1295+ * estimation, except that float to string conversions are assumed to be 8
1296+ * characters. This is helpful for determining whether a buffer overflow is
1297+ * caused by long float to string conversions.
1298+ */
1299+ int getMaxConvertedLengthLimited ( int n , BufferWriteEstimationReason reason ) {
11861300 if this .getConversionChar ( n ) .toLowerCase ( ) = "f"
1187- then result = this .getMaxConvertedLength ( n ) .minimum ( 8 )
1188- else result = this .getMaxConvertedLength ( n )
1301+ then result = this .getMaxConvertedLength ( n , reason ) .minimum ( 8 )
1302+ else result = this .getMaxConvertedLength ( n , reason )
11891303 }
11901304
11911305 /**
@@ -1225,35 +1339,60 @@ class FormatLiteral extends Literal {
12251339 )
12261340 }
12271341
1228- private int getMaxConvertedLengthAfter ( int n ) {
1342+ private int getMaxConvertedLengthAfter ( int n , BufferWriteEstimationReason reason ) {
12291343 if n = this .getNumConvSpec ( )
1230- then result = this .getConstantSuffix ( ) .length ( ) + 1
1344+ then result = this .getConstantSuffix ( ) .length ( ) + 1 and reason = TValueFlowAnalysis ( )
12311345 else
1232- result =
1233- this .getConstantPart ( n ) .length ( ) + this .getMaxConvertedLength ( n ) +
1234- this .getMaxConvertedLengthAfter ( n + 1 )
1346+ exists ( BufferWriteEstimationReason headReason , BufferWriteEstimationReason tailReason |
1347+ result =
1348+ this .getConstantPart ( n ) .length ( ) + this .getMaxConvertedLength ( n , headReason ) +
1349+ this .getMaxConvertedLengthAfter ( n + 1 , tailReason ) and
1350+ reason = headReason .combineWith ( tailReason )
1351+ )
12351352 }
12361353
1237- private int getMaxConvertedLengthAfterLimited ( int n ) {
1354+ private int getMaxConvertedLengthAfterLimited ( int n , BufferWriteEstimationReason reason ) {
12381355 if n = this .getNumConvSpec ( )
1239- then result = this .getConstantSuffix ( ) .length ( ) + 1
1356+ then result = this .getConstantSuffix ( ) .length ( ) + 1 and reason = TValueFlowAnalysis ( )
12401357 else
1241- result =
1242- this .getConstantPart ( n ) .length ( ) + this .getMaxConvertedLengthLimited ( n ) +
1243- this .getMaxConvertedLengthAfterLimited ( n + 1 )
1358+ exists ( BufferWriteEstimationReason headReason , BufferWriteEstimationReason tailReason |
1359+ result =
1360+ this .getConstantPart ( n ) .length ( ) + this .getMaxConvertedLengthLimited ( n , headReason ) +
1361+ this .getMaxConvertedLengthAfterLimited ( n + 1 , tailReason ) and
1362+ reason = headReason .combineWith ( tailReason )
1363+ )
12441364 }
12451365
12461366 /**
12471367 * Gets the maximum length of the string that can be produced by this format
12481368 * string. Has no result if this cannot be determined.
12491369 */
1250- int getMaxConvertedLength ( ) { result = this .getMaxConvertedLengthAfter ( 0 ) }
1370+ int getMaxConvertedLength ( ) { result = this .getMaxConvertedLengthAfter ( 0 , _ ) }
12511371
12521372 /**
12531373 * Gets the maximum length of the string that can be produced by this format
12541374 * string, except that float to string conversions are assumed to be 8
12551375 * characters. This is helpful for determining whether a buffer overflow
12561376 * is caused by long float to string conversions.
12571377 */
1258- int getMaxConvertedLengthLimited ( ) { result = this .getMaxConvertedLengthAfterLimited ( 0 ) }
1378+ int getMaxConvertedLengthLimited ( ) { result = this .getMaxConvertedLengthAfterLimited ( 0 , _) }
1379+
1380+ /**
1381+ * Gets the maximum length of the string that can be produced by this format
1382+ * string, specifying the reason for the estimate. Has no result if no estimate
1383+ * can be found.
1384+ */
1385+ int getMaxConvertedLengthWithReason ( BufferWriteEstimationReason reason ) {
1386+ result = this .getMaxConvertedLengthAfter ( 0 , reason )
1387+ }
1388+
1389+ /**
1390+ * Gets the maximum length of the string that can be produced by this format
1391+ * string, specifying the reason for the estimate, except that float to string
1392+ * conversions are assumed to be 8 characters. This is helpful for determining
1393+ * whether a buffer overflow is caused by long float to string conversions.
1394+ */
1395+ int getMaxConvertedLengthLimitedWithReason ( BufferWriteEstimationReason reason ) {
1396+ result = this .getMaxConvertedLengthAfterLimited ( 0 , reason )
1397+ }
12591398}
0 commit comments