1313 */
1414
1515import cpp
16+ import semmle.code.cpp.controlflow.Guards
1617
/**
 * A pointer type whose base type is a `WideCharType`.
 */
class WideCharPointerType extends PointerType {
  WideCharPointerType() { exists(WideCharType wide | wide = this.getBaseType()) }
}
2021
/**
 * Gets `t` itself, or any type reached from `t` by repeatedly stepping
 * to the base type of a `DerivedType` or of a `TypedefType`.
 */
Type getABaseType(Type t) {
  // Zero steps: the type itself is always part of the result.
  result = t
  or
  // One step through a derived type or a typedef, then recurse.
  exists(Type inner |
    inner = t.(DerivedType).getBaseType()
    or
    inner = t.(TypedefType).getBaseType()
  |
    result = getABaseType(inner)
  )
}
33+
/**
 * A type that may also be a `CharPointerType`, but that is likely used as an
 * arbitrary buffer rather than as a string.
 *
 * The check is applied to the type itself and to every type reachable through
 * `getABaseType`, so typedefs and derived types wrapping a matching type are
 * covered as well.
 */
class UnlikelyToBeAStringType extends Type {
  UnlikelyToBeAStringType() {
    exists(Type targ, string lowered |
      targ = getABaseType(this) and
      lowered = targ.getName().toLowerCase()
    |
      // NOTE: not using `CharType.isUnsigned`, but rather looking for any explicitly
      // declared unsigned char type. Assuming these are used for buffers, not strings.
      targ instanceof CharType and lowered.matches("unsigned%")
      or
      // Names equal to `uint8_t`, or containing `byte`, compared case-insensitively.
      lowered.matches(["uint8_t", "%byte%"])
    )
  }
}
3247
/**
 * A type that can be wide depending on the UNICODE macro.
 * See https://learn.microsoft.com/en-us/windows/win32/winprog/windows-data-types
 *
 * A type qualifies when it, or any type reachable from it through
 * `getABaseType`, has one of the listed names.
 */
class UnicodeMacroDependentWidthType extends Type {
  UnicodeMacroDependentWidthType() {
    getABaseType(this).getName() = ["LPCTSTR", "LPTSTR", "PCTSTR", "PTSTR", "TBYTE", "TCHAR"]
  }
}
64+
/**
 * A macro whose name contains `unicode`, compared case-insensitively.
 */
class UnicodeMacro extends Macro {
  UnicodeMacro() {
    exists(string lowered | lowered = this.getName().toLowerCase() |
      lowered.matches("%unicode%")
    )
  }
}
68+
/**
 * An invocation of a `UnicodeMacro`.
 */
class UnicodeMacroInvocation extends MacroInvocation {
  UnicodeMacroInvocation() { exists(UnicodeMacro unicodeMacro | unicodeMacro = this.getMacro()) }
}
72+
/**
 * Holds when `e` is an expression whose type is a `UnicodeMacroDependentWidthType`
 * and which is observed to be guarded by a check involving a bitwise-and
 * operation that has a `UnicodeMacroInvocation` as one of its operands.
 *
 * Such expressions are assumed to be checked dynamically, i.e., the flag
 * would indicate whether UNICODE typing is set correctly to allow or
 * disallow a widening cast.
 */
predicate isLikelyDynamicallyChecked(Expr e) {
  e.getType() instanceof UnicodeMacroDependentWidthType and
  exists(GuardCondition guard, BitwiseAndExpr maskTest, BasicBlock guarded |
    maskTest.getAnOperand() = any(UnicodeMacroInvocation umi).getExpr() and
    guarded = e.getBasicBlock()
  |
    // `maskTest == 0` is false when reaching `guarded`,
    // that is, `maskTest != 0` when reaching `guarded`.
    guard.ensuresEq(maskTest, 0, guarded, false)
    or
    // `maskTest == k` for some `k != 0` is true when reaching `guarded`.
    exists(int k | k != 0 | guard.ensuresEq(maskTest, k, guarded, true))
  )
}
94+
3395from Expr e1 , Cast e2
3496where
3597 e2 = e1 .getConversion ( ) and
@@ -42,7 +104,11 @@ where
42104 not e1 .getType ( ) instanceof UnlikelyToBeAStringType and
43105 // Avoid castings from 'new' expressions as typically these will be safe
44106 // Example: `__Type* ret = reinterpret_cast<__Type*>(New(m_pmo) char[num * sizeof(__Type)]);`
45- not exists ( NewOrNewArrayExpr newExpr | newExpr .getAChild * ( ) = e1 )
107+ not exists ( NewOrNewArrayExpr newExpr | newExpr .getAChild * ( ) = e1 ) and
108+ // Avoid cases where the cast is guarded by a check to determine if
109+ // unicode encoding is enabled in such a way to disallow the dangerous cast
110+ // at runtime.
111+ not isLikelyDynamicallyChecked ( e1 )
46112select e1 ,
47113 "Conversion from " + e1 .getType ( ) .toString ( ) + " to " + e2 .getType ( ) .toString ( ) +
48114 ". Use of invalid string can lead to undefined behavior."
0 commit comments