1313 */
1414
1515import cpp
16+ import semmle.code.cpp.controlflow.Guards
1617
1718class WideCharPointerType extends PointerType {
1819 WideCharPointerType ( ) { this .getBaseType ( ) instanceof WideCharType }
1920}
2021
22+ /**
23+ * Recurse through types to find any intermediate type or final type
24+ * that suggests the type is unlikely to be a string.
25+ * Specifically looking for any unsigned character, or datatype with name containing "byte"
26+ * or datatype uint8_t.
27+ */
28+ predicate hasIntermediateType ( Type cur , Type targ ) {
29+ cur = targ
30+ or
31+ hasIntermediateType ( cur .( DerivedType ) .getBaseType ( ) , targ )
32+ or
33+ hasIntermediateType ( cur .( TypedefType ) .getBaseType ( ) , targ )
34+ }
35+
2136/**
2237 * A type that may also be `CharPointerType`, but that are likely used as arbitrary buffers.
2338 */
2439class UnlikelyToBeAStringType extends Type {
2540 UnlikelyToBeAStringType ( ) {
26- this .( PointerType ) .getBaseType ( ) .( CharType ) .isUnsigned ( ) or
27- this .( PointerType ) .getBaseType ( ) .getName ( ) .toLowerCase ( ) .matches ( "%byte" ) or
28- this .getName ( ) .toLowerCase ( ) .matches ( "%byte" ) or
29- this .( PointerType ) .getBaseType ( ) .hasName ( "uint8_t" )
41+ exists ( Type targ |
42+ targ .( CharType ) .isUnsigned ( ) or
43+ targ .getName ( ) .toLowerCase ( ) .matches ( [ "uint8_t" , "%byte%" ] )
44+ |
45+ hasIntermediateType ( this , targ )
46+ )
3047 }
3148}
3249
50+ // Types that can be wide depending on the UNICODE macro
51+ // see https://learn.microsoft.com/en-us/windows/win32/winprog/windows-data-types
52+ class UnicodeMacroDependentWidthType extends Type {
53+ UnicodeMacroDependentWidthType ( ) {
54+ exists ( Type targ |
55+ targ .getName ( ) in [
56+ "LPCTSTR" ,
57+ "LPTSTR" ,
58+ "PCTSTR" ,
59+ "PTSTR" ,
60+ "TBYTE" ,
61+ "TCHAR"
62+ ]
63+ |
64+ hasIntermediateType ( this , targ )
65+ )
66+ }
67+ }
68+
69+ class UnicodeMacro extends Macro {
70+ UnicodeMacro ( ) { this .getName ( ) .toLowerCase ( ) .matches ( "%unicode%" ) }
71+ }
72+
73+ class UnicodeMacroInvocation extends MacroInvocation {
74+ UnicodeMacroInvocation ( ) { this .getMacro ( ) instanceof UnicodeMacro }
75+ }
76+
77+ /**
78+ * Holds when a expression whose type is UnicodeMacroDependentWidthType and
79+ * is observed to be guarded by a check involving a bitwise-and operation
80+ * with a UnicodeMacroInvocation.
81+ * Such expressions are assumed to be checked dynamically, i.e.,
82+ * the flag would indicate if UNICODE typing is set correctly to allow
83+ * or disallow a widening cast.
84+ */
85+ predicate isLikelyDynamicChecked ( Expr e , GuardCondition gc ) {
86+ e .getType ( ) instanceof UnicodeMacroDependentWidthType and
87+ exists ( BitwiseAndExpr bai , UnicodeMacroInvocation umi | bai .getAnOperand ( ) = umi .getExpr ( ) |
88+ // bai == 0 is false when reaching `e.getBasicBlock()`.
89+ // That is, bai != 0 when reaching `e.getBasicBlock()`.
90+ gc .ensuresEq ( bai , 0 , e .getBasicBlock ( ) , false )
91+ or
92+ // bai == k and k != 0 is true when reaching `e.getBasicBlock()`.
93+ gc .ensuresEq ( bai , any ( int k | k != 0 ) , e .getBasicBlock ( ) , true )
94+ )
95+ }
96+
3397from Expr e1 , Cast e2
3498where
3599 e2 = e1 .getConversion ( ) and
@@ -42,7 +106,11 @@ where
42106 not e1 .getType ( ) instanceof UnlikelyToBeAStringType and
43107 // Avoid castings from 'new' expressions as typically these will be safe
44108 // Example: `__Type* ret = reinterpret_cast<__Type*>(New(m_pmo) char[num * sizeof(__Type)]);`
45- not exists ( NewOrNewArrayExpr newExpr | newExpr .getAChild * ( ) = e1 )
109+ not exists ( NewOrNewArrayExpr newExpr | newExpr .getAChild * ( ) = e1 ) and
110+ // Avoid cases where the cast is guarded by a check to determine if
111+ // unicode encoding is enabled in such a way to disallow the dangerous cast
112+ // at runtime.
113+ not isLikelyDynamicChecked ( e1 , _)
46114select e1 ,
47115 "Conversion from " + e1 .getType ( ) .toString ( ) + " to " + e2 .getType ( ) .toString ( ) +
48116 ". Use of invalid string can lead to undefined behavior."
0 commit comments