Skip to content

Commit fef08af

Browse files
committed
Python: Remove points-to from ControlFlowNode
Moves the existing points-to predicates to the newly added class `ControlFlowNodeWithPointsTo` which resides in the `LegacyPointsTo` module. (Existing code that uses these predicates should import this module, and references to `ControlFlowNode` should be changed to `ControlFlowNodeWithPointsTo`.) Also updates all existing points-to based code to do just this.
1 parent 4461be1 commit fef08af

File tree

75 files changed

+410
-236
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

75 files changed

+410
-236
lines changed

python/ql/examples/snippets/store_none.ql

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,10 @@
1010
*/
1111

1212
import python
13+
private import LegacyPointsTo
1314

1415
from SubscriptNode store
1516
where
1617
store.isStore() and
17-
store.getIndex().pointsTo(Value::named("None"))
18+
store.getIndex().(ControlFlowNodeWithPointsTo).pointsTo(Value::named("None"))
1819
select store

python/ql/lib/LegacyPointsTo.qll

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
/**
2+
* DEPRECATED: Using the methods in this module may lead to a degradation of performance. Use at
3+
* your own peril.
4+
*
5+
* This module contains legacy points-to predicates and methods for various classes in the
6+
* points-to analysis.
7+
*
8+
* Existing code that depends on, say, points-to predicates on `ControlFlowNode` should be modified
9+
* to use `ControlFlowNodeWithPointsTo` instead. In particular, if inside a method call chain such
10+
* as
11+
*
12+
* `someCallNode.getFunction().pointsTo(...)`
13+
*
14+
* an explicit cast should be added as follows
15+
*
16+
* `someCallNode.getFunction().(ControlFlowNodeWithPointsTo).pointsTo(...)`
17+
*
18+
* Similarly, if a bound variable has type `ControlFlowNode`, and a points-to method is called on
19+
* it, the type should be changed to `ControlFlowNodeWithPointsTo`.
20+
*/
21+
22+
private import python
23+
private import semmle.python.pointsto.PointsTo
24+
25+
/**
26+
* An extension of `ControlFlowNode` that provides points-to predicates.
27+
*/
28+
class ControlFlowNodeWithPointsTo extends ControlFlowNode {
29+
/** Holds if this ControlFlowNode points-to `value`. */
30+
predicate pointsTo(Value value) { this.pointsTo(_, value, _) }
31+
32+
/** Gets the value that this ControlFlowNode points-to. */
33+
Value pointsTo() { this.pointsTo(_, result, _) }
34+
35+
/** Gets a value that this ControlFlowNode may point to. */
36+
Value inferredValue() { this.pointsTo(_, result, _) }
37+
38+
/** Holds if this ControlFlowNode points-to `value`, which comes from `origin`. */
39+
predicate pointsTo(Value value, ControlFlowNode origin) { this.pointsTo(_, value, origin) }
40+
41+
/** Holds if this ControlFlowNode points-to `value`, which comes from `origin`, in the given `context`. */
42+
predicate pointsTo(Context context, Value value, ControlFlowNode origin) {
43+
PointsTo::pointsTo(this, context, value, origin)
44+
}
45+
46+
/**
47+
* Gets what this flow node might "refer-to". Performs a combination of localized (intra-procedural) points-to
48+
* analysis and global module-level analysis. This points-to analysis favours precision over recall. It is highly
49+
* precise, but may not provide information for a significant number of flow-nodes.
50+
* If the class is unimportant then use `refersTo(obj)` or `refersTo(obj, origin)` instead.
51+
*/
52+
pragma[nomagic]
53+
predicate refersTo(Object obj, ClassObject cls, ControlFlowNode origin) {
54+
this.refersTo(_, obj, cls, origin)
55+
}
56+
57+
/** Holds if this flow node might "refer-to" `obj`, with class `cls` and origin `origin`, in the given `context`. */
58+
pragma[nomagic]
59+
predicate refersTo(Context context, Object obj, ClassObject cls, ControlFlowNode origin) {
60+
not obj = unknownValue() and
61+
not cls = theUnknownType() and
62+
PointsTo::points_to(this, context, obj, cls, origin)
63+
}
64+
65+
/**
66+
* Holds if this flow node might "refer-to" `obj`, which is from `origin`.
67+
* Unlike `this.refersTo(obj, _, origin)`, this predicate includes results
68+
* where the class cannot be inferred.
69+
*/
70+
pragma[nomagic]
71+
predicate refersTo(Object obj, ControlFlowNode origin) {
72+
not obj = unknownValue() and
73+
PointsTo::points_to(this, _, obj, _, origin)
74+
}
75+
76+
/** Equivalent to `this.refersTo(obj, _)` */
77+
predicate refersTo(Object obj) { this.refersTo(obj, _) }
78+
79+
/**
80+
* Check whether this control-flow node has complete points-to information.
81+
* This would mean that the analysis managed to infer an overapproximation
82+
* of possible values at runtime.
83+
*/
84+
predicate hasCompletePointsToSet() {
85+
// If the tracking failed, then `this` will be its own "origin". In that
86+
// case, we want to exclude nodes for which there is also a different
87+
// origin, as that would indicate that some paths failed and some did not.
88+
this.refersTo(_, _, this) and
89+
not exists(ControlFlowNode other | other != this and this.refersTo(_, _, other))
90+
or
91+
// If `this` is a use of a variable, then we must have complete points-to
92+
// for that variable.
93+
exists(SsaVariable v | v.getAUse() = this | varHasCompletePointsToSet(v))
94+
}
95+
}
96+
97+
/**
98+
* Check whether a SSA variable has complete points-to information.
99+
* This would mean that the analysis managed to infer an overapproximation
100+
* of possible values at runtime.
101+
*/
102+
private predicate varHasCompletePointsToSet(SsaVariable var) {
103+
// Global variables may be modified non-locally or concurrently.
104+
not var.getVariable() instanceof GlobalVariable and
105+
(
106+
// If we have complete points-to information on the definition of
107+
// this variable, then the variable has complete information.
108+
var.getDefinition()
109+
.(DefinitionNode)
110+
.getValue()
111+
.(ControlFlowNodeWithPointsTo)
112+
.hasCompletePointsToSet()
113+
or
114+
// If this variable is a phi output, then we have complete
115+
// points-to information about it if all phi inputs had complete
116+
// information.
117+
forex(SsaVariable phiInput | phiInput = var.getAPhiInput() |
118+
varHasCompletePointsToSet(phiInput)
119+
)
120+
)
121+
}

python/ql/lib/analysis/DefinitionTracking.qll

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
*/
44

55
import python
6+
private import LegacyPointsTo
67
import semmle.python.pointsto.PointsTo
78
import IDEContextual
89

@@ -36,22 +37,22 @@ private predicate jump_to_defn(ControlFlowNode use, Definition defn) {
3637
)
3738
or
3839
exists(PythonModuleObject mod |
39-
use.(ImportExprNode).refersTo(mod) and
40+
use.(ImportExprNode).(ControlFlowNodeWithPointsTo).refersTo(mod) and
4041
defn.getAstNode() = mod.getModule()
4142
)
4243
or
4344
exists(PythonModuleObject mod, string name |
44-
use.(ImportMemberNode).getModule(name).refersTo(mod) and
45+
use.(ImportMemberNode).getModule(name).(ControlFlowNodeWithPointsTo).refersTo(mod) and
4546
scope_jump_to_defn_attribute(mod.getModule(), name, defn)
4647
)
4748
or
4849
exists(PackageObject package |
49-
use.(ImportExprNode).refersTo(package) and
50+
use.(ImportExprNode).(ControlFlowNodeWithPointsTo).refersTo(package) and
5051
defn.getAstNode() = package.getInitModule().getModule()
5152
)
5253
or
5354
exists(PackageObject package, string name |
54-
use.(ImportMemberNode).getModule(name).refersTo(package) and
55+
use.(ImportMemberNode).getModule(name).(ControlFlowNodeWithPointsTo).refersTo(package) and
5556
scope_jump_to_defn_attribute(package.getInitModule().getModule(), name, defn)
5657
)
5758
or
@@ -230,7 +231,7 @@ private predicate module_and_name_for_import_star_helper(
230231
ModuleObject mod, string name, ImportStarNode im_star, ImportStarRefinement def
231232
) {
232233
im_star = def.getDefiningNode() and
233-
im_star.getModule().refersTo(mod) and
234+
im_star.getModule().(ControlFlowNodeWithPointsTo).refersTo(mod) and
234235
name = def.getSourceVariable().getName()
235236
}
236237

@@ -239,7 +240,7 @@ pragma[noinline]
239240
private predicate variable_not_redefined_by_import_star(EssaVariable var, ImportStarRefinement def) {
240241
var = def.getInput() and
241242
exists(ModuleObject mod |
242-
def.getDefiningNode().(ImportStarNode).getModule().refersTo(mod) and
243+
def.getDefiningNode().(ImportStarNode).getModule().(ControlFlowNodeWithPointsTo).refersTo(mod) and
243244
not mod.exports(var.getSourceVariable().getName())
244245
)
245246
}
@@ -352,7 +353,9 @@ private predicate scope_jump_to_defn_attribute(ImportTimeScope s, string name, D
352353
)
353354
}
354355

355-
private predicate jump_to_defn_attribute(ControlFlowNode use, string name, Definition defn) {
356+
private predicate jump_to_defn_attribute(
357+
ControlFlowNodeWithPointsTo use, string name, Definition defn
358+
) {
356359
/* Local attribute */
357360
exists(EssaVariable var |
358361
use = var.getASourceUse() and
@@ -367,7 +370,7 @@ private predicate jump_to_defn_attribute(ControlFlowNode use, string name, Defin
367370
/* Super attributes */
368371
exists(AttrNode f, SuperBoundMethod sbm, Object function |
369372
use = f.getObject(name) and
370-
f.refersTo(sbm) and
373+
f.(ControlFlowNodeWithPointsTo).refersTo(sbm) and
371374
function = sbm.getFunction(_) and
372375
function.getOrigin() = defn.getAstNode()
373376
)
@@ -408,7 +411,7 @@ private predicate attribute_assignment_jump_to_defn_attribute(
408411
private predicate sets_attribute(ArgumentRefinement def, string name) {
409412
exists(CallNode call |
410413
call = def.getDefiningNode() and
411-
call.getFunction().refersTo(Object::builtin("setattr")) and
414+
call.getFunction().(ControlFlowNodeWithPointsTo).refersTo(Object::builtin("setattr")) and
412415
def.getInput().getAUse() = call.getArg(0) and
413416
call.getArg(1).getNode().(StringLiteral).getText() = name
414417
)

python/ql/lib/semmle/python/Exprs.qll

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import python
2+
private import LegacyPointsTo
23
private import semmle.python.pointsto.PointsTo
34
private import semmle.python.objects.ObjectInternal
45
private import semmle.python.internal.CachedStages
@@ -71,7 +72,9 @@ class Expr extends Expr_, AstNode {
7172
* Gets what this expression might "refer-to" in the given `context`.
7273
*/
7374
predicate refersTo(Context context, Object obj, ClassObject cls, AstNode origin) {
74-
this.getAFlowNode().refersTo(context, obj, cls, origin.getAFlowNode())
75+
this.getAFlowNode()
76+
.(ControlFlowNodeWithPointsTo)
77+
.refersTo(context, obj, cls, origin.getAFlowNode())
7578
}
7679

7780
/**
@@ -82,7 +85,7 @@ class Expr extends Expr_, AstNode {
8285
*/
8386
pragma[nomagic]
8487
predicate refersTo(Object obj, AstNode origin) {
85-
this.getAFlowNode().refersTo(obj, origin.getAFlowNode())
88+
this.getAFlowNode().(ControlFlowNodeWithPointsTo).refersTo(obj, origin.getAFlowNode())
8689
}
8790

8891
/**
@@ -96,14 +99,16 @@ class Expr extends Expr_, AstNode {
9699
* in the given `context`.
97100
*/
98101
predicate pointsTo(Context context, Value value, AstNode origin) {
99-
this.getAFlowNode().pointsTo(context, value, origin.getAFlowNode())
102+
this.getAFlowNode()
103+
.(ControlFlowNodeWithPointsTo)
104+
.pointsTo(context, value, origin.getAFlowNode())
100105
}
101106

102107
/**
103108
* Holds if this expression might "point-to" to `value` which is from `origin`.
104109
*/
105110
predicate pointsTo(Value value, AstNode origin) {
106-
this.getAFlowNode().pointsTo(value, origin.getAFlowNode())
111+
this.getAFlowNode().(ControlFlowNodeWithPointsTo).pointsTo(value, origin.getAFlowNode())
107112
}
108113

109114
/**

python/ql/lib/semmle/python/Flow.qll

Lines changed: 0 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import python
2-
private import semmle.python.pointsto.PointsTo
32
private import semmle.python.internal.CachedStages
43
private import codeql.controlflow.BasicBlock as BB
54

@@ -144,56 +143,6 @@ class ControlFlowNode extends @py_flow_node {
144143
/** Whether this flow node is the first in its scope */
145144
predicate isEntryNode() { py_scope_flow(this, _, -1) }
146145

147-
/** Gets the value that this ControlFlowNode points-to. */
148-
predicate pointsTo(Value value) { this.pointsTo(_, value, _) }
149-
150-
/** Gets the value that this ControlFlowNode points-to. */
151-
Value pointsTo() { this.pointsTo(_, result, _) }
152-
153-
/** Gets a value that this ControlFlowNode may points-to. */
154-
Value inferredValue() { this.pointsTo(_, result, _) }
155-
156-
/** Gets the value and origin that this ControlFlowNode points-to. */
157-
predicate pointsTo(Value value, ControlFlowNode origin) { this.pointsTo(_, value, origin) }
158-
159-
/** Gets the value and origin that this ControlFlowNode points-to, given the context. */
160-
predicate pointsTo(Context context, Value value, ControlFlowNode origin) {
161-
PointsTo::pointsTo(this, context, value, origin)
162-
}
163-
164-
/**
165-
* Gets what this flow node might "refer-to". Performs a combination of localized (intra-procedural) points-to
166-
* analysis and global module-level analysis. This points-to analysis favours precision over recall. It is highly
167-
* precise, but may not provide information for a significant number of flow-nodes.
168-
* If the class is unimportant then use `refersTo(value)` or `refersTo(value, origin)` instead.
169-
*/
170-
pragma[nomagic]
171-
predicate refersTo(Object obj, ClassObject cls, ControlFlowNode origin) {
172-
this.refersTo(_, obj, cls, origin)
173-
}
174-
175-
/** Gets what this expression might "refer-to" in the given `context`. */
176-
pragma[nomagic]
177-
predicate refersTo(Context context, Object obj, ClassObject cls, ControlFlowNode origin) {
178-
not obj = unknownValue() and
179-
not cls = theUnknownType() and
180-
PointsTo::points_to(this, context, obj, cls, origin)
181-
}
182-
183-
/**
184-
* Whether this flow node might "refer-to" to `value` which is from `origin`
185-
* Unlike `this.refersTo(value, _, origin)` this predicate includes results
186-
* where the class cannot be inferred.
187-
*/
188-
pragma[nomagic]
189-
predicate refersTo(Object obj, ControlFlowNode origin) {
190-
not obj = unknownValue() and
191-
PointsTo::points_to(this, _, obj, _, origin)
192-
}
193-
194-
/** Equivalent to `this.refersTo(value, _)` */
195-
predicate refersTo(Object obj) { this.refersTo(obj, _) }
196-
197146
/** Gets the basic block containing this flow node */
198147
BasicBlock getBasicBlock() { result.contains(this) }
199148

@@ -259,23 +208,6 @@ class ControlFlowNode extends @py_flow_node {
259208
)
260209
}
261210

262-
/**
263-
* Check whether this control-flow node has complete points-to information.
264-
* This would mean that the analysis managed to infer an over approximation
265-
* of possible values at runtime.
266-
*/
267-
predicate hasCompletePointsToSet() {
268-
// If the tracking failed, then `this` will be its own "origin". In that
269-
// case, we want to exclude nodes for which there is also a different
270-
// origin, as that would indicate that some paths failed and some did not.
271-
this.refersTo(_, _, this) and
272-
not exists(ControlFlowNode other | other != this and this.refersTo(_, _, other))
273-
or
274-
// If `this` is a use of a variable, then we must have complete points-to
275-
// for that variable.
276-
exists(SsaVariable v | v.getAUse() = this | varHasCompletePointsToSet(v))
277-
}
278-
279211
/** Whether this strictly dominates other. */
280212
pragma[inline]
281213
predicate strictlyDominates(ControlFlowNode other) {
@@ -332,28 +264,6 @@ private class AnyNode extends ControlFlowNode {
332264
override AstNode getNode() { result = super.getNode() }
333265
}
334266

335-
/**
336-
* Check whether a SSA variable has complete points-to information.
337-
* This would mean that the analysis managed to infer an overapproximation
338-
* of possible values at runtime.
339-
*/
340-
private predicate varHasCompletePointsToSet(SsaVariable var) {
341-
// Global variables may be modified non-locally or concurrently.
342-
not var.getVariable() instanceof GlobalVariable and
343-
(
344-
// If we have complete points-to information on the definition of
345-
// this variable, then the variable has complete information.
346-
var.getDefinition().(DefinitionNode).getValue().hasCompletePointsToSet()
347-
or
348-
// If this variable is a phi output, then we have complete
349-
// points-to information about it if all phi inputs had complete
350-
// information.
351-
forex(SsaVariable phiInput | phiInput = var.getAPhiInput() |
352-
varHasCompletePointsToSet(phiInput)
353-
)
354-
)
355-
}
356-
357267
/** A control flow node corresponding to a call expression, such as `func(...)` */
358268
class CallNode extends ControlFlowNode {
359269
CallNode() { toAst(this) instanceof Call }

python/ql/lib/semmle/python/Metrics.qll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import python
2+
private import LegacyPointsTo
23

34
/** The metrics for a function */
45
class FunctionMetrics extends Function {
@@ -59,7 +60,7 @@ class FunctionMetrics extends Function {
5960
not non_coupling_method(result) and
6061
exists(Call call | call.getScope() = this |
6162
exists(FunctionObject callee | callee.getFunction() = result |
62-
call.getAFlowNode().getFunction().refersTo(callee)
63+
call.getAFlowNode().getFunction().(ControlFlowNodeWithPointsTo).refersTo(callee)
6364
)
6465
or
6566
exists(Attribute a | call.getFunc() = a |

0 commit comments

Comments
 (0)