@@ -65,31 +65,75 @@ private import semmle.python.dataflow.new.internal.DataFlowPrivate
6565 */
6666module ImportResolution {
6767 /**
68- * Holds if the module `m` defines a name `name` by assigning `defn` to it. This is an
69- * overapproximation, as `name` may not in fact be exported (e.g. by defining an `__all__` that does
70- * not include `name`).
68+ * Holds if there is an ESSA step from `defFrom` to `defTo`, which should be allowed
69+ * for import resolution.
70+ */
71+ private predicate allowedEssaImportStep ( EssaDefinition defFrom , EssaDefinition defTo ) {
72+ // phi node: each input of the phi steps to the phi (handles definitions guarded by if-then-else)
73+ defFrom = defTo .( PhiFunction ) .getAnInput ( )
74+ or
75+ // refined variable: the definition of the refinement's input steps to the refinement
76+ // example: https://github.com/nvbn/thefuck/blob/ceeaeab94b5df5a4fe9d94d61e4f6b0bbea96378/thefuck/utils.py#L25-L45
77+ defFrom = defTo .( EssaNodeRefinement ) .getInput ( ) .getDefinition ( )
78+ }
79+
80+ /**
81+ * Holds if the module `m` defines a name `name` with the value `val`. The value
82+ * represents the value `name` will have at the end of the module (the last place we
83+ * have def-use flow to).
84+ *
85+ * Note: The handling of re-exporting imports is a bit simplistic. We assume that if
86+ * an import is made, it will be re-exported (which will not be the case if a new
87+ * value is assigned to the name, or it is deleted).
7188 */
7289 pragma [ nomagic]
73- predicate module_export ( Module m , string name , DataFlow:: CfgNode defn ) {
74- exists ( EssaVariable v , EssaDefinition essaDef |
75- v .getName ( ) = name and
76- v .getAUse ( ) = ImportStar:: getStarImported * ( m ) .getANormalExit ( ) and
77- (
78- essaDef = v .getDefinition ( )
79- or
80- // to handle definitions guarded by if-then-else
81- essaDef = v .getDefinition ( ) .( PhiFunction ) .getAnInput ( )
82- )
90+ predicate module_export ( Module m , string name , DataFlow:: Node val ) {
91+ // Definitions made inside `m` itself
92+ //
93+ // for code such as `foo = ...; foo.bar = ...` there will be TWO
94+ // EssaDefinition/EssaVariable: one for `foo = ...` (an AssignmentDefinition) and one
95+ // for `foo.bar = ...` (an EssaNodeRefinement). The
96+ // EssaNodeRefinement is the one that will reach the end of the module (normal
97+ // exit).
98+ //
99+ // However, we cannot just use the EssaNodeRefinement as the `val`, because the
100+ // normal data-flow depends on use-use flow, and use-use flow targets CFG nodes not
101+ // EssaNodes. So we need to go back from the EssaDefinition/EssaVariable that
102+ // reaches the end of the module, to the first definition of the variable, and then
103+ // track forwards using use-use flow to find a suitable CFG node that has flow into
104+ // it from use-use flow.
105+ exists ( EssaVariable lastUseVar , EssaVariable firstDef |
106+ lastUseVar .getName ( ) = name and
107+ // ignore the special variable `$` introduced by our analysis (not used for anything)
108+ // ignore the special variable `*` introduced by `from <pkg> import *` -- TODO: understand why we even have this?
109+ not name in [ "$" , "*" ] and
110+ lastUseVar .getAUse ( ) = m .getANormalExit ( ) and
111+ allowedEssaImportStep * ( firstDef , lastUseVar ) and
112+ not allowedEssaImportStep ( _, firstDef ) // nothing steps into `firstDef`: it is the start of the chain
83113 |
84- defn .getNode ( ) = essaDef .( AssignmentDefinition ) .getValue ( )
114+ not EssaFlow:: defToFirstUse ( firstDef , _) and
115+ val .asVar ( ) = firstDef
85116 or
86- defn .getNode ( ) = essaDef .( ArgumentRefinement ) .getArgument ( )
117+ exists ( ControlFlowNode mid , ControlFlowNode end |
118+ EssaFlow:: defToFirstUse ( firstDef , mid ) and
119+ EssaFlow:: useToNextUse * ( mid , end ) and
120+ not EssaFlow:: useToNextUse ( end , _) and // `end` has no next use: it is the last node of the use-use chain
121+ val .asCfgNode ( ) = end
122+ )
123+ )
124+ or
125+ // re-exports from `from <pkg> import *`
126+ exists ( Module importedFrom |
127+ importedFrom = ImportStar:: getStarImported ( m ) and
128+ module_export ( importedFrom , name , val ) and
129+ potential_module_export ( importedFrom , name )
87130 )
88131 or
132+ // re-exports from `import <pkg>` or `from <pkg> import <stuff>`
89133 exists ( Alias a |
90- defn .asExpr ( ) = [ a .getValue ( ) , a . getValue ( ) . ( ImportMember ) . getModule ( ) ] and
134+ val .asExpr ( ) = a .getValue ( ) and
91135 a .getAsname ( ) .( Name ) .getId ( ) = name and
92- defn .getScope ( ) = m
136+ val .getScope ( ) = m
93137 )
94138 }
95139
@@ -263,9 +307,21 @@ module ImportResolution {
263307 module_reexport ( reexporter , attr_name , m )
264308 )
265309 or
266- // Submodules that are implicitly defined with relative imports of the form `from .foo import ...`.
267- // In practice, we create a definition for each module in a package, even if it is not imported.
310+ // submodules of packages will be available as `<pkg>.<submodule>` after doing
311+ // `import <pkg>.<submodule>` at least once in the program, or can be directly
312+ // imported with `from <pkg> import <submodule>` (even with an empty
313+ // `<pkg>.__init__` file).
314+ //
315+ // Until an import of `<pkg>.<submodule>` is executed, it is technically possible
316+ // that `<pkg>.<submodule>` (or `from <pkg> import <submodule>`) can refer to an
317+ // attribute set in `<pkg>.__init__`.
318+ //
319+ // Therefore, if there is an attribute defined in `<pkg>.__init__` with the same
320+ // name as a submodule, we always consider that this attribute _could_ be a
321+ // reference to the submodule, even if we don't know that the submodule has been
322+ // imported yet.
268323 exists ( string submodule , Module package |
324+ submodule = result .asVar ( ) .getName ( ) and
269325 SsaSource:: init_module_submodule_defn ( result .asVar ( ) .getSourceVariable ( ) ,
270326 package .getEntryNode ( ) ) and
271327 m = getModuleFromName ( package .getPackageName ( ) + "." + submodule )
0 commit comments