@@ -69,6 +69,8 @@ module Lxml {
6969 */
7070 class XPathCall extends XML:: XPathExecution:: Range , DataFlow:: CallCfgNode {
7171 XPathCall ( ) {
72+ // TODO: lxml.etree.parseid(<text>)[0] will contain the root element from parsing <text>
73+ // but we don't really have a way to model that nicely.
7274 this = [ Element:: instance ( ) , ElementTree:: instance ( ) ] .getMember ( "xpath" ) .getACall ( )
7375 }
7476
@@ -201,9 +203,10 @@ module Lxml {
201203 * A call to either of:
202204 * - `lxml.etree.fromstring`
203205 * - `lxml.etree.fromstringlist`
204- * -
206+ * - `lxml.etree.HTML`
205207 * - `lxml.etree.XML`
206208 * - `lxml.etree.XMLID`
209+ * - `lxml.etree.XMLDTDID`
207210 * - `lxml.etree.parse`
208211 * - `lxml.etree.parseid`
209212 *
@@ -329,7 +332,7 @@ module Lxml {
329332 * calls, or a special parameter that will be set when functions are called by an external
330333 * library.
331334 *
332- * Use the predicate `Element::instance()` to get references to instances of `lxml.etree.ElementTree ` instances.
335+ * Use the predicate `Element::instance()` to get references to instances of `lxml.etree.Element ` instances.
333336 */
334337 abstract class InstanceSource instanceof API:: Node {
335338 /** Gets a textual representation of this element. */
@@ -354,7 +357,8 @@ module Lxml {
354357 etreeRef ( ) .getMember ( "get_default_parser" ) .getReturn ( )
355358 ] .getMember ( "close" ) .getReturn ( )
356359 or
357- // TODO: `XMLID` and `parseid` returns a tuple of which the first element is an `Element`
360+ // TODO: `XMLID`, `XMLDTDID`, `parseid` returns a tuple of which the first element is an `Element`.
361+ // `iterparse` returns an iterator of tuples, each of which has a second element that is an `Element`.
358362 this = etreeRef ( ) .getMember ( [ "XML" , "HTML" , "fromstring" , "fromstringlist" ] ) .getReturn ( )
359363 }
360364 }
@@ -393,15 +397,18 @@ module Lxml {
393397 }
394398 }
395399
396- /** An additional taint step from an `Element` instance. See https://lxml.de/apidoc/lxml.etree.html#lxml.etree.ElementBase */
400+ /**
401+ * An additional taint step from an `Element` instance.
402+ * See https://lxml.de/apidoc/lxml.etree.html#lxml.etree.ElementBase.
403+ */
397404 private class ElementTaintStep extends TaintTracking:: AdditionalTaintStep {
398405 override predicate step ( DataFlow:: Node nodeFrom , DataFlow:: Node nodeTo ) {
399406 exists ( DataFlow:: MethodCallNode call |
400407 nodeTo = call and instance ( ) .asSource ( ) .flowsTo ( nodeFrom )
401408 |
402409 call .calls ( nodeFrom ,
403- // We consider a node to be tainted if there could be taint anywhere in the element tree
404- // So sibling nodes (e.g. `getnext`) are also tainted
410+ // We consider a node to be tainted if there could be taint anywhere in the element tree;
411+ // So sibling nodes (e.g. `getnext`) are also tainted.
405412 // This ensures nodes like `elem[0].getnext()` are tracked.
406413 [
407414 "cssselect" , "find" , "findall" , "findtext" , "get" , "getchildren" , "getiterator" ,
@@ -445,7 +452,7 @@ module Lxml {
445452 ElementTreeInstance ( ) { this = classRef ( ) .getAnInstance ( ) }
446453 }
447454
448- /** The result of a parst operation that returns an `ElementTree` */
455+ /** The result of a parst operation that returns an `ElementTree`. */
449456 private class ParseResult extends InstanceSource {
450457 ParseResult ( ) { this = etreeRef ( ) .getMember ( "parse" ) .getReturn ( ) }
451458 }
0 commit comments