Implement CSS immediate children

sortafreel · sortafreel · commit 053f2669eef8 · 2019-06-16T00:27:52.000+03:00
diff --git a/cssselect/parser.py b/cssselect/parser.py
@@ -400,8 +400,8 @@ def parse_simple_selector(stream, inside_negation=False):
     stream.skip_whitespace()
     selector_start = len(stream.used)
     peek = stream.peek()
-    if peek.type == 'IDENT' or peek == ('DELIM', '*') or peek == ('DELIM', '^'):
-        if peek.type == 'IDENT' or peek == ('DELIM', '^'):
+    if peek.type == 'IDENT' or peek == ('DELIM', '*') or peek == ('DELIM', '<'):
+        if peek.type == 'IDENT' or peek == ('DELIM', '<'):
             namespace = stream.next().value
         else:
             stream.next()
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
@@ -187,14 +187,6 @@ def css_to_xpath(self, css, prefix='descendant-or-self::'):
             The equivalent XPath 1.0 expression as an Unicode string.
 
         """
-        # no prefix if css immediate children (example: css "^ > div" to xpath "./div")
-        child_re = r'^[ \t\r\n\f]*\^[ \t\r\n\f]*>'
-        if re.match(child_re, css):
-            prefix = ''
-            # prefix = 'child::'
-            # css = re.sub(child_re, '', css)
-            # print('*' * 50)
-            # print(css)
         return ' | '.join(self.selector_to_xpath(selector, prefix,
                                                  translate_pseudo_elements=True)
                           for selector in parse(css))
@@ -228,7 +220,18 @@ def selector_to_xpath(self, selector, prefix='descendant-or-self::',
         assert isinstance(xpath, self.xpathexpr_cls)  # help debug a missing 'return'
         if translate_pseudo_elements and selector.pseudo_element:
             xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
-        return (prefix or '') + _unicode(xpath)
+
+        unicode_xpath = _unicode(xpath)
+        # CSS immediate children: CSS "<> div" becomes XPath "child::div" ("./div").
+        # This works only at the start of a selector.
+        # Needed in Scrapy to get the immediate children of an already-selected node:
+        #     product = response.css('.product')
+        #     name = product.css('<> div')
+        child_re = r'^[ \t\r\n\f]*\<[ \t\r\n\f]*\/'
+        if re.match(child_re, unicode_xpath):
+            prefix = 'child::'
+            unicode_xpath = re.sub(child_re, '', unicode_xpath)
+        return (prefix or '') + unicode_xpath
 
     def xpath_pseudo_element(self, xpath, pseudo_element):
         """Translate a pseudo-element.
@@ -340,8 +343,8 @@ def xpath_element(self, selector):
         if not element:
             element = '*'
             safe = True
-        if element == '^':
-            element = '.'
+        if element == '<':
+            element = '<'
             safe = True
         else:
             safe = is_safe_name(element)