Silence wrong-import-position

gsnedders · gsnedders · commit e5d395c28c73 · 2016-05-21T00:29:35.000+01:00
diff --git a/html5lib/serializer/htmlserializer.py b/html5lib/serializer/htmlserializer.py
@@ -3,6 +3,8 @@
 
 import re
 
+from codecs import register_error, xmlcharrefreplace_errors
+
 from ..constants import voidElements, booleanAttributes, spaceCharacters
 from ..constants import rcdataElements, entities, xmlEntities
 from .. import utils
@@ -21,61 +23,54 @@
                                   "\u2008\u2009\u200a\u2028\u2029\u202f\u205f"
                                   "\u3000]")
 
-try:
-    from codecs import register_error, xmlcharrefreplace_errors
-except ImportError:
-    unicode_encode_errors = "strict"
-else:
-    unicode_encode_errors = "htmlentityreplace"
-
-    encode_entity_map = {}
-    is_ucs4 = len("\U0010FFFF") == 1
-    for k, v in list(entities.items()):
-        # skip multi-character entities
-        if ((is_ucs4 and len(v) > 1) or
-                (not is_ucs4 and len(v) > 2)):
-            continue
-        if v != "&":
-            if len(v) == 2:
-                v = utils.surrogatePairToCodepoint(v)
-            else:
-                v = ord(v)
-            if v not in encode_entity_map or k.islower():
-                # prefer &lt; over &LT; and similarly for &amp;, &gt;, etc.
-                encode_entity_map[v] = k
-
-    def htmlentityreplace_errors(exc):
-        if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
-            res = []
-            codepoints = []
-            skip = False
-            for i, c in enumerate(exc.object[exc.start:exc.end]):
-                if skip:
-                    skip = False
-                    continue
-                index = i + exc.start
-                if utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]):
-                    codepoint = utils.surrogatePairToCodepoint(exc.object[index:index + 2])
-                    skip = True
-                else:
-                    codepoint = ord(c)
-                codepoints.append(codepoint)
-            for cp in codepoints:
-                e = encode_entity_map.get(cp)
-                if e:
-                    res.append("&")
-                    res.append(e)
-                    if not e.endswith(";"):
-                        res.append(";")
-                else:
-                    res.append("&#x%s;" % (hex(cp)[2:]))
-            return ("".join(res), exc.end)
-        else:
-            return xmlcharrefreplace_errors(exc)
 
-    register_error(unicode_encode_errors, htmlentityreplace_errors)
+encode_entity_map = {}
+is_ucs4 = len("\U0010FFFF") == 1
+for k, v in list(entities.items()):
+    # skip multi-character entities
+    if ((is_ucs4 and len(v) > 1) or
+            (not is_ucs4 and len(v) > 2)):
+        continue
+    if v != "&":
+        if len(v) == 2:
+            v = utils.surrogatePairToCodepoint(v)
+        else:
+            v = ord(v)
+        if v not in encode_entity_map or k.islower():
+            # prefer &lt; over &LT; and similarly for &amp;, &gt;, etc.
+            encode_entity_map[v] = k
+
+
+def htmlentityreplace_errors(exc):
+    if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
+        res = []
+        codepoints = []
+        skip = False
+        for i, c in enumerate(exc.object[exc.start:exc.end]):
+            if skip:
+                skip = False
+                continue
+            index = i + exc.start
+            if utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]):
+                codepoint = utils.surrogatePairToCodepoint(exc.object[index:index + 2])
+                skip = True
+            else:
+                codepoint = ord(c)
+            codepoints.append(codepoint)
+        for cp in codepoints:
+            e = encode_entity_map.get(cp)
+            if e:
+                res.append("&")
+                res.append(e)
+                if not e.endswith(";"):
+                    res.append(";")
+            else:
+                res.append("&#x%s;" % (hex(cp)[2:]))
+        return ("".join(res), exc.end)
+    else:
+        return xmlcharrefreplace_errors(exc)
 
-    del register_error
+register_error("htmlentityreplace", htmlentityreplace_errors)
 
 
 class HTMLSerializer(object):
@@ -168,7 +163,7 @@ def __init__(self, **kwargs):
     def encode(self, string):
         assert(isinstance(string, text_type))
         if self.encoding:
-            return string.encode(self.encoding, unicode_encode_errors)
+            return string.encode(self.encoding, "htmlentityreplace")
         else:
             return string
 
diff --git a/html5lib/tests/support.py b/html5lib/tests/support.py
@@ -1,5 +1,7 @@
 from __future__ import absolute_import, division, unicode_literals
 
+# pylint:disable=wrong-import-position
+
 import os
 import sys
 import codecs
@@ -68,6 +70,8 @@
         "walker": treewalkers.getTreeWalker("genshi")
     }
 
+# pylint:enable=wrong-import-position
+
 
 def get_data_files(subdirectory, files='*.dat', search_dir=test_dir):
     return sorted(glob.glob(os.path.join(search_dir, subdirectory, files)))
diff --git a/html5lib/tests/test_encoding.py b/html5lib/tests/test_encoding.py
@@ -55,6 +55,7 @@ def test_encoding():
             yield (runParserEncodingTest, test[b'data'], test[b'encoding'])
             yield (runPreScanEncodingTest, test[b'data'], test[b'encoding'])
 
+# pylint:disable=wrong-import-position
 try:
     try:
         import charade  # noqa
@@ -67,3 +68,4 @@ def test_chardet():
         with open(os.path.join(test_dir, "encoding", "chardet", "test_big5.txt"), "rb") as fp:
             encoding = inputstream.HTMLInputStream(fp.read()).charEncoding
             assert encoding[0].name == "big5"
+# pylint:enable=wrong-import-position
diff --git a/html5lib/tests/test_serializer.py b/html5lib/tests/test_serializer.py
@@ -12,13 +12,15 @@
 from html5lib.serializer import HTMLSerializer, serialize
 from html5lib.treewalkers._base import TreeWalker
 
+# pylint:disable=wrong-import-position
 optionals_loaded = []
 
 try:
     from lxml import etree
     optionals_loaded.append("lxml")
 except ImportError:
     pass
+# pylint:enable=wrong-import-position
 
 default_namespace = constants.namespaces["html"]
 
diff --git a/html5lib/tokenizer.py b/html5lib/tokenizer.py
@@ -1,9 +1,6 @@
 from __future__ import absolute_import, division, unicode_literals
 
-try:
-    chr = unichr  # noqa
-except NameError:
-    pass
+from six import unichr as chr
 
 from collections import deque
 
diff --git a/html5lib/trie/__init__.py b/html5lib/trie/__init__.py
@@ -4,9 +4,11 @@
 
 Trie = PyTrie
 
+# pylint:disable=wrong-import-position
 try:
     from .datrie import Trie as DATrie
 except ImportError:
     pass
 else:
     Trie = DATrie
+# pylint:enable=wrong-import-position