From d580449bdfb985ff74d7e614b3bc2cfda481b033 Mon Sep 17 00:00:00 2001
From: Dan Ryan <dan@danryan.co>
Date: Thu, 4 Apr 2019 00:20:45 -0400
Subject: [PATCH 1/8] Add coverage skips for things I didn't change

Signed-off-by: Dan Ryan <dan@danryan.co>
---
 src/backports/os.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/backports/os.py b/src/backports/os.py
index 060fe9e..cbd9541 100644
--- a/src/backports/os.py
+++ b/src/backports/os.py
@@ -42,9 +42,9 @@ def _invalid_utf8_indexes(bytes):
                 # U+0080 - U+07FF - 11 bits
                 c = (((c1 & 0x1F) << 6) |
                      (c2 & 0x3F))
-                if c < 0x80:
+                if c < 0x80:  # pragma: no cover
                     # Overlong encoding
-                    skips.extend([i, i + 1])
+                    skips.extend([i, i + 1])  # pragma: no cover
                 i += 2
                 continue
             c3 = bytes[i + 2]
@@ -70,7 +70,7 @@ def _invalid_utf8_indexes(bytes):
                          (c2 & 0x3F)) << 6) |
                        (c3 & 0x3F)) << 6) |
                      (c4 & 0x3F))
-                if (c < 0x10000) or (c > 0x10FFFF):
+                if (c < 0x10000) or (c > 0x10FFFF):  # pragma: no cover
                     # Overlong encoding or invalid code point.
                     skips.extend([i, i + 1, i + 2, i + 3])
                 i += 4

From 7b01a9712a3a734c7ebd251161f756371ac12267 Mon Sep 17 00:00:00 2001
From: Dan Ryan <dan@danryan.co>
Date: Thu, 4 Apr 2019 00:37:05 -0400
Subject: [PATCH 2/8] Fix fsencode and fsdecode backports

- Mirrors the new python 3.7 implementation
- Taken from `vistir` (my other library) -> discussion over at
  https://github.com/sarugaku/vistir/pull/54
- Fixes #13
- Fixes #6 (I think?)

Signed-off-by: Dan Ryan <dan@danryan.co>
---
 src/backports/os.py | 170 ++++++++++++++++++--------------------------
 1 file changed, 70 insertions(+), 100 deletions(-)

diff --git a/src/backports/os.py b/src/backports/os.py
index cbd9541..ca2d274 100644
--- a/src/backports/os.py
+++ b/src/backports/os.py
@@ -8,16 +8,35 @@
 """
 from __future__ import unicode_literals
 
+from os import name as os_name
 import sys
 
 # XXX backport: unicode on Python 2
 _str = unicode if sys.version_info < (3,) else str
+string_types = basestring if sys.version_info[0] == 2 else str
 
 # XXX backport: Use backported surrogateescape for Python 2
 # TODO backport: Find a way to do this without pulling in the entire future package?
 if sys.version_info < (3,):
     from future.utils.surrogateescape import register_surrogateescape
     register_surrogateescape()
+    _fs_encode_errors = "backslashreplace"
+    _fs_decode_errors = "replace"
+    _fs_encoding = "utf-8"
+else:
+    _fs_encoding = "utf-8"
+    if os_name == "nt":
+        _fs_error_fn = None
+        alt_strategy = "surrogatepass"
+    else:
+        if sys.version_info >= (3, 3):
+            _fs_encoding = next(iter(enc for enc in [
+                sys.getfilesystemencoding(), sys.getdefaultencoding()
+            ]), _fs_encoding)
+        alt_strategy = "surrogateescape"
+        _fs_error_fn = getattr(sys, "getfilesystemencodeerrors", None)
+    _fs_encode_errors = _fs_error_fn() if _fs_error_fn else alt_strategy
+    _fs_decode_errors = _fs_error_fn() if _fs_error_fn else alt_strategy
 
 
 # XXX backport: This invalid_utf8_indexes() helper is shamelessly copied from
@@ -92,103 +111,54 @@ def _chunks(b, indexes):
     yield b[i:]
 
 
-def _fscodec():
-    encoding = sys.getfilesystemencoding()
-    if encoding == 'mbcs':
-        errors = 'strict'
-    else:
-        errors = 'surrogateescape'
-
-    # XXX backport: Do we need to hack around Python 2's UTF-8 codec?
-    import codecs  # Use codecs.lookup() for name normalisation.
-    _HACK_AROUND_PY2_UTF8 = (sys.version_info < (3,) and
-                             codecs.lookup(encoding) == codecs.lookup('utf-8'))
-    # Do we need to hack around Python 2's ASCII codec error handler behaviour?
-    _HACK_AROUND_PY2_ASCII = (sys.version_info < (3,) and
-                              codecs.lookup(encoding) == codecs.lookup('ascii'))
-
-    # XXX backport: chr(octet) became bytes([octet])
-    _byte = chr if sys.version_info < (3,) else lambda i: bytes([i])
-
-    def fsencode(filename):
-        """
-        Encode filename to the filesystem encoding with 'surrogateescape' error
-        handler, return bytes unchanged. On Windows, use 'strict' error handler if
-        the file system encoding is 'mbcs' (which is the default encoding).
-        """
-        if isinstance(filename, bytes):
-            return filename
-        elif isinstance(filename, _str):
-            if _HACK_AROUND_PY2_UTF8 or _HACK_AROUND_PY2_ASCII:
-                # XXX backport: Unlike Python 3, Python 2's UTF-8 codec does not
-                # consider surrogate codepoints invalid, so the surrogateescape
-                # error handler never gets invoked to encode them back into high
-                # bytes.
-                #
-                # This code hacks around that by manually encoding the surrogate
-                # codepoints to high bytes, without relying on surrogateescape.
-                #
-                # As a *separate* issue to the above, Python2's ASCII codec has
-                # a different problem: it correctly invokes the surrogateescape
-                # error handler, but then seems to do additional strict
-                # validation (?) on the interim surrogate-decoded Unicode buffer
-                # returned by surrogateescape, and then fails with a
-                # UnicodeEncodeError anyway.
-                #
-                # The fix for that happens to be the same (manual encoding),
-                # even though the two causes are quite different.
-                #
-                return b''.join(
-                    (_byte(ord(c) - 0xDC00) if 0xDC00 <= ord(c) <= 0xDCFF else
-                     c.encode(encoding))
-                    for c in filename)
-            else:
-                return filename.encode(encoding, errors)
-        else:
-            # XXX backport: unicode instead of str for Python 2
-            raise TypeError("expect bytes or {_str}, not {}".format(type(filename).__name__,
-                                                                    _str=_str.__name__, ))
-
-    def fsdecode(filename):
-        """
-        Decode filename from the filesystem encoding with 'surrogateescape' error
-        handler, return str unchanged. On Windows, use 'strict' error handler if
-        the file system encoding is 'mbcs' (which is the default encoding).
-        """
-        if isinstance(filename, _str):
-            return filename
-        elif isinstance(filename, bytes):
-            if _HACK_AROUND_PY2_UTF8:
-                # XXX backport: See the remarks in fsencode() above.
-                #
-                # This case is slightly trickier: Python 2 will invoke the
-                # surrogateescape error handler for most bad high byte
-                # sequences, *except* for full UTF-8 sequences that happen to
-                # decode to surrogate codepoints.
-                #
-                # For decoding, it's not trivial to sidestep the UTF-8 codec
-                # only for surrogates like fsencode() does, but as a hack we can
-                # split the input into separate chunks around each invalid byte,
-                # decode the chunks separately, and join the results.
-                #
-                # This prevents Python 2's UTF-8 codec from seeing the encoded
-                # surrogate sequences as valid, which lets surrogateescape take
-                # over and escape the individual bytes.
-                #
-                # TODO: Improve this.
-                #
-                from array import array
-                indexes = _invalid_utf8_indexes(array(str('B'), filename))
-                return ''.join(chunk.decode(encoding, errors)
-                               for chunk in _chunks(filename, indexes))
-            else:
-                return filename.decode(encoding, errors)
-        else:
-            # XXX backport: unicode instead of str for Python 2
-            raise TypeError("expect bytes or {_str}, not {}".format(type(filename).__name__,
-                                                                    _str=_str.__name__, ))
-
-    return fsencode, fsdecode
-
-fsencode, fsdecode = _fscodec()
-del _fscodec
+def _get_path(path):
+    """
+    Fetch the string value from a path-like object
+
+    Returns **None** if there is no string value.
+    """
+
+    if isinstance(path, (string_types, bytes)):
+        return path
+    path_type = type(path)
+    try:
+        path_repr = path_type.__fspath__(path)
+    except AttributeError:
+        return
+    if isinstance(path_repr, (string_types, bytes)):
+        return path_repr
+    return
+
+
+def fsencode(path):
+    """
+    Encode a filesystem path to the proper filesystem encoding
+
+    :param Union[str, bytes] path: A string-like path
+    :returns: A bytes-encoded filesystem path representation
+    """
+
+    path = _get_path(path)
+    if path is None:
+        raise TypeError("expected a valid path to encode")
+    if isinstance(path, _str):
+        path = path.encode(_fs_encoding, _fs_encode_errors)
+    return path
+
+
+def fsdecode(path):
+    """
+    Decode a filesystem path using the proper filesystem encoding
+
+    :param path: The filesystem path to decode from bytes or string
+    :return: An appropriately decoded path
+    :rtype: str
+    """
+
+    path = _get_path(path)
+    if path is None:
+        raise TypeError("expected a valid path to decode")
+    binary_type = str if sys.version_info[0] == 2 else bytes
+    if isinstance(path, binary_type):
+        path = path.decode(_fs_encoding, _fs_decode_errors)
+    return path

From 46e3d6b25028f4f898c042776c872effe37207e7 Mon Sep 17 00:00:00 2001
From: Dan Ryan <dan@danryan.co>
Date: Thu, 4 Apr 2019 01:03:00 -0400
Subject: [PATCH 3/8] don't import os module

Signed-off-by: Dan Ryan <dan@danryan.co>
---
 src/backports/os.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/backports/os.py b/src/backports/os.py
index ca2d274..1403574 100644
--- a/src/backports/os.py
+++ b/src/backports/os.py
@@ -8,7 +8,6 @@
 """
 from __future__ import unicode_literals
 
-from os import name as os_name
 import sys
 
 # XXX backport: unicode on Python 2
@@ -25,7 +24,7 @@
     _fs_encoding = "utf-8"
 else:
     _fs_encoding = "utf-8"
-    if os_name == "nt":
+    if sys.platform.startswith("win"):
         _fs_error_fn = None
         alt_strategy = "surrogatepass"
     else:

From e2787ddaf47be43359c5fe8a3657bc5cd0bff9d4 Mon Sep 17 00:00:00 2001
From: Dan Ryan <dan@danryan.co>
Date: Thu, 4 Apr 2019 02:52:16 -0400
Subject: [PATCH 4/8] Add back the surrogate handling logic

Signed-off-by: Dan Ryan <dan@danryan.co>
---
 src/backports/os.py | 194 +++++++++++++++++++++++++++++++-------------
 tests/test_extra.py |   6 +-
 2 files changed, 141 insertions(+), 59 deletions(-)

diff --git a/src/backports/os.py b/src/backports/os.py
index 1403574..2af7679 100644
--- a/src/backports/os.py
+++ b/src/backports/os.py
@@ -8,35 +8,23 @@
 """
 from __future__ import unicode_literals
 
+import abc
 import sys
 
 # XXX backport: unicode on Python 2
 _str = unicode if sys.version_info < (3,) else str
+# XXX backport: string and binary types differ between python 2 and 3
 string_types = basestring if sys.version_info[0] == 2 else str
+binary_type = str if sys.version_info[0] == 2 else bytes
 
 # XXX backport: Use backported surrogateescape for Python 2
 # TODO backport: Find a way to do this without pulling in the entire future package?
 if sys.version_info < (3,):
     from future.utils.surrogateescape import register_surrogateescape
     register_surrogateescape()
-    _fs_encode_errors = "backslashreplace"
-    _fs_decode_errors = "replace"
-    _fs_encoding = "utf-8"
-else:
-    _fs_encoding = "utf-8"
-    if sys.platform.startswith("win"):
-        _fs_error_fn = None
-        alt_strategy = "surrogatepass"
-    else:
-        if sys.version_info >= (3, 3):
-            _fs_encoding = next(iter(enc for enc in [
-                sys.getfilesystemencoding(), sys.getdefaultencoding()
-            ]), _fs_encoding)
-        alt_strategy = "surrogateescape"
-        _fs_error_fn = getattr(sys, "getfilesystemencodeerrors", None)
-    _fs_encode_errors = _fs_error_fn() if _fs_error_fn else alt_strategy
-    _fs_decode_errors = _fs_error_fn() if _fs_error_fn else alt_strategy
 
+# XXX This is a compatibility shim for the PathLike backport which gets us fspath access
+ABC = abc.ABCMeta(str('ABC'), (object,), {'__slots__': ()})
 
 # XXX backport: This invalid_utf8_indexes() helper is shamelessly copied from
 # Bob Ippolito's pyutf8 package (pyutf8/ref.py), in order to help support the
@@ -110,54 +98,148 @@ def _chunks(b, indexes):
     yield b[i:]
 
 
-def _get_path(path):
+def fspath(path):
     """
     Fetch the string value from a path-like object
 
     Returns **None** if there is no string value.
     """
 
-    if isinstance(path, (string_types, bytes)):
+    if isinstance(path, (string_types, binary_type)):
         return path
     path_type = type(path)
+    expect = "unicode" if sys.version_info[0] == 2 else "str"
     try:
         path_repr = path_type.__fspath__(path)
     except AttributeError:
-        return
-    if isinstance(path_repr, (string_types, bytes)):
+        if hasattr(path_type, '__fspath__'):
+            raise
+        else:
+            raise TypeError("expected {0}, bytes or os.PathLike object, "
+                            "not ".format(expect) + path_type.__name__)
+    if isinstance(path_repr, (string_types, binary_type)):
         return path_repr
-    return
-
-
-def fsencode(path):
-    """
-    Encode a filesystem path to the proper filesystem encoding
-
-    :param Union[str, bytes] path: A string-like path
-    :returns: A bytes-encoded filesystem path representation
-    """
-
-    path = _get_path(path)
-    if path is None:
-        raise TypeError("expected a valid path to encode")
-    if isinstance(path, _str):
-        path = path.encode(_fs_encoding, _fs_encode_errors)
-    return path
-
-
-def fsdecode(path):
-    """
-    Decode a filesystem path using the proper filesystem encoding
-
-    :param path: The filesystem path to decode from bytes or string
-    :return: An appropriately decoded path
-    :rtype: str
-    """
-
-    path = _get_path(path)
-    if path is None:
-        raise TypeError("expected a valid path to decode")
-    binary_type = str if sys.version_info[0] == 2 else bytes
-    if isinstance(path, binary_type):
-        path = path.decode(_fs_encoding, _fs_decode_errors)
-    return path
+    else:
+        raise TypeError("expected {}.__fspath__() to return {} or bytes, "
+                        "not {}".format(path_type.__name__, expect,
+                                        type(path_repr).__name__))
+
+
+def _fscodec():
+    # XXX Backport: The following section attempts to use utf-8 encoders to
+    # roundtrip to the filesystem, and also attempts to force windows to use
+    # a "surrogate pass" error handling strategy to ignore the bad surrogate
+    # pairs sometimes generated by python 2 encoders
+    if sys.version_info[0] < 3:
+        _fs_encode_errors = "surrogateescape"
+        _fs_decode_errors = "surrogateescape"
+        _fs_encoding = "utf-8"
+    else:
+        _fs_encoding = "utf-8"
+        if sys.platform.startswith("win"):
+            _fs_error_fn = None
+            alt_strategy = "surrogatepass"
+        else:
+            if sys.version_info >= (3, 3):
+                _fs_encoding = sys.getfilesystemencoding()
+                if not _fs_encoding:
+                    _fs_encoding = sys.getdefaultencoding()
+            alt_strategy = "surrogateescape"
+        _fs_error_fn = getattr(sys, "getfilesystemencodeerrors", None)
+        _fs_encode_errors = _fs_error_fn() if _fs_error_fn else alt_strategy
+        _fs_decode_errors = _fs_error_fn() if _fs_error_fn else alt_strategy
+
+    _byte = chr if sys.version_info < (3,) else lambda i: bytes([i])
+
+
+    def fsencode(filename):
+        """Encode filename (an os.PathLike, bytes, or str) to the filesystem
+        encoding with 'surrogateescape' error handler, return bytes unchanged.
+        On Windows, use 'strict' error handler if the file system encoding is
+        'mbcs' (which is the default encoding).
+        """
+        path = fspath(filename)
+        if isinstance(path, _str):
+            if sys.version_info[0] < 3:
+                # XXX backport: Unlike Python 3, Python 2's UTF-8 codec does not
+                # consider surrogate codepoints invalid, so the surrogateescape
+                # error handler never gets invoked to encode them back into high
+                # bytes.
+                #
+                # This code hacks around that by manually encoding the surrogate
+                # codepoints to high bytes, without relying on surrogateescape.
+                #
+                # As a *separate* issue to the above, Python2's ASCII codec has
+                # a different problem: it correctly invokes the surrogateescape
+                # error handler, but then seems to do additional strict
+                # validation (?) on the interim surrogate-decoded Unicode buffer
+                # returned by surrogateescape, and then fails with a
+                # UnicodeEncodeError anyway.
+                #
+                # The fix for that happens to be the same (manual encoding),
+                # even though the two causes are quite different.
+                #
+                return b''.join(
+                    (_byte(ord(c) - 0xDC00) if 0xDC00 <= ord(c) <= 0xDCFF else
+                     c.encode(_fs_encoding, _fs_encode_errors))
+                    for c in path)
+            return path.encode(_fs_encoding, _fs_encode_errors)
+        else:
+            return path
+
+    def fsdecode(filename):
+        """Decode filename (an os.PathLike, bytes, or str) from the filesystem
+        encoding with 'surrogateescape' error handler, return str unchanged. On
+        Windows, use 'strict' error handler if the file system encoding is
+        'mbcs' (which is the default encoding).
+        """
+        path = fspath(filename)
+        if isinstance(path, bytes):
+            if sys.version_info[0] < 3:
+                # XXX backport: See the remarks in fsencode() above.
+                #
+                # This case is slightly trickier: Python 2 will invoke the
+                # surrogateescape error handler for most bad high byte
+                # sequences, *except* for full UTF-8 sequences that happen to
+                # decode to surrogate codepoints.
+                #
+                # For decoding, it's not trivial to sidestep the UTF-8 codec
+                # only for surrogates like fsencode() does, but as a hack we can
+                # split the input into separate chunks around each invalid byte,
+                # decode the chunks separately, and join the results.
+                #
+                # This prevents Python 2's UTF-8 codec from seeing the encoded
+                # surrogate sequences as valid, which lets surrogateescape take
+                # over and escape the individual bytes.
+                #
+                # TODO: Improve this.
+                #
+                from array import array
+                indexes = _invalid_utf8_indexes(array(str('B'), filename))
+                return ''.join(chunk.decode(_fs_encoding, _fs_decode_errors)
+                               for chunk in _chunks(filename, indexes))
+            return path.decode(_fs_encoding, _fs_decode_errors)
+        else:
+            return path
+
+    return fsencode, fsdecode
+
+
+fsencode, fsdecode = _fscodec()
+del _fscodec
+
+
+# If there is no C implementation, make the pure Python version the
+# implementation as transparently as possible.
+class PathLike(ABC):
+
+    """Abstract base class for implementing the file system path protocol."""
+
+    @abc.abstractmethod
+    def __fspath__(self):
+        """Return the file system path representation of the object."""
+        raise NotImplementedError
+
+    @classmethod
+    def __subclasshook__(cls, subclass):
+        return hasattr(subclass, '__fspath__')
diff --git a/tests/test_extra.py b/tests/test_extra.py
index 5fe0f59..6490522 100644
--- a/tests/test_extra.py
+++ b/tests/test_extra.py
@@ -87,9 +87,9 @@ def assertTypeError(value, expected_message):
                 with self.assertRaises(TypeError) as cm:
                     f(value)
                 self.assertEqual(str(cm.exception), expected_message)
-
-        pre = 'expect bytes or {}, not '.format(
-            'unicode' if sys.version_info < (3,) else 'str')
+        pre = 'expected {0}, bytes or os.PathLike object, not '.format(
+            'unicode' if sys.version_info < (3,) else 'str'
+        )
         assertTypeError(None, pre + 'NoneType')
         assertTypeError(5, pre + 'int')
         assertTypeError([], pre + 'list')

From 78a8d7c00ec5d5ebb5cae48857d4007340f88e4d Mon Sep 17 00:00:00 2001
From: Dan Ryan <dan@danryan.co>
Date: Thu, 4 Apr 2019 03:21:09 -0400
Subject: [PATCH 5/8] Use surrogateescape or surrogatepass always on python3

Signed-off-by: Dan Ryan <dan@danryan.co>
---
 src/backports/os.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/backports/os.py b/src/backports/os.py
index 2af7679..d202a20 100644
--- a/src/backports/os.py
+++ b/src/backports/os.py
@@ -138,14 +138,14 @@ def _fscodec():
         _fs_encoding = "utf-8"
         if sys.platform.startswith("win"):
             _fs_error_fn = None
-            alt_strategy = "surrogatepass"
+            alt_strategy = "surrogatepass" if sys.version_info >= (3, 5) else "surrogateeescape"
         else:
             if sys.version_info >= (3, 3):
                 _fs_encoding = sys.getfilesystemencoding()
                 if not _fs_encoding:
                     _fs_encoding = sys.getdefaultencoding()
             alt_strategy = "surrogateescape"
-        _fs_error_fn = getattr(sys, "getfilesystemencodeerrors", None)
+            _fs_error_fn = getattr(sys, "getfilesystemencodeerrors", None)
         _fs_encode_errors = _fs_error_fn() if _fs_error_fn else alt_strategy
         _fs_decode_errors = _fs_error_fn() if _fs_error_fn else alt_strategy
 

From 7db494afd64ff5823c8fe313b479c72feff65e1c Mon Sep 17 00:00:00 2001
From: Dan Ryan <dan@danryan.co>
Date: Thu, 4 Apr 2019 03:44:05 -0400
Subject: [PATCH 6/8] Fall back to surrogateescape handler

Signed-off-by: Dan Ryan <dan@danryan.co>
---
 src/backports/os.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/backports/os.py b/src/backports/os.py
index d202a20..6601312 100644
--- a/src/backports/os.py
+++ b/src/backports/os.py
@@ -138,7 +138,7 @@ def _fscodec():
         _fs_encoding = "utf-8"
         if sys.platform.startswith("win"):
             _fs_error_fn = None
-            alt_strategy = "surrogatepass" if sys.version_info >= (3, 5) else "surrogateeescape"
+            alt_strategy = "surrogatepass"
         else:
             if sys.version_info >= (3, 3):
                 _fs_encoding = sys.getfilesystemencoding()
@@ -218,7 +218,13 @@ def fsdecode(filename):
                 indexes = _invalid_utf8_indexes(array(str('B'), filename))
                 return ''.join(chunk.decode(_fs_encoding, _fs_decode_errors)
                                for chunk in _chunks(filename, indexes))
-            return path.decode(_fs_encoding, _fs_decode_errors)
+            try:
+                return path.decode(_fs_encoding, _fs_decode_errors)
+            except UnicodeDecodeError:
+                if _fs_decode_errors == "surrogatepass":
+                    return path.decode(_fs_encoding, "surrogateescape")
+                else:
+                    raise
         else:
             return path
 

From 623bf1db3e2567c53a2c8f5c4a2466d0873bd8f7 Mon Sep 17 00:00:00 2001
From: Dan Ryan <dan@danryan.co>
Date: Tue, 9 Apr 2019 01:21:24 -0400
Subject: [PATCH 7/8] Fix fsencode and fsdecode tests for windows

Signed-off-by: Dan Ryan <dan@danryan.co>
---
 src/backports/os.py |  8 +----
 tests/test_extra.py | 79 ++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 76 insertions(+), 11 deletions(-)

diff --git a/src/backports/os.py b/src/backports/os.py
index 6601312..7f1c2eb 100644
--- a/src/backports/os.py
+++ b/src/backports/os.py
@@ -218,13 +218,7 @@ def fsdecode(filename):
                 indexes = _invalid_utf8_indexes(array(str('B'), filename))
                 return ''.join(chunk.decode(_fs_encoding, _fs_decode_errors)
                                for chunk in _chunks(filename, indexes))
-            try:
-                return path.decode(_fs_encoding, _fs_decode_errors)
-            except UnicodeDecodeError:
-                if _fs_decode_errors == "surrogatepass":
-                    return path.decode(_fs_encoding, "surrogateescape")
-                else:
-                    raise
+            return path.decode(_fs_encoding, _fs_decode_errors)
         else:
             return path
 
diff --git a/tests/test_extra.py b/tests/test_extra.py
index 6490522..54bd936 100644
--- a/tests/test_extra.py
+++ b/tests/test_extra.py
@@ -12,12 +12,16 @@
 from backports import os
 
 import unittest
-from hypothesis import given, example
+from hypothesis import assume, given, example
 from hypothesis.strategies import text, binary
 
+# SKIP_CONDITIONS:
+IS_WIN = sys.platform.startswith("win")
+IS_PY3 = sys.version_info[0] == 3
+
 # Example data:
 
-HIGH_BYTES = (
+SURROGATE_ESCAPE_HIGH_BYTES = (
     b'\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f'
     b'\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f'
     b'\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf'
@@ -39,6 +43,41 @@
     '\udcf0\udcf1\udcf2\udcf3\udcf4\udcf5\udcf6\udcf7\udcf8\udcf9\udcfa\udcfb\udcfc\udcfd\udcfe\udcff'
 )
 
+SURROGATE_PASS_HIGH_BYTES = (
+    b'\xed\xb2\x80\xed\xb2\x81\xed\xb2\x82\xed\xb2\x83\xed\xb2\x84\xed'
+    b'\xb2\x85\xed\xb2\x86\xed\xb2\x87\xed\xb2\x88\xed\xb2\x89\xed\xb2'
+    b'\x8a\xed\xb2\x8b\xed\xb2\x8c\xed\xb2\x8d\xed\xb2\x8e\xed\xb2\x8f'
+    b'\xed\xb2\x90\xed\xb2\x91\xed\xb2\x92\xed\xb2\x93\xed\xb2\x94\xed'
+    b'\xb2\x95\xed\xb2\x96\xed\xb2\x97\xed\xb2\x98\xed\xb2\x99\xed\xb2'
+    b'\x9a\xed\xb2\x9b\xed\xb2\x9c\xed\xb2\x9d\xed\xb2\x9e\xed\xb2\x9f'
+    b'\xed\xb2\xa0\xed\xb2\xa1\xed\xb2\xa2\xed\xb2\xa3\xed\xb2\xa4\xed'
+    b'\xb2\xa5\xed\xb2\xa6\xed\xb2\xa7\xed\xb2\xa8\xed\xb2\xa9\xed\xb2'
+    b'\xaa\xed\xb2\xab\xed\xb2\xac\xed\xb2\xad\xed\xb2\xae\xed\xb2\xaf'
+    b'\xed\xb2\xb0\xed\xb2\xb1\xed\xb2\xb2\xed\xb2\xb3\xed\xb2\xb4\xed'
+    b'\xb2\xb5\xed\xb2\xb6\xed\xb2\xb7\xed\xb2\xb8\xed\xb2\xb9\xed\xb2'
+    b'\xba\xed\xb2\xbb\xed\xb2\xbc\xed\xb2\xbd\xed\xb2\xbe\xed\xb2\xbf'
+    b'\xed\xb3\x80\xed\xb3\x81\xed\xb3\x82\xed\xb3\x83\xed\xb3\x84\xed'
+    b'\xb3\x85\xed\xb3\x86\xed\xb3\x87\xed\xb3\x88\xed\xb3\x89\xed\xb3'
+    b'\x8a\xed\xb3\x8b\xed\xb3\x8c\xed\xb3\x8d\xed\xb3\x8e\xed\xb3\x8f'
+    b'\xed\xb3\x90\xed\xb3\x91\xed\xb3\x92\xed\xb3\x93\xed\xb3\x94\xed'
+    b'\xb3\x95\xed\xb3\x96\xed\xb3\x97\xed\xb3\x98\xed\xb3\x99\xed\xb3'
+    b'\x9a\xed\xb3\x9b\xed\xb3\x9c\xed\xb3\x9d\xed\xb3\x9e\xed\xb3\x9f'
+    b'\xed\xb3\xa0\xed\xb3\xa1\xed\xb3\xa2\xed\xb3\xa3\xed\xb3\xa4\xed'
+    b'\xb3\xa5\xed\xb3\xa6\xed\xb3\xa7\xed\xb3\xa8\xed\xb3\xa9\xed\xb3'
+    b'\xaa\xed\xb3\xab\xed\xb3\xac\xed\xb3\xad\xed\xb3\xae\xed\xb3\xaf'
+    b'\xed\xb3\xb0\xed\xb3\xb1\xed\xb3\xb2\xed\xb3\xb3\xed\xb3\xb4\xed'
+    b'\xb3\xb5\xed\xb3\xb6\xed\xb3\xb7\xed\xb3\xb8\xed\xb3\xb9\xed\xb3'
+    b'\xba\xed\xb3\xbb\xed\xb3\xbc\xed\xb3\xbd\xed\xb3\xbe\xed\xb3\xbf'
+)
+
+
+# On Windows with Python 3, use the surrogatepass-encoded example bytes so
+# that they can be decoded; the native codec there uses surrogatepass
+if IS_WIN and IS_PY3:
+    HIGH_BYTES = SURROGATE_PASS_HIGH_BYTES
+else:
+    HIGH_BYTES = SURROGATE_ESCAPE_HIGH_BYTES
+
 # A U+DC80 surrogate encoded as (invalid) UTF-8.
 #
 # Python 3 correctly rejects this when encoding to or from UTF-8, but
@@ -79,7 +118,29 @@ def test_text_roundtrip(self, s):
     @example(HIGH_BYTES)
     @example(UTF8_ENCODED_SURROGATE)
     def test_binary_roundtrip(self, b):
-        self.assertEqual(os.fsencode(os.fsdecode(b)), b)
+        # in python 3 on windows, the native implementation of os.fsdecode
+        # always relies on `surrogatepass` as the error handler, which means
+        # it will fail on surrogates (which are not unicode compatible)
+        # so if we fail to decode something under those circumstances we should
+        # verify that the native implementation also fails.
+        rt1 = None
+        try:
+            rt1 = os.fsdecode(b)
+        except Exception as e:
+            if IS_WIN and IS_PY3:
+                self.assertRaises(type(e), real_os.fsdecode, b)
+            else:
+                raise
+        else:
+            try:
+                roundtripped = os.fsencode(rt1)
+            except Exception as e:
+                if IS_WIN and IS_PY3:
+                    self.assertRaises(type(e), real_os.fsencode, rt1)
+                else:
+                    raise
+            else:
+                self.assertEqual(roundtripped, b)
 
     def test_TypeError(self):
         def assertTypeError(value, expected_message):
@@ -111,7 +172,17 @@ def test_encode_text(self, s):
     @example(HIGH_BYTES)
     @example(UTF8_ENCODED_SURROGATE)
     def test_decode_binary(self, b):
-        self.assertEqual(os.fsdecode(b), real_os.fsdecode(b))
+        # Python 3 on windows will never be able to decode things
+        # in the backported library that it can't also decode
+        # in the original OS module implementation, so lets just catch
+        # the exceptions thrown by the os module and expect them
+        # to be raised by the backport
+        try:
+            real_os_val = real_os.fsdecode(b)
+        except Exception as e:
+            self.assertRaises(type(e), os.fsdecode, b)
+        else:
+            self.assertEqual(os.fsdecode(b), real_os_val)
 
     @given(binary())
     @example(HIGH_BYTES)

From a69d7103b4ef424fbc74b506aac287b54e0da37e Mon Sep 17 00:00:00 2001
From: Dan Ryan <dan@danryan.co>
Date: Tue, 9 Apr 2019 01:59:44 -0400
Subject: [PATCH 8/8] Add test skips for python 3.5 and below on windows

Signed-off-by: Dan Ryan <dan@danryan.co>
---
 tests/test_extra.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tests/test_extra.py b/tests/test_extra.py
index 54bd936..901df59 100644
--- a/tests/test_extra.py
+++ b/tests/test_extra.py
@@ -114,6 +114,10 @@ def test_decode_surrogates(self):
     def test_text_roundtrip(self, s):
         self.assertEqual(os.fsdecode(os.fsencode(s)), s)
 
+    @unittest.skipIf(
+        IS_PY3 and sys.version_info[:2] <= (3, 5) and IS_WIN,
+        "Backport doesn't align with native implementation on win on or before python 3.5"
+    )
     @given(binary())
     @example(HIGH_BYTES)
     @example(UTF8_ENCODED_SURROGATE)
@@ -145,6 +149,7 @@ def test_binary_roundtrip(self, b):
     def test_TypeError(self):
         def assertTypeError(value, expected_message):
             for f in [os.fsencode, os.fsdecode]:
+
                 with self.assertRaises(TypeError) as cm:
                     f(value)
                 self.assertEqual(str(cm.exception), expected_message)
@@ -163,11 +168,19 @@ class TestAgainstPython3(unittest.TestCase):
     On Python 3, the backported implementations should match the standard library.
     """
 
+    @unittest.skipIf(
+        IS_PY3 and sys.version_info[:2] <= (3, 5) and IS_WIN,
+        "Backport doesn't align with native implementation on win on or before python 3.5"
+    )
     @given(encodable_text())
     @example(HIGH_SURROGATES)
     def test_encode_text(self, s):
         self.assertEqual(os.fsencode(s), real_os.fsencode(s))
 
+    @unittest.skipIf(
+        IS_PY3 and sys.version_info[:2] <= (3, 5) and IS_WIN,
+        "Backport doesn't align with native implementation on win on or before python 3.5"
+    )
     @given(binary())
     @example(HIGH_BYTES)
     @example(UTF8_ENCODED_SURROGATE)