# lib/pyld/iri_resolver.py
"""
Resolve relative IRIs against a base IRI and remove dot segments from IRI paths.

The functions 'remove_dot_segments()', 'resolve()' and
'is_character_allowed_after_relative_path_segment()' are ports from
[relative-to-absolute-iri.js](https://github.com/rubensworks/relative-to-absolute-iri.js)

'resolve()' differs from a literal port in one place: the leading '.' of a
relative reference is only dropped when it is directly followed by '?' or
'#', so dot-prefixed names such as '.htaccess' keep their dot.
"""


def is_character_allowed_after_relative_path_segment(ch: str) -> bool:
    """
    Return True if a character is valid after '.' or '..' in a path segment.

    :param ch: the character following the dot segment, or '' when the dot
        segment is at the end of the path.

    :return: True for end-of-path, '#', '?' and '/'; False otherwise.
    """
    return not ch or ch in ('#', '?', '/')


def remove_dot_segments(path: str) -> str:
    """
    Removes dot segments ('.' and '..') from a URL path,
    as described in https://www.ietf.org/rfc/rfc3986.txt (page 32).

    Everything from the first '#' or '?' onwards is copied unchanged.

    NOTE: when a '/.' or '/..' is directly followed by '?' or '#', the query
    or fragment is appended to the segment that precedes it (for example
    '/abc/def/..#x' yields '/abc#x'). This is kept as-is on purpose.

    :param path: the IRI path to remove dot segments from.

    :return: a path with normalized dot segments, will always start with a '/'.
    """
    # Each entry is the list of string pieces that make up one path segment.
    segment_buffers = []
    i = 0
    length = len(path)

    while i < length:
        ch = path[i]

        if ch == '/':
            # Handle '/.' or '/..'
            if i + 1 < length and path[i + 1] == '.':
                # Handle '/..'
                if i + 2 < length and path[i + 2] == '.':
                    # Slicing yields '' when '/..' is at the end of the path.
                    next_ch = path[i + 3:i + 4]
                    if not is_character_allowed_after_relative_path_segment(next_ch):
                        # Something like '/..g': a regular segment that merely
                        # starts with dots. Open it and let the dots be copied
                        # by the regular-character branch.
                        segment_buffers.append([])
                        i += 1
                        continue

                    # Go to parent directory
                    if segment_buffers:
                        segment_buffers.pop()

                    # Add trailing slash segment if ends with '/..'
                    if i + 3 >= length:
                        segment_buffers.append([])

                    i += 3
                    continue

                # Handle '/.'
                next_ch = path[i + 2:i + 3]
                if not is_character_allowed_after_relative_path_segment(next_ch):
                    # Something like '/.g': a regular segment starting with '.'.
                    segment_buffers.append([])
                    i += 1
                    continue

                # Add trailing slash if ends with '/.'
                if i + 2 >= length:
                    segment_buffers.append([])

                # Stay in current directory - skip
                i += 2
                continue

            # Regular '/' starts a new segment
            segment_buffers.append([])
            i += 1
            continue

        elif ch in ('#', '?'):
            # Query or fragment -> append unchanged and stop
            if not segment_buffers:
                segment_buffers.append([])
            segment_buffers[-1].append(path[i:])

            # Break the while loop
            break

        else:
            # Regular character -> append to current segment
            if not segment_buffers:
                segment_buffers.append([])
            segment_buffers[-1].append(ch)
            i += 1

    return '/' + '/'.join(''.join(buffer) for buffer in segment_buffers)


def remove_dot_segments_of_path(iri: str, colon_position: int) -> str:
    """
    Remove dot segments from the path portion of an IRI (RFC 3986 section 5.2.4).

    :param iri: an IRI (or part of IRI).
    :param colon_position: the position of the first ':' in the IRI,
        or a negative value if the IRI contains no ':'.

    :return: the IRI where dot segments were removed; returned unchanged
        when no '/' that starts a path is found.
    """
    # Determine where to start looking for the first '/' that indicates the start of the path
    if colon_position >= 0:
        if len(iri) > colon_position + 2 and iri[colon_position + 1] == '/' and iri[colon_position + 2] == '/':
            # Skip the '//' that follows the scheme
            search_offset = colon_position + 3
        else:
            search_offset = colon_position + 1
    else:
        if len(iri) > 1 and iri[0] == '/' and iri[1] == '/':
            # Skip a leading '//'
            search_offset = 2
        else:
            search_offset = 0

    # Find the start of the path
    path_separator = iri.find('/', search_offset)
    if path_separator < 0:
        return iri

    base = iri[:path_separator]
    path = iri[path_separator:]

    # Remove dot segments from the path
    return base + remove_dot_segments(path)


def resolve(relative_iri: str, base_iri: str = "") -> str:
    """
    Resolves a given relative IRI to an absolute IRI.

    :param relative_iri: the relative IRI.
    :param base_iri: the base IRI.

    :return: the absolute IRI.

    :raises ValueError: if 'relative_iri' is empty and 'base_iri' contains
        no ':'; if 'base_iri' is empty and 'relative_iri' contains no ':'
        (and does not start with '?' or '#'); or if 'relative_iri' contains
        no ':' and 'base_iri' is non-empty but contains no ':'.
    """
    base_fragment_pos = base_iri.find("#")

    # Ignore any fragments in the base IRI
    if base_fragment_pos > 0:
        base_iri = base_iri[:base_fragment_pos]

    # Convert empty value directly to base IRI
    if not relative_iri:
        if ":" not in base_iri:
            raise ValueError(f"Found invalid baseIRI '{base_iri}' for value '{relative_iri}'")
        return base_iri

    # If the value starts with a query character, concat directly (strip existing query)
    if relative_iri.startswith("?"):
        base_query_pos = base_iri.find("?")
        if base_query_pos > 0:
            base_iri = base_iri[:base_query_pos]
        return base_iri + relative_iri

    # If the value starts with a fragment character, concat directly
    if relative_iri.startswith("#"):
        return base_iri + relative_iri

    # Ignore baseIRI if it is empty
    if not base_iri:
        relative_colon_pos = relative_iri.find(":")
        if relative_colon_pos < 0:
            raise ValueError(f"Found invalid relative IRI '{relative_iri}' for a missing baseIRI")
        return remove_dot_segments_of_path(relative_iri, relative_colon_pos)

    # Ignore baseIRI if the value is absolute
    value_colon_pos = relative_iri.find(":")
    if value_colon_pos >= 0:
        return remove_dot_segments_of_path(relative_iri, value_colon_pos)

    # From here on relative_iri contains no ':' (value_colon_pos is -1).

    # baseIRI must be absolute
    base_colon_pos = base_iri.find(":")
    if base_colon_pos < 0:
        raise ValueError(f"Found invalid baseIRI '{base_iri}' for value '{relative_iri}'")

    base_scheme = base_iri[:base_colon_pos + 1]

    # Inherit base scheme if relative starts with '//'
    if relative_iri.startswith("//"):
        return base_scheme + remove_dot_segments_of_path(relative_iri, value_colon_pos)

    # Determine where the path of base starts
    if base_iri.find("//", base_colon_pos) == base_colon_pos + 1:
        base_slash_after_colon_pos = base_iri.find("/", base_colon_pos + 3)
        if base_slash_after_colon_pos < 0:
            if len(base_iri) > base_colon_pos + 3:
                # Base has something after '//' but no path: start one
                return base_iri + "/" + remove_dot_segments_of_path(relative_iri, value_colon_pos)
            # Base is only 'scheme://'
            return base_scheme + remove_dot_segments_of_path(relative_iri, value_colon_pos)
    else:
        base_slash_after_colon_pos = base_iri.find("/", base_colon_pos + 1)
        if base_slash_after_colon_pos < 0:
            # Base without '//' and without any '/': only the scheme is kept
            return base_scheme + remove_dot_segments_of_path(relative_iri, value_colon_pos)

    # If relative starts with '/', append after base authority
    if relative_iri.startswith("/"):
        return base_iri[:base_slash_after_colon_pos] + remove_dot_segments(relative_iri)

    base_path = base_iri[base_slash_after_colon_pos:]
    last_slash = base_path.rfind("/")

    # Ignore everything after last '/' in base path
    if last_slash >= 0 and last_slash < len(base_path) - 1:
        base_path = base_path[:last_slash + 1]

    # base_path now ends with '/'. A relative path that is just '.' followed
    # by a query or fragment refers to that directory itself, so drop the dot
    # and keep the trailing slash. Only '.?' and '.#' qualify: names that
    # merely start with a dot (e.g. '.htaccess') must keep it.
    if relative_iri.startswith((".?", ".#")):
        relative_iri = relative_iri[1:]

    relative_iri = base_path + relative_iri
    relative_iri = remove_dot_segments(relative_iri)

    return base_iri[:base_slash_after_colon_pos] + relative_iri
# tests/test_iri_resolver.py
import pytest
from pyld.iri_resolver import resolve, remove_dot_segments

# Tests ported from relative-to-absolute-iri.js: https://github.com/rubensworks/relative-to-absolute-iri.js/blob/master/test/Resolve-test.ts

# Base IRI with path parameters and a query, shared by many resolve() cases.
COMPLEX_BASE = 'file:///a/bb/ccc/d;p?q'

# (relative_iri, expected) - resolve() called without a base argument.
RESOLVE_WITHOUT_BASE = [
    ('http://example.org/', 'http://example.org/'),
    ('ex:abc', 'ex:abc'),
    ('http://abc/../../', 'http://abc/'),
]

# (relative_iri, base_iri, expected) - resolve() called with an explicit base.
RESOLVE_WITH_BASE = [
    ('http://example.org/', '', 'http://example.org/'),
    ('http://example.org/', 'http://base.org/', 'http://example.org/'),
    ('', 'http://base.org/', 'http://base.org/'),
    ('abc', 'http://base.org/', 'http://base.org/abc'),
    ('abc', 'http://base.org/#frag', 'http://base.org/abc'),
    ('#abc', 'http://base.org/', 'http://base.org/#abc'),
    ('http:abc', 'http://base.org/', 'http:abc'),
    ('http://abc/../../', 'http://base.org/', 'http://abc/'),
    ('//abc', 'http://base.org/', 'http://abc'),
    # Bases without a path
    ('abc', 'http://base.org', 'http://base.org/abc'),
    ('abc/./', 'http://base.org', 'http://base.org/abc/'),
    ('abc', 'http://', 'http:abc'),
    ('abc/./', 'http://', 'http:abc/'),
    ('abc', 'http:a', 'http:abc'),
    ('abc/./', 'http:a', 'http:abc/'),
    ('abc', 'http:', 'http:abc'),
    ('abc/./', 'http:', 'http:abc/'),
    # Bases with a path
    ('/abc/def/', 'http://base.org/123/456/', 'http://base.org/abc/def/'),
    ('xyz', 'http://aa/a', 'http://aa/xyz'),
    ('xyz', 'http://aa/parent/parent/../../a', 'http://aa/xyz'),
    ('xyz', 'http://aa/././a', 'http://aa/xyz'),
    ('.', 'http://aa/', 'http://aa/'),
    ('..', 'http://aa/b/', 'http://aa/'),
    ('../', 'http://aa/b/', 'http://aa/'),
    ('..', 'http://aa/b', 'http://aa/'),
    ('../', 'http://aa/b', 'http://aa/'),
    ('?a=b', 'http://abc/def/ghi', 'http://abc/def/ghi?a=b'),
    ('.?a=b', 'http://abc/def/ghi', 'http://abc/def/?a=b'),
    ('..?a=b', 'http://abc/def/ghi', 'http://abc/?a=b'),
    ('xyz', 'http://abc/d:f/ghi', 'http://abc/d:f/xyz'),
    ('./xyz', 'http://abc/d:f/ghi', 'http://abc/d:f/xyz'),
    ('../xyz', 'http://abc/d:f/ghi', 'http://abc/xyz'),
    # Complex base
    ('g:h', COMPLEX_BASE, 'g:h'),
    ('g', COMPLEX_BASE, 'file:///a/bb/ccc/g'),
    ('./g', COMPLEX_BASE, 'file:///a/bb/ccc/g'),
    ('g/', COMPLEX_BASE, 'file:///a/bb/ccc/g/'),
    ('/g', COMPLEX_BASE, 'file:///g'),
    ('//g', COMPLEX_BASE, 'file://g'),
    ('?y', COMPLEX_BASE, 'file:///a/bb/ccc/d;p?y'),
    ('g?y', COMPLEX_BASE, 'file:///a/bb/ccc/g?y'),
    ('#s', COMPLEX_BASE, 'file:///a/bb/ccc/d;p?q#s'),
    ('g#s', COMPLEX_BASE, 'file:///a/bb/ccc/g#s'),
    ('g?y#s', COMPLEX_BASE, 'file:///a/bb/ccc/g?y#s'),
    (';x', COMPLEX_BASE, 'file:///a/bb/ccc/;x'),
    ('g;x', COMPLEX_BASE, 'file:///a/bb/ccc/g;x'),
    ('g;x?y#s', COMPLEX_BASE, 'file:///a/bb/ccc/g;x?y#s'),
    ('', COMPLEX_BASE, 'file:///a/bb/ccc/d;p?q'),
    ('.', COMPLEX_BASE, 'file:///a/bb/ccc/'),
    ('./', COMPLEX_BASE, 'file:///a/bb/ccc/'),
    ('..', COMPLEX_BASE, 'file:///a/bb/'),
    ('../', COMPLEX_BASE, 'file:///a/bb/'),
    ('../g', COMPLEX_BASE, 'file:///a/bb/g'),
    ('../..', COMPLEX_BASE, 'file:///a/'),
    ('../../', COMPLEX_BASE, 'file:///a/'),
    ('../../g', COMPLEX_BASE, 'file:///a/g'),
    ('../../..', COMPLEX_BASE, 'file:///'),
    ('../../../', COMPLEX_BASE, 'file:///'),
    ('../../../g', COMPLEX_BASE, 'file:///g'),
    ('../../../../g', COMPLEX_BASE, 'file:///g'),
    ('/./g', COMPLEX_BASE, 'file:///g'),
    ('/../g', COMPLEX_BASE, 'file:///g'),
    ('g.', COMPLEX_BASE, 'file:///a/bb/ccc/g.'),
    ('.g', COMPLEX_BASE, 'file:///a/bb/ccc/.g'),
    ('g..', COMPLEX_BASE, 'file:///a/bb/ccc/g..'),
    ('..g', COMPLEX_BASE, 'file:///a/bb/ccc/..g'),
    ('./../g', COMPLEX_BASE, 'file:///a/bb/g'),
    ('./g/.', COMPLEX_BASE, 'file:///a/bb/ccc/g/'),
    ('g/./h', COMPLEX_BASE, 'file:///a/bb/ccc/g/h'),
    ('g/../h', COMPLEX_BASE, 'file:///a/bb/ccc/h'),
    ('g;x=1/./y', COMPLEX_BASE, 'file:///a/bb/ccc/g;x=1/y'),
    ('g;x=1/../y', COMPLEX_BASE, 'file:///a/bb/ccc/y'),
    ('g?y/./x', COMPLEX_BASE, 'file:///a/bb/ccc/g?y/./x'),
    ('g?y/../x', COMPLEX_BASE, 'file:///a/bb/ccc/g?y/../x'),
    ('g#s/./x', COMPLEX_BASE, 'file:///a/bb/ccc/g#s/./x'),
    ('g#s/../x', COMPLEX_BASE, 'file:///a/bb/ccc/g#s/../x'),
    ('http:g', COMPLEX_BASE, 'http:g'),
    # Scheme-relative value, bases without '//', and '...' segments
    (
        '//example.org/.././useless/../../scheme-relative',
        'http://example.com/some/deep/directory/and/file#with-a-fragment',
        'http://example.org/scheme-relative',
    ),
    ('a', 'tag:example', 'tag:a'),
    ('a', 'tag:example/foo', 'tag:example/a'),
    ('a', 'tag:example/foo/', 'tag:example/foo/a'),
    ('../.../../', 'http://example.org/a/b/c/', 'http://example.org/a/b/'),
    ('../.../../../', 'http://example.org/a/b/c/', 'http://example.org/a/'),
]

# (positional args for resolve(), regex the ValueError message must match)
RESOLVE_ERRORS = [
    (('abc',), r"Found invalid relative IRI 'abc' for a missing baseIRI"),
    (('abc', 'def'), r"Found invalid baseIRI 'def' for value 'abc'"),
    (('', 'def'), r"Found invalid baseIRI 'def' for value ''"),
]

# (path, expected) - inputs and outputs of remove_dot_segments().
REMOVE_DOT_SEGMENTS_CASES = [
    ('abc', '/abc'),
    ('abc/', '/abc/'),
    ('/abc', '/abc'),
    ('/abc/', '/abc/'),
    ('/.', '/'),
    ('/..', '/'),
    ('/abc/..', '/'),
    ('/abc/../../..', '/'),
    ('/abc/.', '/abc/'),
    ('/abc/../def/', '/def/'),
    ('mid/content=5/../6', '/mid/6'),
    ('/abc/./def/', '/abc/def/'),
    ('/abc/def/ghi/../..', '/abc/'),
    ('/abc/././.', '/abc/'),
    ('/abc/def/./ghi/../..', '/abc/'),
    ('/a/b/c/./../../g', '/a/g'),
    # Fragments are left untouched
    ('/abc#abcdef', '/abc#abcdef'),
    ('/abc#a/bc/def', '/abc#a/bc/def'),
    ('/abc#a/./bc/def', '/abc#a/./bc/def'),
    ('/abc#a/../bc/def', '/abc#a/../bc/def'),
    # Queries are left untouched
    ('/abc?abcdef', '/abc?abcdef'),
    ('/abc?a/bc/def', '/abc?a/bc/def'),
    ('/abc?a/./bc/def', '/abc?a/./bc/def'),
    ('/abc?a/../bc/def', '/abc?a/../bc/def'),
    ('/abc/def/./ghi/../..#abc', '/abc#abc'),
    ('#abc', '/#abc'),
    # Zero-length segments
    ('/abc//def/', '/abc//def/'),
    ('/abc//def//../', '/abc//def/'),
    ('/abc//def//./', '/abc//def//'),
    ('/def/ghi?a=b', '/def/ghi?a=b'),
    ('/def/?a=b', '/def/?a=b'),
    ('/def/..?a=b', '/?a=b'),
    # Segments that merely contain dots
    ('/a/bb/ccc/.g', '/a/bb/ccc/.g'),
    ('/a/bb/ccc/g.', '/a/bb/ccc/g.'),
    ('/a/bb/ccc/..g', '/a/bb/ccc/..g'),
    ('/a/bb/ccc/g..', '/a/bb/ccc/g..'),
    ('/a/bb/ccc/./g/.', '/a/bb/ccc/g/'),
    ('/invalid/...', '/invalid/...'),
    ('/invalid/.../..', '/invalid/'),
    ('/invalid/../..../../../.../.htaccess', '/.../.htaccess'),
    ('/invalid/../.a/../../.../.htaccess', '/.../.htaccess'),
]


# ---------- Tests for resolve() ----------
class TestResolve:
    """Table-driven checks of resolve()."""

    @pytest.mark.parametrize('relative_iri, expected', RESOLVE_WITHOUT_BASE)
    def test_resolve_without_base(self, relative_iri, expected):
        assert resolve(relative_iri) == expected

    @pytest.mark.parametrize('relative_iri, base_iri, expected', RESOLVE_WITH_BASE)
    def test_resolve_with_base(self, relative_iri, base_iri, expected):
        assert resolve(relative_iri, base_iri) == expected

    @pytest.mark.parametrize('args, message', RESOLVE_ERRORS)
    def test_resolve_invalid_input_error(self, args, message):
        with pytest.raises(ValueError, match=message):
            resolve(*args)


# ---------- Tests for remove_dot_segments() ----------
class TestRemoveDotSegments:
    """Table-driven checks of remove_dot_segments()."""

    @pytest.mark.parametrize('path, expected', REMOVE_DOT_SEGMENTS_CASES)
    def test_remove_dot_segments(self, path, expected):
        assert remove_dot_segments(path) == expected


if __name__ == "__main__":
    pytest.main(["-v", __file__])