From ae6c615e948e0a1e80f7e328997d97c01ce33215 Mon Sep 17 00:00:00 2001 From: Miel Vander Sande Date: Tue, 4 Nov 2025 13:23:16 +0100 Subject: [PATCH 1/3] Add stub files and start porting tests --- lib/pyld/iri_resolver.py | 51 +++ tests/test_iri_resolver.py | 718 +++++++++++++++++++++++++++++++++++++ 2 files changed, 769 insertions(+) create mode 100644 lib/pyld/iri_resolver.py create mode 100644 tests/test_iri_resolver.py diff --git a/lib/pyld/iri_resolver.py b/lib/pyld/iri_resolver.py new file mode 100644 index 00000000..a9030267 --- /dev/null +++ b/lib/pyld/iri_resolver.py @@ -0,0 +1,51 @@ +def resolve(relative_iri: str, base_iri: str | None = None) -> str: + # TODO: implement + return '' + +def remove_dot_segments(path: str) -> str: + """ + Removes dot segments from a URL path. + + :param path: the path to remove dot segments from. + + :return: a path with normalized dot segments. + """ + + # RFC 3986 5.2.4 (reworked) + + # empty path shortcut + if len(path) == 0: + return '' + + input = path.split('/') + output = [] + + while len(input) > 0: + next = input.pop(0) + done = len(input) == 0 + + if next == '.': + if done: + # ensure output has trailing / + output.append('') + continue + + if next == '..': + if len(output) > 0: + output.pop() + if done: + # ensure output has trailing / + output.append('') + continue + + output.append(next) + + # ensure output has leading / + # merge path segments from section 5.2.3 + # note that if the path includes no segments, the entire path is removed + if len(output) > 0 and path.startswith('/') and output[0] != '': + output.insert(0, '') + if len(output) == 1 and output[0] == '': + return '/' + + return '/'.join(output) diff --git a/tests/test_iri_resolver.py b/tests/test_iri_resolver.py new file mode 100644 index 00000000..64b79d90 --- /dev/null +++ b/tests/test_iri_resolver.py @@ -0,0 +1,718 @@ +import pytest +from pyld.iri_resolver import resolve, remove_dot_segments + +# Tests ported from relative-to-absolute-iri.js: 
import pytest
from pyld.iri_resolver import resolve, remove_dot_segments

# Tests ported from relative-to-absolute-iri.js:
# https://github.com/rubensworks/relative-to-absolute-iri.js/blob/master/test/Resolve-test.ts


# ---------- Tests for resolve() ----------
class TestResolve:
    """Expected behavior of resolve(), one upstream test case per method."""

    def test_absolute_iri_no_base(self):
        assert resolve('http://example.org/') == 'http://example.org/'

    def test_absolute_iri_empty_base(self):
        assert resolve('http://example.org/', '') == 'http://example.org/'

    def test_absolute_iri_with_base(self):
        assert resolve('http://example.org/', 'http://base.org/') == 'http://example.org/'

    def test_empty_value_uses_base(self):
        assert resolve('', 'http://base.org/') == 'http://base.org/'

    def test_relative_with_scheme_no_base(self):
        assert resolve('ex:abc') == 'ex:abc'

    def test_relative_without_scheme_no_base_error(self):
        with pytest.raises(ValueError, match=r"Found invalid relative IRI 'abc' for a missing baseIRI"):
            resolve('abc')

    def test_relative_without_dot_segments_no_base(self):
        assert resolve('http://abc/../../') == 'http://abc/'

    def test_relative_with_base(self):
        assert resolve('abc', 'http://base.org/') == 'http://base.org/abc'

    def test_relative_with_fragment_base(self):
        assert resolve('abc', 'http://base.org/#frag') == 'http://base.org/abc'

    def test_hash_relative(self):
        assert resolve('#abc', 'http://base.org/') == 'http://base.org/#abc'

    def test_colon_in_value_ignores_base(self):
        assert resolve('http:abc', 'http://base.org/') == 'http:abc'

    def test_colon_in_value_removes_dots(self):
        assert resolve('http://abc/../../', 'http://base.org/') == 'http://abc/'

    def test_non_absolute_base_error(self):
        with pytest.raises(ValueError, match=r"Found invalid baseIRI 'def' for value 'abc'"):
            resolve('abc', 'def')

    def test_non_absolute_base_empty_value_error(self):
        with pytest.raises(ValueError, match=r"Found invalid baseIRI 'def' for value ''"):
            resolve('', 'def')

    def test_scheme_from_base_if_value_starts_with_slash_slash(self):
        assert resolve('//abc', 'http://base.org/') == 'http://abc'

    def test_base_without_path_slash(self):
        assert resolve('abc', 'http://base.org') == 'http://base.org/abc'

    def test_base_without_path_dot_segments(self):
        assert resolve('abc/./', 'http://base.org') == 'http://base.org/abc/'

    def test_base_only_scheme_slash_slash(self):
        assert resolve('abc', 'http://') == 'http:abc'

    def test_base_only_scheme_slash_slash_dot_segments(self):
        assert resolve('abc/./', 'http://') == 'http:abc/'

    def test_base_with_char_after_colon(self):
        assert resolve('abc', 'http:a') == 'http:abc'

    def test_base_with_char_after_colon_dot_segments(self):
        assert resolve('abc/./', 'http:a') == 'http:abc/'

    def test_base_only_scheme(self):
        assert resolve('abc', 'http:') == 'http:abc'

    def test_base_only_scheme_dot_segments(self):
        assert resolve('abc/./', 'http:') == 'http:abc/'

    def test_absolute_path_ignores_base_path(self):
        assert resolve('/abc/def/', 'http://base.org/123/456/') == 'http://base.org/abc/def/'

    def test_base_with_last_slash_replacement(self):
        assert resolve('xyz', 'http://aa/a') == 'http://aa/xyz'

    def test_base_collapse_parent_paths(self):
        assert resolve('xyz', 'http://aa/parent/parent/../../a') == 'http://aa/xyz'

    def test_base_remove_current_dir(self):
        assert resolve('xyz', 'http://aa/././a') == 'http://aa/xyz'

    def test_base_dot(self):
        assert resolve('.', 'http://aa/') == 'http://aa/'

    def test_base_double_dot(self):
        assert resolve('..', 'http://aa/b/') == 'http://aa/'

    def test_base_double_dot_slash(self):
        assert resolve('../', 'http://aa/b/') == 'http://aa/'

    def test_base_without_ending_slash_double_dot(self):
        assert resolve('..', 'http://aa/b') == 'http://aa/'

    def test_base_without_ending_slash_double_dot_slash(self):
        assert resolve('../', 'http://aa/b') == 'http://aa/'

    def test_base_without_ending_slash_query(self):
        assert resolve('?a=b', 'http://abc/def/ghi') == 'http://abc/def/ghi?a=b'

    def test_base_without_ending_slash_dot_query(self):
        assert resolve('.?a=b', 'http://abc/def/ghi') == 'http://abc/def/?a=b'

    def test_base_without_ending_slash_double_dot_query(self):
        assert resolve('..?a=b', 'http://abc/def/ghi') == 'http://abc/?a=b'

    def test_base_without_ending_slash_xyz(self):
        assert resolve('xyz', 'http://abc/d:f/ghi') == 'http://abc/d:f/xyz'

    def test_base_without_ending_slash_dot_xyz(self):
        assert resolve('./xyz', 'http://abc/d:f/ghi') == 'http://abc/d:f/xyz'

    def test_base_without_ending_slash_double_dot_xyz(self):
        assert resolve('../xyz', 'http://abc/d:f/ghi') == 'http://abc/xyz'

    def test_relative_with_colon_ignores_base(self):
        assert resolve('g:h', 'file:///a/bb/ccc/d;p?q') == 'g:h'

    def test_simple_relative_with_complex_base(self):
        assert resolve('g', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g'

    # named after './g' so it does not collide with the '.' test further down
    def test_dot_slash_g_relative_with_complex_base(self):
        assert resolve('./g', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g'

    def test_slash_suffix_relative_with_complex_base(self):
        assert resolve('g/', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g/'

    def test_slash_prefix_relative_with_complex_base(self):
        assert resolve('/g', 'file:///a/bb/ccc/d;p?q') == 'file:///g'

    def test_double_slash_prefix_relative_with_complex_base(self):
        assert resolve('//g', 'file:///a/bb/ccc/d;p?q') == 'file://g'

    def test_questionmark_prefix_relative_with_complex_base(self):
        assert resolve('?y', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/d;p?y'

    def test_questionmark_middle_relative_with_complex_base(self):
        assert resolve('g?y', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g?y'

    def test_hashtag_prefix_relative_with_complex_base(self):
        assert resolve('#s', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/d;p?q#s'

    def test_middle_hashtag_relative_with_complex_base(self):
        assert resolve('g#s', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g#s'

    def test_middle_questionmark_and_hashtag_relative_with_complex_base(self):
        assert resolve('g?y#s', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g?y#s'

    def test_semicolon_prefix_relative_with_complex_base(self):
        assert resolve(';x', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/;x'

    def test_middle_semicolon_relative_with_complex_base(self):
        assert resolve('g;x', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g;x'

    def test_semicolon_questionmark_and_hashtag_relative_with_complex_base(self):
        assert resolve('g;x?y#s', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g;x?y#s'

    def test_empty_relative_with_complex_base(self):
        assert resolve('', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/d;p?q'

    # '.' resolves to the directory of the base, dropping its last segment
    # and query (upstream expectation; same as the RFC 3986 5.4.1 example)
    def test_dot_relative_with_complex_base(self):
        assert resolve('.', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/'

    def test_dot_slash_relative_with_complex_base(self):
        assert resolve('./', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/'

    def test_double_dot_relative_with_complex_base(self):
        assert resolve('..', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/'

    def test_double_dot_slash_relative_with_complex_base(self):
        assert resolve('../', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/'

    def test_double_dot_slash_g_relative_with_complex_base(self):
        assert resolve('../g', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/g'

    def test_double_dot_slash_double_dot_relative_with_complex_base(self):
        assert resolve('../..', 'file:///a/bb/ccc/d;p?q') == 'file:///a/'

    def test_2x_dot_slash_double_dot_slash_relative_with_complex_base(self):
        assert resolve('../../', 'file:///a/bb/ccc/d;p?q') == 'file:///a/'

    def test_2x_double_dot_slash_with_g_relative_with_complex_base(self):
        assert resolve('../../g', 'file:///a/bb/ccc/d;p?q') == 'file:///a/g'

    def test_2x_double_dot_slash_with_double_dot_relative_with_complex_base(self):
        assert resolve('../../..', 'file:///a/bb/ccc/d;p?q') == 'file:///'

    def test_3x_double_dot_slash_relative_with_complex_base(self):
        assert resolve('../../../', 'file:///a/bb/ccc/d;p?q') == 'file:///'

    def test_3x_double_dot_slash_with_g_relative_with_complex_base(self):
        assert resolve('../../../g', 'file:///a/bb/ccc/d;p?q') == 'file:///g'

    def test_4x_double_dot_slash_with_g_relative_with_complex_base(self):
        assert resolve('../../../../g', 'file:///a/bb/ccc/d;p?q') == 'file:///g'

    def test_slash_dot_slash_g_relative_with_complex_base(self):
        assert resolve('/./g', 'file:///a/bb/ccc/d;p?q') == 'file:///g'

    def test_slash_double_dot_slash_g_relative_with_complex_base(self):
        assert resolve('/../g', 'file:///a/bb/ccc/d;p?q') == 'file:///g'

    # 'g.' is an ordinary segment name, not a dot segment: the dot is kept
    def test_dot_suffix_relative_with_complex_base(self):
        assert resolve('g.', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g.'

    def test_dot_prefix_relative_with_complex_base(self):
        assert resolve('.g', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/.g'

    def test_double_dot_suffix_relative_with_complex_base(self):
        assert resolve('g..', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g..'

    def test_double_dot_prefix_relative_with_complex_base(self):
        assert resolve('..g', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/..g'

    def test_dot_slash_double_dot_slash_g_relative_with_complex_base(self):
        assert resolve('./../g', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/g'

    def test_dot_slash_g_slash_dot_relative_with_complex_base(self):
        assert resolve('./g/.', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g/'

    def test_g_slash_dot_slash_h_relative_with_complex_base(self):
        assert resolve('g/./h', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g/h'

    def test_g_slash_double_dot_slash_h_relative_with_complex_base(self):
        assert resolve('g/../h', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/h'

    def test_g_semicolon_x_equals_1_slash_dot_slash_y_relative_with_complex_base(self):
        assert resolve('g;x=1/./y', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g;x=1/y'

    def test_g_semicolon_x_equals_1_slash_double_dot_slash_y_relative_with_complex_base(self):
        assert resolve('g;x=1/../y', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/y'

    def test_g_questionmark_y_slash_dot_slash_x_relative_with_complex_base(self):
        assert resolve('g?y/./x', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g?y/./x'

    def test_g_questionmark_y_slash_double_dot_slash_x_relative_with_complex_base(self):
        assert resolve('g?y/../x', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g?y/../x'

    def test_g_hashtag_s_slash_dot_slash_x_relative_with_complex_base(self):
        assert resolve('g#s/./x', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g#s/./x'

    def test_g_hashtag_s_slash_double_dot_slash_x_relative_with_complex_base(self):
        assert resolve('g#s/../x', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g#s/../x'

    def test_http_g_relative_with_complex_base(self):
        assert resolve('http:g', 'file:///a/bb/ccc/d;p?q') == 'http:g'

    def test_scheme_relative_with_dot_segments_and_fragment_base(self):
        assert resolve(
            '//example.org/.././useless/../../scheme-relative',
            'http://example.com/some/deep/directory/and/file#with-a-fragment',
        ) == 'http://example.org/scheme-relative'

    def test_relative_a_with_base_without_double_slash_after_scheme(self):
        assert resolve('a', 'tag:example') == 'tag:a'

    def test_relative_a_with_base_without_double_slash_after_scheme_with_one_slash(self):
        assert resolve('a', 'tag:example/foo') == 'tag:example/a'

    def test_relative_a_with_base_without_double_slash_after_scheme_with_two_slash(self):
        assert resolve('a', 'tag:example/foo/') == 'tag:example/foo/a'

    def test_relative_with_triple_dot_segment_and_double_dot_and_base(self):
        assert resolve('../.../../', 'http://example.org/a/b/c/') == 'http://example.org/a/b/'

    def test_relative_with_triple_dot_segment_and_2x_double_dot_and_base(self):
        assert resolve('../.../../../', 'http://example.org/a/b/c/') == 'http://example.org/a/'


# ---------- Tests for remove_dot_segments() ----------
class TestRemoveDotSegments:
    """Expected behavior of remove_dot_segments(), one upstream test case per method."""

    def test_no_slash(self):
        assert remove_dot_segments('abc') == '/abc'

    def test_single_slash_end(self):
        assert remove_dot_segments('abc/') == '/abc/'

    def test_leading_slash(self):
        assert remove_dot_segments('/abc') == '/abc'

    def test_leading_and_trailing_slash(self):
        assert remove_dot_segments('/abc/') == '/abc/'

    def test_dot(self):
        assert remove_dot_segments('/.') == '/'

    def test_dotdot(self):
        assert remove_dot_segments('/..') == '/'

    def test_parent_directory(self):
        assert remove_dot_segments('/abc/..') == '/'

    def test_too_many_parents(self):
        assert remove_dot_segments('/abc/../../..') == '/'

    def test_current_directory(self):
        assert remove_dot_segments('/abc/.') == '/abc/'

    def test_inbetween_parent_directory(self):
        assert remove_dot_segments('/abc/../def/') == '/def/'

    def test_inbetween_parent_directory_2(self):
        assert remove_dot_segments('mid/content=5/../6') == '/mid/6'

    def test_inbetween_current_directory(self):
        assert remove_dot_segments('/abc/./def/') == '/abc/def/'

    def test_multiple_parents(self):
        assert remove_dot_segments('/abc/def/ghi/../..') == '/abc/'

    def test_multiple_currents(self):
        assert remove_dot_segments('/abc/././.') == '/abc/'

    def test_mixed_current_and_parent(self):
        assert remove_dot_segments('/abc/def/./ghi/../..') == '/abc/'

    def test_another_mixed_current_and_parent(self):
        assert remove_dot_segments('/a/b/c/./../../g') == '/a/g'

    def test_not_modify_fragments(self):
        assert remove_dot_segments('/abc#abcdef') == '/abc#abcdef'

    def test_not_modify_paths_in_fragments(self):
        assert remove_dot_segments('/abc#a/bc/def') == '/abc#a/bc/def'

    def test_not_modify_current_paths_in_fragments(self):
        assert remove_dot_segments('/abc#a/./bc/def') == '/abc#a/./bc/def'

    def test_not_modify_parent_paths_in_fragments(self):
        assert remove_dot_segments('/abc#a/../bc/def') == '/abc#a/../bc/def'

    def test_not_modify_queries(self):
        assert remove_dot_segments('/abc?abcdef') == '/abc?abcdef'

    def test_not_modify_paths_in_queries(self):
        assert remove_dot_segments('/abc?a/bc/def') == '/abc?a/bc/def'

    def test_not_modify_current_paths_in_queries(self):
        assert remove_dot_segments('/abc?a/./bc/def') == '/abc?a/./bc/def'

    def test_not_modify_parent_paths_in_queries(self):
        assert remove_dot_segments('/abc?a/../bc/def') == '/abc?a/../bc/def'

    def test_mixed_current_and_parent_with_fragment(self):
        assert remove_dot_segments('/abc/def/./ghi/../..#abc') == '/abc#abc'

    def test_fragment_without_another_path(self):
        assert remove_dot_segments('#abc') == '/#abc'

    def test_not_remove_zerolength_segments(self):
        assert remove_dot_segments('/abc//def/') == '/abc//def/'

    def test_parent_into_zerolength_segments(self):
        assert remove_dot_segments('/abc//def//../') == '/abc//def/'

    def test_current_over_zerolength_segments(self):
        assert remove_dot_segments('/abc//def//./') == '/abc//def//'

    def test_resolve_query_against_non_slash(self):
        assert remove_dot_segments('/def/ghi?a=b') == '/def/ghi?a=b'

    def test_resolve_query_against_slash(self):
        assert remove_dot_segments('/def/?a=b') == '/def/?a=b'

    def test_resolve_double_dot_and_query(self):
        assert remove_dot_segments('/def/..?a=b') == '/?a=b'

    def test_append_dot_g_after_slash(self):
        assert remove_dot_segments('/a/bb/ccc/.g') == '/a/bb/ccc/.g'

    def test_append_g_dot_after_slash(self):
        assert remove_dot_segments('/a/bb/ccc/g.') == '/a/bb/ccc/g.'

    def test_append_double_dot_g_after_slash(self):
        assert remove_dot_segments('/a/bb/ccc/..g') == '/a/bb/ccc/..g'

    def test_append_g_double_dot_after_slash(self):
        assert remove_dot_segments('/a/bb/ccc/g..') == '/a/bb/ccc/g..'

    def test_trailing_slash_dot_ends_with_slash(self):
        assert remove_dot_segments('/a/bb/ccc/./g/.') == '/a/bb/ccc/g/'

    def test_triple_dots_as_normal_segment(self):
        assert remove_dot_segments('/invalid/...') == '/invalid/...'

    def test_triple_dots_followed_by_double_dot(self):
        assert remove_dot_segments('/invalid/.../..') == '/invalid/'

    def test_four_dots_as_normal_segment(self):
        assert remove_dot_segments('/invalid/../..../../../.../.htaccess') == '/.../.htaccess'

    def test_dot_with_invalid_char_as_normal_segment(self):
        assert remove_dot_segments('/invalid/../.a/../../.../.htaccess') == '/.../.htaccess'


if __name__ == "__main__":
    pytest.main(["-v", __file__])
5 Nov 2025 12:59:18 +0100 Subject: [PATCH 2/3] Complete porting tests --- tests/test_iri_resolver.py | 425 +++++-------------------------------- 1 file changed, 54 insertions(+), 371 deletions(-) diff --git a/tests/test_iri_resolver.py b/tests/test_iri_resolver.py index 64b79d90..f810c4d8 100644 --- a/tests/test_iri_resolver.py +++ b/tests/test_iri_resolver.py @@ -128,362 +128,147 @@ def test_relative_with_colon_ignores_base(self): def test_simple_relative_with_complex_base(self): assert resolve('g', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g' - def test_dot_relative_with_complex_base(self): + def test_dot_slash_g_relative_with_complex_base(self): assert resolve('./g', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g' -# it('create an IRI from a g/ relative IRI and complex baseIRI', () => { -# expect(resolve('g/', 'file:///a/bb/ccc/d;p?q')) -# .toEqual(''); -# }); - def test_slash_suffix_relative_with_complex_base(self): assert resolve('g/', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g/' -# it('create an IRI from a /g relative IRI and complex baseIRI', () => { -# expect(resolve('/g', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///g'); -# }); - def test_slash_prefix_relative_with_complex_base(self): assert resolve('/g', 'file:///a/bb/ccc/d;p?q') == 'file:///g' -# it('create an IRI from a //g relative IRI and complex baseIRI', () => { -# expect(resolve('//g', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file://g'); -# }); - def test_double_slash_prefix_relative_with_complex_base(self): assert resolve('//g', 'file:///a/bb/ccc/d;p?q') == 'file://g' -# it('create an IRI from a ?y relative IRI and complex baseIRI', () => { -# expect(resolve('?y', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/d;p?y'); -# }); - def test_questionmark_prefix_relative_with_complex_base(self): assert resolve('?y', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/d;p?y' -# it('create an IRI from a g?y relative IRI and complex baseIRI', () => { -# expect(resolve('g?y', 
'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/g?y'); -# }); - def test_questionmark_middle_relative_with_complex_base(self): assert resolve('g?y', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g?y' -# it('create an IRI from a #s relative IRI and complex baseIRI', () => { -# expect(resolve('#s', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/d;p?q#s'); -# }); - def test_hashtag_prefix_relative_with_complex_base(self): assert resolve('#s', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/d;p?q#s' -# it('create an IRI from a g#s relative IRI and complex baseIRI', () => { -# expect(resolve('g#s', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/g#s'); -# }); - def test_middle_hashtag_relative_with_complex_base(self): assert resolve('g#s', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g#s' -# it('create an IRI from a g?y#s relative IRI and complex baseIRI', () => { -# expect(resolve('g?y#s', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/g?y#s'); -# }); - def test_middle_questionmark_and_hashtag_relative_with_complex_base(self): assert resolve('g?y#s', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g?y#s' -# it('create an IRI from a ;x relative IRI and complex baseIRI', () => { -# expect(resolve(';x', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/;x'); -# }); - def test_semicolon_prefix_relative_with_complex_base(self): assert resolve(';x', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/;x' -# it('create an IRI from a g;x relative IRI and complex baseIRI', () => { -# expect(resolve('g;x', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/g;x'); -# }); - def test_middle_semicolon_relative_with_complex_base(self): assert resolve('g;x', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g;x' -# it('create an IRI from a g;x?y#s relative IRI and complex baseIRI', () => { -# expect(resolve('g;x?y#s', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/g;x?y#s'); -# }); - def 
test_semicolon_questionmark_and_hashtag_relative_with_complex_base(self): assert resolve('g;x?y#s', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g;x?y#s' -# it('create an IRI from an empty relative IRI and complex baseIRI', () => { -# expect(resolve('', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/d;p?q'); -# }); - def test_empty_relative_with_complex_base(self): assert resolve('', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/d;p?q' -# it('create an IRI from a . relative IRI and complex baseIRI', () => { -# expect(resolve('.', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/'); -# }); - def test_dot_relative_with_complex_base(self): assert resolve('.', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/d;p?q' -# it('create an IRI from a ./ relative IRI and complex baseIRI', () => { -# expect(resolve('./', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/'); -# }); - def test_dot_slash_relative_with_complex_base(self): assert resolve('./', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/' -# it('create an IRI from a .. relative IRI and complex baseIRI', () => { -# expect(resolve('..', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/'); -# }); - def test_double_dot_relative_with_complex_base(self): assert resolve('..', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/' - -# it('create an IRI from a ../ relative IRI and complex baseIRI', () => { -# expect(resolve('../', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/'); -# }); - + def test_double_dot_slash_relative_with_complex_base(self): assert resolve('../', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/' -# it('create an IRI from a ../g relative IRI and complex baseIRI', () => { -# expect(resolve('../g', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/g'); -# }); - def test_double_dot_slash_g_relative_with_complex_base(self): assert resolve('../g', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/g' -# it('create an IRI from a ../.. 
relative IRI and complex baseIRI', () => { -# expect(resolve('../..', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/'); -# }); - def test_double_dot_slash_double_dot_relative_with_complex_base(self): assert resolve('../..', 'file:///a/bb/ccc/d;p?q') == 'file:///a/' -# it('create an IRI from a ../../ relative IRI and complex baseIRI', () => { -# expect(resolve('../../', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/'); -# }); - def test_2x_dot_slash_double_dot_slash_relative_with_complex_base(self): assert resolve('../../', 'file:///a/bb/ccc/d;p?q') == 'file:///a/' -# it('create an IRI from a ../../g relative IRI and complex baseIRI', () => { -# expect(resolve('../../g', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/g'); -# }); - def test_2x_double_dot_slash_with_g_relative_with_complex_base(self): assert resolve('../../g', 'file:///a/bb/ccc/d;p?q') == 'file:///a/g' -# it('create an IRI from a ../../.. relative IRI and complex baseIRI', () => { -# expect(resolve('../../..', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///'); -# }); - def test_2x_double_dot_slash_with_double_dot_relative_with_complex_base(self): assert resolve('../../..', 'file:///a/bb/ccc/d;p?q') == 'file:///' -# it('create an IRI from a ../../../ relative IRI and complex baseIRI', () => { -# expect(resolve('../../../', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///'); -# }); - def test_3x_double_dot_slash_relative_with_complex_base(self): assert resolve('../../../', 'file:///a/bb/ccc/d;p?q') == 'file:///' -# it('create an IRI from a ../../../g relative IRI and complex baseIRI', () => { -# expect(resolve('../../../g', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///g'); -# }); - def test_3x_double_dot_slash_with_g_relative_with_complex_base(self): assert resolve('../../../g', 'file:///a/bb/ccc/d;p?q') == 'file:///g' -# it('create an IRI from a ../../../../g relative IRI and complex baseIRI', () => { -# expect(resolve('../../../../g', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///g'); 
-# }); - def test_4x_double_dot_slash_with_g_relative_with_complex_base(self): assert resolve('../../../../g', 'file:///a/bb/ccc/d;p?q') == 'file:///g' -# it('create an IRI from a /./g relative IRI and complex baseIRI', () => { -# expect(resolve('/./g', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///g'); -# }); - def test_slash_dot_slash_g_relative_with_complex_base(self): assert resolve('/./g', 'file:///a/bb/ccc/d;p?q') == 'file:///g' -# it('create an IRI from a /../g relative IRI and complex baseIRI', () => { -# expect(resolve('/../g', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///g'); -# }); - def test_slash_double_dot_slash_g_relative_with_complex_base(self): assert resolve('/../g', 'file:///a/bb/ccc/d;p?q') == 'file:///g' -# it('create an IRI from a g. relative IRI and complex baseIRI', () => { -# expect(resolve('g.', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/g.'); -# }); - def test_dot_suffix_relative_with_complex_base(self): assert resolve('g.', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g' -# it('create an IRI from a .g relative IRI and complex baseIRI', () => { -# expect(resolve('.g', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/.g'); -# }); - def test_dot_prefix_relative_with_complex_base(self): assert resolve('.g', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/.g' -# it('create an IRI from a g.. relative IRI and complex baseIRI', () => { -# expect(resolve('g..', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/g..'); -# }); - def test_double_dot_suffix_relative_with_complex_base(self): assert resolve('g..', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g..' 
-# it('create an IRI from a ..g relative IRI and complex baseIRI', () => { -# expect(resolve('..g', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/..g'); -# }); - def test_double_dot_prefix_relative_with_complex_base(self): assert resolve('..g', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/..g' -# it('create an IRI from a ./../g relative IRI and complex baseIRI', () => { -# expect(resolve('./../g', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/g'); -# }); - def test_dot_slash_double_dot_slash_g_relative_with_complex_base(self): assert resolve('./../g', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/g' -# it('create an IRI from a ./g/. relative IRI and complex baseIRI', () => { -# expect(resolve('./g/.', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/g/'); -# }); - def test_dot_slash_g_slash_dot_relative_with_complex_base(self): assert resolve('./g/.', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g/' -# it('create an IRI from a g/./h relative IRI and complex baseIRI', () => { -# expect(resolve('g/./h', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/g/h'); -# }); - def test_g_slash_dot_slash_h_relative_with_complex_base(self): assert resolve('g/./h', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g/h' -# it('create an IRI from a g/../h relative IRI and complex baseIRI', () => { -# expect(resolve('g/../h', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/h'); -# }); - def test_g_slash_double_dot_slash_h_relative_with_complex_base(self): assert resolve('g/../h', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/h' -# it('create an IRI from a g;x=1/./y relative IRI and complex baseIRI', () => { -# expect(resolve('g;x=1/./y', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/g;x=1/y'); -# }); - def test_g_semicolon_x_equals_1_slash_dot_slash_y_relative_with_complex_base(self): assert resolve('g;x=1/./y', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g;x=1/y' -# it('create an IRI from a g;x=1/../y relative IRI and complex 
baseIRI', () => { -# expect(resolve('g;x=1/../y', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/y'); -# }); - def test_g_semicolon_x_equals_1_slash_double_dot_slash_y_relative_with_complex_base(self): assert resolve('g;x=1/../y', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/y' -# it('create an IRI from a g?y/./x relative IRI and complex baseIRI', () => { -# expect(resolve('g?y/./x', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/g?y/./x'); -# }); - def test_g_questionmark_y_slash_dot_slash_x_relative_with_complex_base(self): assert resolve('g?y/./x', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g?y/./x' -# it('create an IRI from a g?y/../x relative IRI and complex baseIRI', () => { -# expect(resolve('g?y/../x', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/g?y/../x'); -# }); - -# it('create an IRI from a g#s/./x relative IRI and complex baseIRI', () => { -# expect(resolve('g#s/./x', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/g#s/./x'); -# }); - -# it('create an IRI from a g#s/../x relative IRI and complex baseIRI', () => { -# expect(resolve('g#s/../x', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/g#s/../x'); -# }); - -# it('create an IRI from a http:g relative IRI and complex baseIRI', () => { -# expect(resolve('http:g', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('http:g'); -# }); - -# it('create an IRI from a //example.org/.././useless/../../scheme-relative relative IRI and complex baseIRI', () => { -# expect(resolve('//example.org/.././useless/../../scheme-relative', -# 'http://example.com/some/deep/directory/and/file#with-a-fragment')) -# .toEqual('http://example.org/scheme-relative'); -# }); - -# it('create an IRI from a relative IRI a and baseIRI without // after scheme', () => { -# expect(resolve('a', 'tag:example')) -# .toEqual('tag:a'); -# }); - -# it('create an IRI from a relative IRI a and baseIRI without // after scheme, with one /', () => { -# expect(resolve('a', 'tag:example/foo')) -# 
.toEqual('tag:example/a'); -# }); - -# it('create an IRI from a relative IRI a and baseIRI without // after scheme, with two /', () => { -# expect(resolve('a', 'tag:example/foo/')) -# .toEqual('tag:example/foo/a'); -# }); + def test_g_questionmark_y_slash_double_dot_slash_x_relative_with_complex_base(self): + assert resolve('g?y/../x', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g?y/../x' + + def test_g_hash_s_slash_dot_slash_x_relative_with_complex_base(self): + assert resolve('g#s/./x', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g#s/./x' + + def test_g_hash_s_slash_double_dot_slash_x_relative_with_complex_base(self): + assert resolve('g#s/../x', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g#s/../x' + + def test_http_colon_g_relative_with_complex_base(self): + assert resolve('http:g', 'file:///a/bb/ccc/d;p?q') == 'http:g' + + def test_complex_relative_with_complex_base(self): + assert resolve('//example.org/.././useless/../../scheme-relative', 'http://example.com/some/deep/directory/and/file#with-a-fragment') == 'http://example.org/scheme-relative' + + def test_relative_with_complex_base_without_double_slash_after_scheme(self): + assert resolve('a', 'tag:example') == 'tag:a' + + def test_relative_with_complex_base_without_double_slash_after_scheme_with_one_slash(self): + assert resolve('a', 'tag:example/foo') == 'tag:example/a' def test_relative_a_with_base_without_double_slash_after_scheme_with_two_slash(self): assert resolve('a', 'tag:example/foo/') == 'tag:example/foo/a' -# it('create an IRI from a relative IRI with a ... segment and one .. and baseIRI', () => { -# expect(resolve('../.../../', 'http://example.org/a/b/c/')) -# .toEqual('http://example.org/a/b/'); -# }); - def test_relative_with_triple_dot_segment_and_double_dot_and_base(self): assert resolve('../.../../', 'http://example.org/a/b/c/') == 'http://example.org/a/b/' -# it('create an IRI from a relative IRI with a ... segment and two .. 
and baseIRI', () => { -# expect(resolve('../.../../../', 'http://example.org/a/b/c/')) -# .toEqual('http://example.org/a/'); -# }); - def test_relative_with_triple_dot_segment_and_2x_double_dot_and_base(self): assert resolve('../.../../../', 'http://example.org/a/b/c/') == 'http://example.org/a/' @@ -531,188 +316,86 @@ def test_multiple_parents(self): def test_multiple_currents(self): assert remove_dot_segments('/abc/././.') == '/abc/' -# it('should handle mixed current and parent directories', () => { -# expect(removeDotSegments('/abc/def/./ghi/../..')) -# .toEqual('/abc/'); -# }); - def test_mixed_current_and_parent(self): assert remove_dot_segments('/abc/def/./ghi/../..') == '/abc/' -# it('should handle another mixed current and parent directories', () => { -# expect(removeDotSegments('/a/b/c/./../../g')) -# .toEqual('/a/g'); -# }); - def test_another_mixed_current_and_parent(self): assert remove_dot_segments('/a/b/c/./../../g') == '/a/g' -# it('should not modify fragments', () => { -# expect(removeDotSegments('/abc#abcdef')) -# .toEqual('/abc#abcdef'); -# }); - def test_not_modify_fragments(self): assert remove_dot_segments('/abc#abcdef') == '/abc#abcdef' -# it('should not modify paths in fragments', () => { -# expect(removeDotSegments('/abc#a/bc/def')) -# .toEqual('/abc#a/bc/def'); -# }); - def test_not_modify_paths_in_fragments(self): assert remove_dot_segments('/abc#abcdef') == '/abc#a/bc/def' -# it('should not modify current paths in fragments', () => { -# expect(removeDotSegments('/abc#a/./bc/def')) -# .toEqual('/abc#a/./bc/def'); -# }); - def test_not_modify_current_paths_in_fragments(self): assert remove_dot_segments('/abc#a/./bc/def') == '/abc#a/./bc/def' -# it('should not modify parent paths in fragments', () => { -# expect(removeDotSegments('/abc#a/../bc/def')) -# .toEqual('/abc#a/../bc/def'); -# }); - def test_not_modify_parent_paths_in_fragments(self): assert remove_dot_segments('/abc#a/../bc/def') == '/abc#a/../bc/def' -# it('should not modify 
queries', () => { -# expect(removeDotSegments('/abc?abcdef')) -# .toEqual('/abc?abcdef'); -# }); - def test_not_modify_queries(self): assert remove_dot_segments('/abc?abcdef') == '/abc?abcdef' -# it('should not modify paths in queries', () => { -# expect(removeDotSegments('/abc?a/bc/def')) -# .toEqual('/abc?a/bc/def'); -# }); - def test_not_modify_paths_in_queries(self): assert remove_dot_segments('/abc?a/bc/def') == '/abc?a/bc/def' -# it('should not modify current paths in queries', () => { -# expect(removeDotSegments('/abc?a/./bc/def')) -# .toEqual('/abc?a/./bc/def'); -# }); - def test_not_modify_current_paths_in_queries(self): assert remove_dot_segments('/abc?a/./bc/def') == '/abc?a/./bc/def' -# it('should not modify parent paths in queries', () => { -# expect(removeDotSegments('/abc?a/../bc/def')) -# .toEqual('/abc?a/../bc/def'); -# }); - def test_not_modify_parent_paths_in_queries(self): assert remove_dot_segments('/abc?a/../bc/def') == '/abc?a/../bc/def' -# it('should handle mixed current and parent directories with a fragment', () => { -# expect(removeDotSegments('/abc/def/./ghi/../..#abc')) -# .toEqual('/abc#abc'); -# }); - def test_mixed_current_and_parent_with_fragment(self): assert remove_dot_segments('/abc/def/./ghi/../..#abc') == '/abc#abc' -# it('should handle a fragment without another path', () => { -# expect(removeDotSegments('#abc')) -# .toEqual('/#abc'); -# }); - def test_fragment_without_another_path(self): assert remove_dot_segments('#abc') == '/#abc' -# it('should not remove zero-length segments', () => { -# expect(removeDotSegments('/abc//def/')) -# .toEqual('/abc//def/'); -# }); - def test_not_remove_zerolength_segments(self): assert remove_dot_segments('/abc//def/') == '/abc//def/' -# it('should be able to parent into zero-length segments', () => { -# expect(removeDotSegments('/abc//def//../')) -# .toEqual('/abc//def/'); -# }); - def test_parent_into_zerolength_segments(self): assert remove_dot_segments('/abc//def//../') == '/abc//def/' -# 
it('should be able to current over zero-length segments', () => { -# expect(removeDotSegments('/abc//def//./')) -# .toEqual('/abc//def//'); -# }); - def test_current_over_zerolength_segments(self): assert remove_dot_segments('/abc//def//./') == '/abc//def//' -# it('should resolve a query against non-/', () => { -# expect(removeDotSegments('/def/ghi?a=b')) -# .toEqual('/def/ghi?a=b'); -# }); - def test_resolve_query_against_non_slash(self): assert remove_dot_segments('/def/ghi?a=b') == '/def/ghi?a=b' -# it('should resolve a query against /', () => { -# expect(removeDotSegments('/def/?a=b')) -# .toEqual('/def/?a=b'); -# }); - -# it('should resolve a .. and query', () => { -# expect(removeDotSegments('/def/..?a=b')) -# .toEqual('/?a=b'); -# }); - -# it('should just append a .g after a slash', () => { -# expect(removeDotSegments('/a/bb/ccc/.g')) -# .toEqual('/a/bb/ccc/.g'); -# }); - -# it('should just append a g. after a slash', () => { -# expect(removeDotSegments('/a/bb/ccc/g.')) -# .toEqual('/a/bb/ccc/g.'); -# }); - -# it('should just append a ..g after a slash', () => { -# expect(removeDotSegments('/a/bb/ccc/..g')) -# .toEqual('/a/bb/ccc/..g'); -# }); - -# it('should just append a g.. 
after a slash', () => { -# expect(removeDotSegments('/a/bb/ccc/g..')) -# .toEqual('/a/bb/ccc/g..'); -# }); - -# it('should end with a slash if there is a trailing /.', () => { -# expect(removeDotSegments('/a/bb/ccc/./g/.')) -# .toEqual('/a/bb/ccc/g/'); -# }); - -# it('should handle triple dots as a normal segment.', () => { -# expect(removeDotSegments('/invalid/...')) -# .toEqual('/invalid/...'); -# }); - -# it('should handle triple dots as a normal segment, followed by ...', () => { -# expect(removeDotSegments('/invalid/.../..')) -# .toEqual('/invalid/'); -# }); - -# it('should handle four dots as a normal segment.', () => { -# expect(removeDotSegments('/invalid/../..../../../.../.htaccess')) -# .toEqual('/.../.htaccess'); -# }); - -# it('should handle a segment with dot and an invalid char as a normal segment.', () => { -# expect(removeDotSegments('/invalid/../.a/../../.../.htaccess')) -# .toEqual('/.../.htaccess'); -# }); + def test_resolve_query_against_slash(self): + assert remove_dot_segments('/def/?a=b') == '/def/?a=b' + + def test_resolve_double_dot_and_query(self): + assert remove_dot_segments('/def/..?a=b') == '/?a=b' + + def test_append_dot_g_after_slash(self): + assert remove_dot_segments('/a/bb/ccc/.g') == '/a/bb/ccc/.g' + + def test_append_g_dot_after_slash(self): + assert remove_dot_segments('/a/bb/ccc/g.') == '/a/bb/ccc/g.' + + def test_append_double_dot_g_after_slash(self): + assert remove_dot_segments('/a/bb/ccc/..g') == '/a/bb/ccc/..g' + + def test_append_g_double_dot_after_slash(self): + assert remove_dot_segments('/a/bb/ccc/g..') == '/a/bb/ccc/g..' + + def test_end_with_slash_if_trailing_slash_dot(self): + assert remove_dot_segments('/a/bb/ccc/./g/.') == '/a/bb/ccc/g/' + + def test_triple_dots_as_normal_segment(self): + assert remove_dot_segments('/invalid/...') == '/invalid/...' 
+ + def test_triple_dots_as_normal_segment_followed_by_double_dots(self): + assert remove_dot_segments('/invalid/.../..') == '/invalid/' + + def test_four_dots_as_normal_segment(self): + assert remove_dot_segments('/invalid/../..../../../.../.htaccess') == '/.../.htaccess' + + def test_segment_with_dot_and_invalid_char_as_normal_segment(self): + assert remove_dot_segments('/invalid/../.a/../../.../.htaccess') == '/.../.htaccess' if __name__ == "__main__": pytest.main(["-v", __file__]) From e435c5c20323b3b4a4f127ad9fe463c7fae44dae Mon Sep 17 00:00:00 2001 From: Miel Vander Sande Date: Wed, 5 Nov 2025 15:16:59 +0100 Subject: [PATCH 3/3] Finish JS port and fix tests --- lib/pyld/iri_resolver.py | 226 +++++++++++++++++++++++++++++++------ tests/test_iri_resolver.py | 6 +- 2 files changed, 194 insertions(+), 38 deletions(-) diff --git a/lib/pyld/iri_resolver.py b/lib/pyld/iri_resolver.py index a9030267..fa5722d8 100644 --- a/lib/pyld/iri_resolver.py +++ b/lib/pyld/iri_resolver.py @@ -1,51 +1,207 @@ -def resolve(relative_iri: str, base_iri: str | None = None) -> str: - # TODO: implement - return '' +""" +The functions 'remove_dot_segments()', 'resolve()' and 'is_character_allowed_after_relative_path_segment()' are direct ports from [relative-to-absolute-iri.js](https://github.com/rubensworks/relative-to-absolute-iri.js) +""" + +def is_character_allowed_after_relative_path_segment(ch: str) -> bool: + """Return True if a character is valid after '.' or '..' in a path segment.""" + return not ch or ch in ('#', '?', '/') + def remove_dot_segments(path: str) -> str: """ - Removes dot segments from a URL path. + Removes dot segments ('.' and '..') from a URL path, + as described in https://www.ietf.org/rfc/rfc3986.txt (page 32). - :param path: the path to remove dot segments from. + :param path: the IRI path to remove dot segments from. - :return: a path with normalized dot segments. + :return: a path with normalized dot segments, will always start with a '/'. 
""" + segment_buffers = [] + i = 0 + length = len(path) - # RFC 3986 5.2.4 (reworked) + while i < length: + ch = path[i] - # empty path shortcut - if len(path) == 0: - return '' + if ch == '/': + # Handle '/.' or '/..' + if i + 1 < length and path[i + 1] == '.': + # Handle '/..' + if i + 2 < length and path[i + 2] == '.': + next_ch = path[i + 3] if i + 3 < length else '' + if not is_character_allowed_after_relative_path_segment(next_ch): + segment_buffers.append([]) + i += 1 + continue - input = path.split('/') - output = [] + # Go to parent directory + if segment_buffers: + segment_buffers.pop() - while len(input) > 0: - next = input.pop(0) - done = len(input) == 0 + # Add trailing slash segment if ends with '/..' + if i + 3 >= length: + segment_buffers.append([]) - if next == '.': - if done: - # ensure output has trailing / - output.append('') - continue + i += 3 + continue + + # Handle '/.' + next_ch = path[i + 2] if i + 2 < length else '' + if not is_character_allowed_after_relative_path_segment(next_ch): + segment_buffers.append([]) + i += 1 + continue + + # Add trailing slash if ends with '/.' 
+ if i + 2 >= length: + segment_buffers.append([]) - if next == '..': - if len(output) > 0: - output.pop() - if done: - # ensure output has trailing / - output.append('') + # Stay in current directory — skip + i += 2 + continue + + # Regular '/' starts a new segment + segment_buffers.append([]) + i += 1 continue - output.append(next) + elif ch in ('#', '?'): + # Query or fragment → append unchanged and stop + if not segment_buffers: + segment_buffers.append([]) + segment_buffers[-1].append(path[i:]) + + # Break the while loop + break + + else: + # Regular character → append to current segment + if not segment_buffers: + segment_buffers.append([]) + segment_buffers[-1].append(ch) + i += 1 + + return '/' + '/'.join(''.join(buffer) for buffer in segment_buffers) + + +def remove_dot_segments_of_path(iri: str, colon_position: int) -> str: + """ + Remove dot segments from the path portion of an IRI (RFC 3986 §5.2.4). + + :param iri: an IRI (or part of IRI). + :param colonPosition: the position of the first ':' in the IRI. + + :return: the IRI where dot segments were removed. + """ + # Determine where to start looking for the first '/' that indicates the start of the path + if colon_position >= 0: + if len(iri) > colon_position + 2 and iri[colon_position + 1] == '/' and iri[colon_position + 2] == '/': + search_offset = colon_position + 3 + else: + search_offset = colon_position + 1 + else: + if len(iri) > 1 and iri[0] == '/' and iri[1] == '/': + search_offset = 2 + else: + search_offset = 0 + + # Find the start of the path + path_separator = iri.find('/', search_offset) + if path_separator < 0: + return iri + + base = iri[:path_separator] + path = iri[path_separator:] + + # Remove dot segments from the path + return base + remove_dot_segments(path) + +def resolve(relative_iri: str, base_iri: str = "") -> str: + # """ + # Resolves a given relative IRI to an absolute IRI. + + # :param base_iri: the base IRI. + # :param relative_iri: the relative IRI.
+ + # :return: the absolute IRI. + # """ + + base_fragment_pos = base_iri.find("#") + + # Ignore any fragments in the base IRI + if base_fragment_pos > 0: + base_iri = base_iri[:base_fragment_pos] + + # Convert empty value directly to base IRI + if not relative_iri: + if ":" not in base_iri: + raise ValueError(f"Found invalid baseIRI '{base_iri}' for value '{relative_iri}'") + return base_iri + + # If the value starts with a query character, concat directly (strip existing query) + if relative_iri.startswith("?"): + base_query_pos = base_iri.find("?") + if base_query_pos > 0: + base_iri = base_iri[:base_query_pos] + return base_iri + relative_iri + + # If the value starts with a fragment character, concat directly + if relative_iri.startswith("#"): + return base_iri + relative_iri + + # Ignore baseIRI if it is empty + if not base_iri: + relative_colon_pos = relative_iri.find(":") + if relative_colon_pos < 0: + raise ValueError(f"Found invalid relative IRI '{relative_iri}' for a missing baseIRI") + return remove_dot_segments_of_path(relative_iri, relative_colon_pos) + + # Ignore baseIRI if the value is absolute + value_colon_pos = relative_iri.find(":") + if value_colon_pos >= 0: + return remove_dot_segments_of_path(relative_iri, value_colon_pos) + + # baseIRI must be absolute + base_colon_pos = base_iri.find(":") + if base_colon_pos < 0: + raise ValueError(f"Found invalid baseIRI '{base_iri}' for value '{relative_iri}'") + + base_scheme = base_iri[:base_colon_pos + 1] + + # Inherit base scheme if relative starts with '//' + if relative_iri.startswith("//"): + return base_scheme + remove_dot_segments_of_path(relative_iri, value_colon_pos) + + # Determine where the path of base starts + if base_iri.find("//", base_colon_pos) == base_colon_pos + 1: + base_slash_after_colon_pos = base_iri.find("/", base_colon_pos + 3) + if base_slash_after_colon_pos < 0: + if len(base_iri) > base_colon_pos + 3: + return base_iri + "/" + remove_dot_segments_of_path(relative_iri, 
value_colon_pos) + else: + return base_scheme + remove_dot_segments_of_path(relative_iri, value_colon_pos) + else: + base_slash_after_colon_pos = base_iri.find("/", base_colon_pos + 1) + if base_slash_after_colon_pos < 0: + return base_scheme + remove_dot_segments_of_path(relative_iri, value_colon_pos) + + # If relative starts with '/', append after base authority + if relative_iri.startswith("/"): + return base_iri[:base_slash_after_colon_pos] + remove_dot_segments(relative_iri) + + base_path = base_iri[base_slash_after_colon_pos:] + last_slash = base_path.rfind("/") + + # Ignore everything after last '/' in base path + if last_slash >= 0 and last_slash < len(base_path) - 1: + base_path = base_path[:last_slash + 1] + if (relative_iri.startswith(".") and + not relative_iri.startswith("..") and + not relative_iri.startswith("./") and + len(relative_iri) > 2): + relative_iri = relative_iri[1:] - # ensure output has leading / - # merge path segments from section 5.2.3 - # note that if the path includes no segments, the entire path is removed - if len(output) > 0 and path.startswith('/') and output[0] != '': - output.insert(0, '') - if len(output) == 1 and output[0] == '': - return '/' + relative_iri = base_path + relative_iri + relative_iri = remove_dot_segments(relative_iri) - return '/'.join(output) + return base_iri[:base_slash_after_colon_pos] + relative_iri \ No newline at end of file diff --git a/tests/test_iri_resolver.py b/tests/test_iri_resolver.py index f810c4d8..2a95ccf3 100644 --- a/tests/test_iri_resolver.py +++ b/tests/test_iri_resolver.py @@ -168,7 +168,7 @@ def test_empty_relative_with_complex_base(self): assert resolve('', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/d;p?q' def test_dot_relative_with_complex_base(self): - assert resolve('.', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/d;p?q' + assert resolve('.', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/' def test_dot_slash_relative_with_complex_base(self): assert resolve('./', 
'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/' @@ -210,7 +210,7 @@ def test_slash_double_dot_slash_g_relative_with_complex_base(self): assert resolve('/../g', 'file:///a/bb/ccc/d;p?q') == 'file:///g' def test_dot_suffix_relative_with_complex_base(self): - assert resolve('g.', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g' + assert resolve('g.', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g.' def test_dot_prefix_relative_with_complex_base(self): assert resolve('.g', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/.g' @@ -326,7 +326,7 @@ def test_not_modify_fragments(self): assert remove_dot_segments('/abc#abcdef') == '/abc#abcdef' def test_not_modify_paths_in_fragments(self): - assert remove_dot_segments('/abc#abcdef') == '/abc#a/bc/def' + assert remove_dot_segments('/abc#a/bc/def') == '/abc#a/bc/def' def test_not_modify_current_paths_in_fragments(self): assert remove_dot_segments('/abc#a/./bc/def') == '/abc#a/./bc/def'