Skip to content

Commit ed3e241

Browse files
bbayles authored and anonrig committed
Add WHATWG URL class
1 parent 9bec4c4 commit ed3e241

File tree

4 files changed

+218
-0
lines changed

4 files changed

+218
-0
lines changed

README.rst

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,24 @@ ada-url
33

44
This is ``ada_url``, a Python library for parsing and joining URLs.
55

6+
67
Examples
78
--------
89

10+
This package exposes a ``URL`` class that is intended to match the one described in the
11+
`WHATWG URL spec <https://url.spec.whatwg.org/#url-class>`__.
12+
13+
.. code-block:: python
14+
15+
>>> import ada_url
16+
>>> with ada_url.URL('https://example.org/path/../file.txt') as urlobj:
17+
... urlobj.host = 'example.com'
18+
... new_url = urlobj.href
19+
>>> new_url
20+
'https://example.com/file.txt'
21+
22+
It also provides some higher level functions for parsing and manipulating URLs.
23+
924
.. code-block:: python
1025
1126
>>> import ada_url

ada_url/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from ada_url.ada_adapter import (
2+
URL,
23
check_url,
34
join_url,
45
normalize_url,
@@ -7,6 +8,7 @@
78
)
89

910
__all__ = [
11+
'URL',
1012
'check_url',
1113
'join_url',
1214
'normalize_url',

ada_url/ada_adapter.py

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,137 @@
1414
)
1515
# Every name in URL_ATTRIBUTES plus the extra ``origin`` name.
PARSE_ATTRIBUTES = URL_ATTRIBUTES + ('origin',)
1616

17+
# Names that ``URL.__getattr__`` resolves through the matching ``ada_get_*`` function.
GET_ATTRIBUTES = frozenset(PARSE_ATTRIBUTES)
18+
# Names that ``URL.__setattr__`` forwards to the matching ``ada_set_*`` function
# (``origin`` is not included, so it cannot be assigned this way).
SET_ATTRIBUTES = frozenset(URL_ATTRIBUTES)
19+
1720

1821
def _get_str(x):
1922
ret = ffi.string(x.data, x.length).decode('utf-8') if x.length else ''
2023
return ret
2124

2225

26+
class URL:
    """
    Parses a *url* (with an optional *base*) according to the
    WHATWG URL parsing standard.

    .. code-block:: python

        >>> from ada_url import URL
        >>> old_url = 'https://example.org:443/file.txt?q=1'
        >>> with URL(old_url) as urlobj:
        ...     old_host = urlobj.host
        ...     urlobj.host = 'example.com'
        ...     new_url = urlobj.href
        >>> old_host
        'example.org'
        >>> new_url
        'https://example.com/file.txt?q=1'

    Note that you should use this class as a context manager to ensure
    that resources are freed. If you use it without a ``with``
    statement, call the ``close()`` method manually. ``close()`` may be
    called more than once; reading or writing a URL attribute after it
    raises ``ValueError``.

    You can read and write the following attributes:

    * ``href``
    * ``protocol``
    * ``username``
    * ``password``
    * ``host``
    * ``hostname``
    * ``port``
    * ``pathname``
    * ``search``
    * ``hash``

    You can additionally read the ``origin`` attribute.

    Assigning a value that is not a string, or one that the parser
    rejects, raises ``ValueError``.

    The class also exposes a static method that checks whether the input
    *url* (and optional *base*) can be parsed:

    .. code-block:: python

        >>> url = 'file_1.txt'
        >>> base = 'https://example.org:443/file_2.txt'
        >>> URL.can_parse(url, base)
        True

    See the `WHATWG docs <https://url.spec.whatwg.org/#url-class>`__ for
    more details on the URL class.

    """

    def __init__(self, url, base=None):
        """
        Parse *url*, resolving it against *base* when one is given.

        Raises ``ValueError`` when the input cannot be parsed.
        """
        url_bytes = url.encode('utf-8')

        if base is None:
            urlobj = lib.ada_parse(url_bytes, len(url_bytes))
        else:
            base_bytes = base.encode('utf-8')
            urlobj = lib.ada_parse_with_base(
                url_bytes, len(url_bytes), base_bytes, len(base_bytes)
            )

        if not lib.ada_is_valid(urlobj):
            # The parse call hands back a native object even on failure;
            # release it here because the caller never gets an instance
            # on which to call close().
            lib.ada_free(urlobj)
            raise ValueError('Invalid input')

        # Only publish the handle once it is known to be valid.
        self.urlobj = urlobj

    def _handle(self):
        """Return the native handle, or raise ``ValueError`` if it is gone."""
        try:
            return self.__dict__['urlobj']
        except KeyError:
            raise ValueError('URL object is closed') from None

    def __getattr__(self, attr):
        """
        Resolve the readable URL attributes through ``ada_get_<attr>``.

        Only invoked when normal attribute lookup fails. Names outside
        ``GET_ATTRIBUTES`` raise ``AttributeError``.
        """
        if attr not in GET_ATTRIBUTES:
            raise AttributeError(
                f'{type(self).__name__!r} object has no attribute {attr!r}'
            )

        urlobj = self._handle()
        data = getattr(lib, f'ada_get_{attr}')(urlobj)
        try:
            return _get_str(data)
        finally:
            # ``origin`` is the one getter whose result must be released
            # explicitly; do it even if decoding fails.
            if attr == 'origin':
                lib.ada_free_owned_string(data)

    def __setattr__(self, attr, value):
        """
        Forward writable URL attributes to ``ada_set_<attr>``.

        Names outside ``SET_ATTRIBUTES`` are stored as ordinary instance
        attributes. Raises ``ValueError`` when *value* cannot be encoded
        or the setter reports failure.
        """
        if attr not in SET_ATTRIBUTES:
            super().__setattr__(attr, value)
            return

        try:
            value_bytes = value.encode('utf-8')
        except Exception:
            # Non-string values (and unencodable strings) are reported the
            # same way as values the parser rejects.
            raise ValueError(f'Invalid value for {attr}') from None

        urlobj = self._handle()
        set_func = getattr(lib, f'ada_set_{attr}')
        ret = set_func(urlobj, value_bytes, len(value_bytes))

        # Setters that return nothing yield None; only an explicit falsy
        # result signals that the value was rejected.
        if ret is not None and not ret:
            raise ValueError(f'Invalid value for {attr}')

    def close(self):
        """Free the native URL object. Safe to call more than once."""
        # Pop the handle first so a second call (or __exit__ after a manual
        # close) cannot free the same object twice.
        urlobj = self.__dict__.pop('urlobj', None)
        if urlobj is not None:
            lib.ada_free(urlobj)

    def __enter__(self, *args, **kwargs):
        return self

    def __exit__(self, *args, **kwargs):
        self.close()

    @staticmethod
    def can_parse(url, base=None):
        """
        Return whether *url* (resolved against *base*, if given) parses.

        Inputs that cannot be encoded as UTF-8 text, such as non-strings,
        yield ``False`` instead of raising.
        """
        try:
            url_bytes = url.encode('utf-8')
        except Exception:
            return False

        if base is None:
            return lib.ada_can_parse(url_bytes, len(url_bytes))

        try:
            base_bytes = base.encode('utf-8')
        except Exception:
            return False

        return lib.ada_can_parse_with_base(
            url_bytes, len(url_bytes), base_bytes, len(base_bytes)
        )
146+
147+
23148
def check_url(s):
24149
"""
25150
Returns ``True`` if *s* represents a valid URL, and ``False`` otherwise.

tests/test_ada_url.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from unittest import TestCase
22

33
from ada_url import (
4+
URL,
45
check_url,
56
join_url,
67
normalize_url,
@@ -10,6 +11,81 @@
1011

1112

1213
class ADAURLTests(TestCase):
14+
def test_class_get(self):
    """Each readable attribute of a parsed URL returns its normalized value."""
    source = 'https://user_1:password_1@example.org:8080/dir/../api?q=1#frag'
    wanted_by_name = (
        ('href', 'https://user_1:password_1@example.org:8080/api?q=1#frag'),
        ('username', 'user_1'),
        ('password', 'password_1'),
        ('protocol', 'https:'),
        ('port', '8080'),
        ('hostname', 'example.org'),
        ('host', 'example.org:8080'),
        ('pathname', '/api'),
        ('search', '?q=1'),
        ('hash', '#frag'),
        ('origin', 'https://example.org:8080'),
    )

    with URL(source) as parsed:
        for name, wanted in wanted_by_name:
            self.assertEqual(getattr(parsed, name), wanted)

        # Names outside the supported set must not resolve.
        with self.assertRaises(AttributeError):
            parsed.bogus
33+
34+
def test_class_set(self):
    """Assigning attributes rewrites the URL; bad values raise ValueError."""
    source = 'https://username:password@www.google.com:8080/'
    assignments = (
        ('href', 'https://www.yagiz.co'),
        ('hash', 'new-hash'),
        ('hostname', 'new-host'),
        ('host', 'changed-host:9090'),
        ('pathname', 'new-pathname'),
        ('search', 'new-search'),
        ('protocol', 'wss'),
    )

    with URL(source) as parsed:
        for name, new_value in assignments:
            setattr(parsed, name, new_value)
        actual = parsed.href

        # A non-string value and an unparseable host are both rejected.
        for bad_hostname in (1, '127.0.0.0.0.1'):
            with self.assertRaises(ValueError):
                parsed.hostname = bad_hostname

    expected = 'wss://changed-host:9090/new-pathname?new-search#new-hash'
    self.assertEqual(actual, expected)
54+
55+
def test_class_with_base(self):
    """A relative URL is resolved against the supplied base."""
    relative, base = '../example.txt', 'https://example.org/path/'
    with URL(relative, base) as parsed:
        resolved = parsed.href
        self.assertEqual(resolved, 'https://example.org/example.txt')
60+
61+
def test_class_invalid(self):
    """Constructing a URL from unparseable input raises ValueError."""
    with self.assertRaises(ValueError), URL('bogus'):
        pass
65+
66+
def test_class_can_parse(self):
    """can_parse is True only for parseable string input and never raises."""
    cases = (
        (1, False),
        (None, False),
        ('bogus', False),
        ('https://example.org', True),
    )
    for candidate, expected in cases:
        with self.subTest(url=candidate):
            self.assertEqual(URL.can_parse(candidate), expected)
76+
77+
def test_class_can_parse_with_base(self):
    """can_parse with a base is True only when the base allows resolution."""
    url = 'example.txt'
    for base, expected in (
        ('https://example.org', True),
        (1, False),
        (None, False),
        ('bogus', False),
    ):
        # Label each sub-test with the value that varies so a failure
        # report identifies which base was under test.
        with self.subTest(base=base):
            actual = URL.can_parse(url, base)
            self.assertEqual(actual, expected)
88+
1389
def test_check_url(self):
1490
for s, expected in (
1591
('https:example.org', True),

0 commit comments

Comments
 (0)