
Commit e436230

Refactor package

1 parent 24a4600 commit e436230

7 files changed (+152, -136 lines)


loading_sdk/__init__.py

Lines changed: 2 additions & 2 deletions
@@ -1,4 +1,4 @@
-from loading_sdk.api import LoadingApiClient
-from loading_sdk.async_api import async_loading_api_client as AsyncLoadingApiClient
+from loading_sdk.sync_api import LoadingApiClient
+from loading_sdk.async_api import AsyncLoadingApiClient
 
 __all__ = ["LoadingApiClient", "AsyncLoadingApiClient"]
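
Both clients stay re-exported from the package root, so downstream imports are unchanged while the implementations move into the sync_api and async_api subpackages. A minimal usage sketch; the constructor arguments and the sync get_about method are assumptions inferred from the factory signature and extractor code visible in this diff:

import asyncio

from loading_sdk import AsyncLoadingApiClient, LoadingApiClient

# Sync client: a plain class, constructed directly (arguments assumed).
sync_client = LoadingApiClient()
print(sync_client.get_about())  # assumed sync counterpart of the async get_about

# Async client: AsyncLoadingApiClient aliases the async_loading_api_client
# factory coroutine, so "construction" must be awaited.
async def main():
    client = await AsyncLoadingApiClient(email=None, password=None)
    about = await client.get_about()
    print(about)

asyncio.run(main())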

loading_sdk/async_api/__init__.py

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+from loading_sdk.async_api.client import (
+    async_loading_api_client as AsyncLoadingApiClient,
+)
+
+__all__ = ["AsyncLoadingApiClient"]

loading_sdk/async_api.py renamed to loading_sdk/async_api/client.py

Lines changed: 2 additions & 68 deletions
@@ -1,14 +1,10 @@
-import json
 import math
-import re
 
 import aiohttp
-from bs4 import BeautifulSoup
-
+from loading_sdk.async_api.extractors import AboutPageExtractor
 from loading_sdk.settings import (
     API_URL,
     API_VERSION,
-    BASE_URL,
     EDITORIAL_POST_TYPES,
     EDITORIAL_SORT,
     USER_AGENT,
@@ -22,68 +18,6 @@ async def async_loading_api_client(email=None, password=None):
     return client
 
 
-class AboutPageExtractor:
-    async def extract_about_data(self):
-        about_page_source = await self._get_source(f"{BASE_URL}/om")
-        main_script_url = self._extract_main_script_url(about_page_source)
-        main_script_source = await self._get_source(f"{BASE_URL}/{main_script_url}")
-        about_script_url = self._get_about_script_url(main_script_source)
-        about_script_source = await self._get_source(about_script_url)
-
-        return self._get_about_data(about_script_source)
-
-    async def _get_source(self, url):
-        headers = {"User-Agent": USER_AGENT}
-
-        async with aiohttp.ClientSession() as session:
-            async with session.get(url, headers=headers) as response:
-                return await response.text()
-
-    def _get_about_script_url(self, source_code):
-        chunk_urls = []
-
-        # Extracts the code with the javascript chunks.
-        p = re.compile("(static/js/).+?(?=\{)(.+?(?=\[)).+(.chunk.js)")
-        m = p.search(source_code)
-
-        if m:
-            # Transform the code into valid JSON so the chunk ids can be stored in a python dict.
-            s = re.sub(r"([0-9]+?(?=:))", r'"\1"', m.group(2))
-            chunk_ids = json.loads(s)
-
-            for k, v in chunk_ids.items():
-                chunk_url = f"{BASE_URL}/{m.group(1)}{k}.{v}{m.group(3)}"
-                chunk_urls.append(chunk_url)
-
-        return chunk_urls[-1]
-
-    def _get_about_data(self, source_code):
-        m = re.search("var.e=(.+?)(?=\.map).+a=(.+?)(?=\.map)", source_code)
-
-        if m:
-            people = re.sub(r"(\{|\,)([a-z]+)(\:)", r'\1"\2"\3', m.group(1))
-            people = re.sub(r"(.+)(')(.+)(')(.+)", r'\1"\3"\5', people)
-            people = people.replace('slags "vuxen p', "slags 'vuxen p")
-            people = people.replace('riktigt"-framtid', "riktigt'-framtid")
-            people = people.replace("\\n", "")
-            people = people.encode("utf-8").decode("unicode_escape")
-
-            moderators = re.sub(r"(\{|\,)([a-z]+)(\:)", r'\1"\2"\3', m.group(2))
-            moderators = re.sub(r"(.+)(')(.+)(')(.+)", r'\1"\3"\5', moderators)
-            moderators = moderators.replace("\\n", "")
-            moderators = moderators.encode("utf-8").decode("unicode_escape")
-
-            about = {"people": json.loads(people), "moderators": json.loads(moderators)}
-
-            return about
-
-    def _extract_main_script_url(self, html):
-        soup = BeautifulSoup(html, "html.parser")
-        main_script = soup.find(src=re.compile("/static/js/main\.[0-9a-zA-Z]+\.js"))
-
-        return main_script["src"][1:]
-
-
 class AsyncLoadingApiClient:
     """
     An async client that allows python apps to easily communicate with the loading forums web api.
@@ -566,4 +500,4 @@ async def get_about(self):
         about_page = AboutPageExtractor()
         about_data = await about_page.extract_about_data()
 
-        return about_data
+        return about_data

loading_sdk/async_api/extractors.py

Lines changed: 70 additions & 0 deletions
@@ -0,0 +1,70 @@
+import json
+import re
+
+import aiohttp
+from bs4 import BeautifulSoup
+from loading_sdk.settings import BASE_URL, USER_AGENT
+
+
+class AboutPageExtractor:
+    async def extract_about_data(self):
+        about_page_source = await self._get_source(f"{BASE_URL}/om")
+        main_script_url = self._extract_main_script_url(about_page_source)
+        main_script_source = await self._get_source(f"{BASE_URL}/{main_script_url}")
+        about_script_url = self._get_about_script_url(main_script_source)
+        about_script_source = await self._get_source(about_script_url)
+
+        return self._get_about_data(about_script_source)
+
+    async def _get_source(self, url):
+        headers = {"User-Agent": USER_AGENT}
+
+        async with aiohttp.ClientSession() as session:
+            async with session.get(url, headers=headers) as response:
+                return await response.text()
+
+    def _get_about_script_url(self, source_code):
+        chunk_urls = []
+
+        # Extracts the code with the javascript chunks.
+        match = re.search(r"(static/js/).+?(?=\{)(.+?(?=\[)).+(.chunk.js)", source_code)
+
+        if match:
+            # Transform the code into valid JSON so the chunk ids can be stored in a python dict.
+            file_name_values = re.sub(r"([0-9]+?(?=:))", r'"\1"', match.group(2))
+            chunk_ids = json.loads(file_name_values)
+
+            for key, value in chunk_ids.items():
+                chunk_url = f"{BASE_URL}/{match.group(1)}{key}.{value}{match.group(3)}"
+                chunk_urls.append(chunk_url)
+
+        return chunk_urls[-1]
+
+    def _get_about_data(self, source_code):
+        match = re.search(r"var.e=(.+?)(?=\.map).+a=(.+?)(?=\.map)", source_code)
+
+        if not match:
+            return None
+
+        people = re.sub(r"(\{|\,)([a-z]+)(\:)", r'\1"\2"\3', match.group(1))
+        people = re.sub(r"(.+)(')(.+)(')(.+)", r'\1"\3"\5', people)
+        people = people.replace('slags "vuxen p', "slags 'vuxen p")
+        people = people.replace('riktigt"-framtid', "riktigt'-framtid")
+        people = people.replace("\\n", "")
+        people = people.encode("utf-8").decode("unicode_escape")
+
+        moderators = re.sub(r"(\{|\,)([a-z]+)(\:)", r'\1"\2"\3', match.group(2))
+        moderators = re.sub(r"(.+)(')(.+)(')(.+)", r'\1"\3"\5', moderators)
+        moderators = moderators.replace("\\n", "")
+        moderators = moderators.encode("utf-8").decode("unicode_escape")
+
+        return {
+            "people": json.loads(people),
+            "moderators": json.loads(moderators),
+        }
+
+    def _extract_main_script_url(self, html):
+        soup = BeautifulSoup(html, "html.parser")
+        main_script = soup.find(src=re.compile(r"/static/js/main\.[0-9a-zA-Z]+\.js"))
+
+        return main_script["src"][1:]
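
The _get_about_script_url regex is easiest to follow on a concrete input. A minimal sketch, assuming a webpack-style chunk map in the main bundle; the literal below is invented, since the real main.<hash>.js is minified output whose exact shape this code depends on:

import json
import re

BASE_URL = "https://example.com"  # stand-in for loading_sdk.settings.BASE_URL

# Hypothetical webpack runtime fragment mapping chunk ids to content hashes.
source_code = 'u=e=>"static/js/"+e+"."+{164:"a1b2c3d4",521:"e5f6a7b8"}[e]+".chunk.js"'

match = re.search(r"(static/js/).+?(?=\{)(.+?(?=\[)).+(.chunk.js)", source_code)

# match.group(2) is the raw JS object literal: {164:"a1b2c3d4",521:"e5f6a7b8"}
# Quoting the numeric keys turns it into valid JSON.
as_json = re.sub(r"([0-9]+?(?=:))", r'"\1"', match.group(2))
chunk_ids = json.loads(as_json)

for key, value in chunk_ids.items():
    print(f"{BASE_URL}/{match.group(1)}{key}.{value}{match.group(3)}")
# -> https://example.com/static/js/164.a1b2c3d4.chunk.js
# -> https://example.com/static/js/521.e5f6a7b8.chunk.js

The extractor then takes the last of these URLs as the chunk containing the about-page data.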

loading_sdk/sync_api/__init__.py

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+from loading_sdk.sync_api.client import LoadingApiClient
+
+__all__ = ["LoadingApiClient"]

loading_sdk/api.py renamed to loading_sdk/sync_api/client.py

Lines changed: 1 addition & 66 deletions
@@ -1,79 +1,14 @@
-import json
 import math
-import re
 
 import requests
-from bs4 import BeautifulSoup
-
 from loading_sdk.settings import (
     API_URL,
     API_VERSION,
-    BASE_URL,
     EDITORIAL_POST_TYPES,
     EDITORIAL_SORT,
     USER_AGENT,
 )
-
-
-class AboutPageExtractor:
-    def __init__(self):
-        about_page_source = self._get_source(f"{BASE_URL}/om")
-        main_script_url = self._extract_main_script_url(about_page_source)
-        main_script_source = self._get_source(f"{BASE_URL}/{main_script_url}")
-        about_script_url = self._get_about_script_url(main_script_source)
-        about_script_source = self._get_source(about_script_url)
-
-        self.data = self._get_about_data(about_script_source)
-
-    def _get_source(self, url):
-        headers = {"User-Agent": USER_AGENT}
-        response = requests.get(url, headers=headers)
-
-        return response.text
-
-    def _get_about_script_url(self, source_code):
-        chunk_urls = []
-
-        # Extracts the code with the javascript chunks.
-        p = re.compile("(static/js/).+?(?=\{)(.+?(?=\[)).+(.chunk.js)")
-        m = p.search(source_code)
-
-        if m:
-            # Transform the code into valid JSON so the chunk ids can be stored in a python dict.
-            s = re.sub(r"([0-9]+?(?=:))", r'"\1"', m.group(2))
-            chunk_ids = json.loads(s)
-
-            for k, v in chunk_ids.items():
-                chunk_url = f"{BASE_URL}/{m.group(1)}{k}.{v}{m.group(3)}"
-                chunk_urls.append(chunk_url)
-
-        return chunk_urls[-1]
-
-    def _get_about_data(self, source_code):
-        m = re.search("var.e=(.+?)(?=\.map).+a=(.+?)(?=\.map)", source_code)
-
-        if m:
-            people = re.sub(r"(\{|\,)([a-z]+)(\:)", r'\1"\2"\3', m.group(1))
-            people = re.sub(r"(.+)(')(.+)(')(.+)", r'\1"\3"\5', people)
-            people = people.replace('slags "vuxen p', "slags 'vuxen p")
-            people = people.replace('riktigt"-framtid', "riktigt'-framtid")
-            people = people.replace("\\n", "")
-            people = people.encode("utf-8").decode("unicode_escape")
-
-            moderators = re.sub(r"(\{|\,)([a-z]+)(\:)", r'\1"\2"\3', m.group(2))
-            moderators = re.sub(r"(.+)(')(.+)(')(.+)", r'\1"\3"\5', moderators)
-            moderators = moderators.replace("\\n", "")
-            moderators = moderators.encode("utf-8").decode("unicode_escape")
-
-            about = {"people": json.loads(people), "moderators": json.loads(moderators)}
-
-            return about
-
-    def _extract_main_script_url(self, html):
-        soup = BeautifulSoup(html, "html.parser")
-        main_script = soup.find(src=re.compile("/static/js/main\.[0-9a-zA-Z]+\.js"))
-
-        return main_script["src"][1:]
+from loading_sdk.sync_api.extractors import AboutPageExtractor
 
 
 class LoadingApiClient:

loading_sdk/sync_api/extractors.py

Lines changed: 69 additions & 0 deletions
@@ -0,0 +1,69 @@
+import json
+import re
+
+import requests
+from bs4 import BeautifulSoup
+from loading_sdk.settings import BASE_URL, USER_AGENT
+
+
+class AboutPageExtractor:
+    def __init__(self):
+        about_page_source = self._get_source(f"{BASE_URL}/om")
+        main_script_url = self._extract_main_script_url(about_page_source)
+        main_script_source = self._get_source(f"{BASE_URL}/{main_script_url}")
+        about_script_url = self._get_about_script_url(main_script_source)
+        about_script_source = self._get_source(about_script_url)
+
+        self.data = self._get_about_data(about_script_source)
+
+    def _get_source(self, url):
+        headers = {"User-Agent": USER_AGENT}
+        response = requests.get(url, headers=headers, timeout=10)
+
+        return response.text
+
+    def _get_about_script_url(self, source_code):
+        chunk_urls = []
+
+        # Extracts the code with the javascript chunks.
+        match = re.search(r"(static/js/).+?(?=\{)(.+?(?=\[)).+(.chunk.js)", source_code)
+
+        if match:
+            # Transform the code into valid JSON so the chunk ids can be stored in a python dict.
+            file_name_values = re.sub(r"([0-9]+?(?=:))", r'"\1"', match.group(2))
+            chunk_ids = json.loads(file_name_values)
+
+            for key, value in chunk_ids.items():
+                chunk_url = f"{BASE_URL}/{match.group(1)}{key}.{value}{match.group(3)}"
+                chunk_urls.append(chunk_url)
+
+        return chunk_urls[-1]
+
+    def _get_about_data(self, source_code):
+        match = re.search(r"var.e=(.+?)(?=\.map).+a=(.+?)(?=\.map)", source_code)
+
+        if not match:
+            return None
+
+        people = re.sub(r"(\{|\,)([a-z]+)(\:)", r'\1"\2"\3', match.group(1))
+        people = re.sub(r"(.+)(')(.+)(')(.+)", r'\1"\3"\5', people)
+        people = people.replace('slags "vuxen p', "slags 'vuxen p")
+        people = people.replace('riktigt"-framtid', "riktigt'-framtid")
+        people = people.replace("\\n", "")
+        people = people.encode("utf-8").decode("unicode_escape")
+
+        moderators = re.sub(r"(\{|\,)([a-z]+)(\:)", r'\1"\2"\3', match.group(2))
+        moderators = re.sub(r"(.+)(')(.+)(')(.+)", r'\1"\3"\5', moderators)
+        moderators = moderators.replace("\\n", "")
+        moderators = moderators.encode("utf-8").decode("unicode_escape")
+
+        return {
+            "people": json.loads(people),
+            "moderators": json.loads(moderators),
+        }
+
+    def _extract_main_script_url(self, html):
+        soup = BeautifulSoup(html, "html.parser")
+        main_script = soup.find(src=re.compile(r"/static/js/main\.[0-9a-zA-Z]+\.js"))
+
+        return main_script["src"][1:]
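
The first re.sub in _get_about_data is the core trick in both extractors: minified JS object literals use bare keys, which json.loads rejects, so every lowercase key following "{" or "," gets wrapped in quotes. A small sketch on an invented fragment; the real about-page chunk is larger and also needs the quote and escape fixups shown above:

import json
import re

# Invented JS-style literal; the real chunk source is minified webpack output.
raw = '[{name:"Anna",title:"Redaktionen"},{name:"Bertil",title:"Moderator"}]'

# Quote the bare lowercase keys so the literal becomes valid JSON.
quoted = re.sub(r"(\{|\,)([a-z]+)(\:)", r'\1"\2"\3', raw)

people = json.loads(quoted)
print(people[0]["name"])  # Anna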
