-import json
 import math
-import re
 
 import aiohttp
-from bs4 import BeautifulSoup
-
+from loading_sdk.async_api.extractors import AboutPageExtractor
 from loading_sdk.settings import (
     API_URL,
     API_VERSION,
-    BASE_URL,
     EDITORIAL_POST_TYPES,
     EDITORIAL_SORT,
     USER_AGENT,
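The imports dropped above (`json`, `re`, `BeautifulSoup`, `BASE_URL`) were only used by `AboutPageExtractor`, which this commit moves into `loading_sdk.async_api.extractors`. That module is not part of this diff; a sketch of how its header presumably looks, assuming the class is moved over unchanged:

```python
# loading_sdk/async_api/extractors.py -- assumed layout, not shown in this diff.
import json
import re

import aiohttp
from bs4 import BeautifulSoup

from loading_sdk.settings import BASE_URL, USER_AGENT


class AboutPageExtractor:
    ...  # the class body deleted from client.py in the hunk below
```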
@@ -22,68 +18,6 @@ async def async_loading_api_client(email=None, password=None):
     return client
 
 
-class AboutPageExtractor:
-    async def extract_about_data(self):
-        about_page_source = await self._get_source(f"{BASE_URL}/om")
-        main_script_url = self._extract_main_script_url(about_page_source)
-        main_script_source = await self._get_source(f"{BASE_URL}/{main_script_url}")
-        about_script_url = self._get_about_script_url(main_script_source)
-        about_script_source = await self._get_source(about_script_url)
-
-        return self._get_about_data(about_script_source)
-
-    async def _get_source(self, url):
-        headers = {"User-Agent": USER_AGENT}
-
-        async with aiohttp.ClientSession() as session:
-            async with session.get(url, headers=headers) as response:
-                return await response.text()
-
-    def _get_about_script_url(self, source_code):
-        chunk_urls = []
-
-        # Extracts the code with the javascript chunks.
-        p = re.compile("(static/js/).+?(?=\{)(.+?(?=\[)).+(.chunk.js)")
-        m = p.search(source_code)
-
-        if m:
-            # Transform the code into valid JSON so the chunk ids can be stored in a python dict.
-            s = re.sub(r"([0-9]+?(?=:))", r'"\1"', m.group(2))
-            chunk_ids = json.loads(s)
-
-            for k, v in chunk_ids.items():
-                chunk_url = f"{BASE_URL}/{m.group(1)}{k}.{v}{m.group(3)}"
-                chunk_urls.append(chunk_url)
-
-        return chunk_urls[-1]
-
-    def _get_about_data(self, source_code):
-        m = re.search("var.e=(.+?)(?=\.map).+a=(.+?)(?=\.map)", source_code)
-
-        if m:
-            people = re.sub(r"(\{|\,)([a-z]+)(\:)", r'\1"\2"\3', m.group(1))
-            people = re.sub(r"(.+)(')(.+)(')(.+)", r'\1"\3"\5', people)
-            people = people.replace('slags "vuxen p', "slags 'vuxen p")
-            people = people.replace('riktigt"-framtid', "riktigt'-framtid")
-            people = people.replace("\\n", "")
-            people = people.encode("utf-8").decode("unicode_escape")
-
-            moderators = re.sub(r"(\{|\,)([a-z]+)(\:)", r'\1"\2"\3', m.group(2))
-            moderators = re.sub(r"(.+)(')(.+)(')(.+)", r'\1"\3"\5', moderators)
-            moderators = moderators.replace("\\n", "")
-            moderators = moderators.encode("utf-8").decode("unicode_escape")
-
-            about = {"people": json.loads(people), "moderators": json.loads(moderators)}
-
-            return about
-
-    def _extract_main_script_url(self, html):
-        soup = BeautifulSoup(html, "html.parser")
-        main_script = soup.find(src=re.compile("/static/js/main\.[0-9a-zA-Z]+\.js"))
-
-        return main_script["src"][1:]
-
-
 class AsyncLoadingApiClient:
     """
     An async client that allows python apps to easily communicate with the loading forums web api.
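For reference, the relocated `extract_about_data()` pipeline works in four steps: fetch the `/om` page, find the fingerprinted `main.<hash>.js` bundle, rewrite the webpack chunk table inside it into valid JSON to build the chunk URLs, and parse the people/moderator data out of the resulting script. A minimal sketch of the chunk-table step; the `chunk_map_js` input is invented for illustration, and the `BASE_URL` value is an assumption:

```python
import json
import re

# Invented example of the JS object that _get_about_script_url() captures
# as group 2: bare numeric chunk ids mapped to content hashes.
chunk_map_js = '{3:"a1b2c3",7:"d4e5f6"}'

# Quote the bare numeric keys so the JS object literal becomes valid JSON.
# The hash values are already double-quoted, so only the keys need fixing.
chunk_ids = json.loads(re.sub(r"([0-9]+?(?=:))", r'"\1"', chunk_map_js))

BASE_URL = "https://loading.se"  # assumed; the SDK imports it from settings
chunk_urls = [f"{BASE_URL}/static/js/{k}.{v}.chunk.js" for k, v in chunk_ids.items()]
print(chunk_urls[-1])  # https://loading.se/static/js/7.d4e5f6.chunk.js
```

The method returns `chunk_urls[-1]`, i.e. it expects the about-page data to live in the last chunk listed in the table.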
@@ -566,4 +500,4 @@ async def get_about(self):
         about_page = AboutPageExtractor()
         about_data = await about_page.extract_about_data()
 
-        return about_data
+        return about_data
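Callers are unaffected by the move; `get_about()` behaves the same. A usage sketch, assuming the factory is importable from the package as shown (the public import path is not visible in this diff):

```python
import asyncio

from loading_sdk.async_api import async_loading_api_client  # import path assumed


async def main():
    # Credentials are optional; get_about() only reads public data.
    client = await async_loading_api_client()
    about = await client.get_about()

    # The extractor returns {"people": [...], "moderators": [...]}.
    print(len(about["people"]), "people,", len(about["moderators"]), "moderators")


asyncio.run(main())
```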