-import json
 import math
-import re
 
 import aiohttp
-from bs4 import BeautifulSoup
-
+from loading_sdk.async_api.extractors import AboutPageExtractor
 from loading_sdk.settings import (
     API_URL,
     API_VERSION,
-    BASE_URL,
     EDITORIAL_POST_TYPES,
     EDITORIAL_SORT,
     USER_AGENT,
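The imports dropped above (`json`, `re`, `BeautifulSoup`, `BASE_URL`) were only used by `AboutPageExtractor`, which this commit moves into `loading_sdk.async_api.extractors`. That module is not part of this diff; a sketch of how its header presumably looks, assuming the class is moved over unchanged:

```python
# loading_sdk/async_api/extractors.py -- assumed layout, not shown in this diff.
import json
import re

import aiohttp
from bs4 import BeautifulSoup

from loading_sdk.settings import BASE_URL, USER_AGENT


class AboutPageExtractor:
    ...  # the class body deleted from client.py in the hunk below
```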
@@ -22,68 +18,6 @@ async def async_loading_api_client(email=None, password=None):
     return client
 
 
-class AboutPageExtractor:
-    async def extract_about_data(self):
-        about_page_source = await self._get_source(f"{BASE_URL}/om")
-        main_script_url = self._extract_main_script_url(about_page_source)
-        main_script_source = await self._get_source(f"{BASE_URL}/{main_script_url}")
-        about_script_url = self._get_about_script_url(main_script_source)
-        about_script_source = await self._get_source(about_script_url)
-
-        return self._get_about_data(about_script_source)
-
-    async def _get_source(self, url):
-        headers = {"User-Agent": USER_AGENT}
-
-        async with aiohttp.ClientSession() as session:
-            async with session.get(url, headers=headers) as response:
-                return await response.text()
-
-    def _get_about_script_url(self, source_code):
-        chunk_urls = []
-
-        # Extracts the code with the javascript chunks.
-        p = re.compile("(static/js/).+?(?=\{)(.+?(?=\[)).+(.chunk.js)")
-        m = p.search(source_code)
-
-        if m:
-            # Transform the code into valid JSON so the chunk ids can be stored in a python dict.
-            s = re.sub(r"([0-9]+?(?=:))", r'"\1"', m.group(2))
-            chunk_ids = json.loads(s)
-
-            for k, v in chunk_ids.items():
-                chunk_url = f"{BASE_URL}/{m.group(1)}{k}.{v}{m.group(3)}"
-                chunk_urls.append(chunk_url)
-
-        return chunk_urls[-1]
-
-    def _get_about_data(self, source_code):
-        m = re.search("var.e=(.+?)(?=\.map).+a=(.+?)(?=\.map)", source_code)
-
-        if m:
-            people = re.sub(r"(\{|\,)([a-z]+)(\:)", r'\1"\2"\3', m.group(1))
-            people = re.sub(r"(.+)(')(.+)(')(.+)", r'\1"\3"\5', people)
-            people = people.replace('slags "vuxen p', "slags 'vuxen p")
-            people = people.replace('riktigt"-framtid', "riktigt'-framtid")
-            people = people.replace("\\n", "")
-            people = people.encode("utf-8").decode("unicode_escape")
-
-            moderators = re.sub(r"(\{|\,)([a-z]+)(\:)", r'\1"\2"\3', m.group(2))
-            moderators = re.sub(r"(.+)(')(.+)(')(.+)", r'\1"\3"\5', moderators)
-            moderators = moderators.replace("\\n", "")
-            moderators = moderators.encode("utf-8").decode("unicode_escape")
-
-            about = {"people": json.loads(people), "moderators": json.loads(moderators)}
-
-            return about
-
-    def _extract_main_script_url(self, html):
-        soup = BeautifulSoup(html, "html.parser")
-        main_script = soup.find(src=re.compile("/static/js/main\.[0-9a-zA-Z]+\.js"))
-
-        return main_script["src"][1:]
-
-
 class AsyncLoadingApiClient:
     """
     An async client that allows python apps to easily communicate with the loading forums web api.
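For reference, the relocated `extract_about_data()` pipeline works in four steps: fetch the `/om` page, find the fingerprinted `main.<hash>.js` bundle, rewrite the webpack chunk table inside it into valid JSON to build the chunk URLs, and parse the people/moderator data out of the resulting script. A minimal sketch of the chunk-table step; the `chunk_map_js` input is invented for illustration, and the `BASE_URL` value is an assumption:

```python
import json
import re

# Invented example of the JS object that _get_about_script_url() captures
# as group 2: bare numeric chunk ids mapped to content hashes.
chunk_map_js = '{3:"a1b2c3",7:"d4e5f6"}'

# Quote the bare numeric keys so the JS object literal becomes valid JSON.
# The hash values are already double-quoted, so only the keys need fixing.
chunk_ids = json.loads(re.sub(r"([0-9]+?(?=:))", r'"\1"', chunk_map_js))

BASE_URL = "https://loading.se"  # assumed; the SDK imports it from settings
chunk_urls = [f"{BASE_URL}/static/js/{k}.{v}.chunk.js" for k, v in chunk_ids.items()]
print(chunk_urls[-1])  # https://loading.se/static/js/7.d4e5f6.chunk.js
```

The method returns `chunk_urls[-1]`, i.e. it expects the about-page data to live in the last chunk listed in the table.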
@@ -566,4 +500,4 @@ async def get_about(self):
         about_page = AboutPageExtractor()
         about_data = await about_page.extract_about_data()
 
-        return about_data
+        return about_data
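Callers are unaffected by the move; `get_about()` behaves the same. A usage sketch, assuming the factory is importable from the package as shown (the public import path is not visible in this diff):

```python
import asyncio

from loading_sdk.async_api import async_loading_api_client  # import path assumed


async def main():
    # Credentials are optional; get_about() only reads public data.
    client = await async_loading_api_client()
    about = await client.get_about()

    # The extractor returns {"people": [...], "moderators": [...]}.
    print(len(about["people"]), "people,", len(about["moderators"]), "moderators")


asyncio.run(main())
```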