@@ -30,16 +30,28 @@ def login(self, username: str, password: str, timeout: Optional[float] = 10000)
3030 self .page .wait_for_url ('https://twitter.com/home' )
3131
def get_all_liked_tweets(self, username: str, scroll_timeout: float = 0.8) -> list[str]:
    """Return the links of every tweet liked by ``username``.

    Delegates to ``get_liked_tweets_until`` with a stop link that is not
    expected to match any collected link, so the search is never cut short
    by the ``until_link`` condition.

    Args:
        username: Account whose liked tweets are collected.
        scroll_timeout: Forwarded unchanged to ``get_liked_tweets_until``.

    Returns:
        The list of links produced by ``get_liked_tweets_until``.
    """
    # 'nothing' is intentional: it is a placeholder `until_link` that would
    # never be found in the list of links.
    never_matching_link = 'nothing'
    return self.get_liked_tweets_until(username, never_matching_link, scroll_timeout)
3439
3540 def get_liked_tweets_until (self , username : str , until_link : str , scroll_timeout : float = 0.8 ) -> list [str ]:
36- self ._open_liked_tweets (username )
41+ """Scrolling down the list of liked tweets until the given `until_link` found
42+ Returns the list of links of liked tweets
43+ """
44+ self ._goto_liked_tweets (username )
3745 links : list [str ] = []
3846
3947 previous_height = self .page_current_height
4048 while True :
49+ # 1. scroll down
50+ # 2. get the link of tweets in the current screen(tweets are not reachable if it's not rendering)
51+ # 3. break if page reaches to the bottom or the given `until_link` found
52+
4153 self .page .mouse .wheel (0 , 1500 )
42- time .sleep (scroll_timeout ) # wait for mouse cursor down
54+ time .sleep (scroll_timeout ) # wait for mouse wheel to scroll down
4355 is_page_bottom = self .page_current_height == previous_height
4456 if is_page_bottom :
4557 break
@@ -57,31 +69,31 @@ def get_liked_tweets_until(self, username: str, until_link: str, scroll_timeout:
5769 return links
5870
def get_recent_liked_tweet(self, username: str) -> str:
    """Return the first liked-tweet link shown on ``username``'s likes page.

    Opens the likes page via ``_goto_liked_tweets`` and takes the first entry
    of ``_get_article_links_in_current_screen()``.

    Raises:
        IndexError: If no article link is returned for the current screen.
    """
    self._goto_liked_tweets(username)
    visible_links = self._get_article_links_in_current_screen()
    # NOTE(review): presumably the page lists the newest like first - confirm.
    return visible_links[0]
6274
def get_video_of_tweet(self, link: str, timeout: Optional[float] = 5000) -> list[tuple[str, str]]:
    """Collect the m3u8 request URLs issued while loading the tweet at ``link``.

    Args:
        link: URL of the tweet to open.
        timeout: Passed to ``wait_for_selector`` while waiting for a
            ``video`` element.

    Returns:
        A list of ``(file_name, m3u8_url)`` pairs, one per captured request,
        where ``file_name`` is ``'<tweet name>_<index>.mp4'``. The list is
        empty when no ``video`` element appears (``Error`` raised by the
        wait) or when no m3u8 request was captured.
    """
    video_links: list[str] = []

    def _request_m3u8_capture_handler(request: Request) -> None:
        if 'm3u8' in request.url:
            video_links.append(request.url)

    self.page.on('request', _request_m3u8_capture_handler)
    try:
        self.page.goto(link)
        try:
            self.page.wait_for_selector('video', timeout=timeout)
        except Error:
            return []
    finally:
        # Detach the handler so repeated calls do not pile up listeners that
        # keep appending to lists from earlier invocations.
        self.page.remove_listener('request', _request_m3u8_capture_handler)

    if not video_links:
        # Nothing captured: skip the DOM lookups done by _parse_tweet_name().
        return []

    # The tweet name is the same for every captured link; resolve it once.
    tweet_name = self._parse_tweet_name()
    return [
        (f'{tweet_name}_{index}.mp4', video_link)
        for index, video_link in enumerate(video_links)
    ]
7890
def _parse_tweet_name(self) -> str:
    """Build a ``'<uploader> - <content>'`` label from the currently open page.

    ``uploader`` is the stripped inner text of the first link inside the
    ``primaryColumn`` test-id element; ``content`` is the stripped inner text
    of the first ``tweetText`` test-id element inside an article.
    """
    primary_column = self.page.get_by_test_id('primaryColumn')
    uploader = primary_column.get_by_role('link').nth(0).inner_text().strip()

    tweet_text = self.page.get_by_role('article').get_by_test_id('tweetText')
    content = tweet_text.nth(0).inner_text().strip()

    # NOTE(review): the result is used as a file-name stem by
    # get_video_of_tweet; tweet text is not sanitized here - confirm callers
    # handle characters that are invalid in file names.
    return f'{uploader} - {content}'
8395
def _goto_liked_tweets(self, username: str) -> None:
    """Navigate to ``username``'s likes page and wait for an ``article`` element."""
    likes_url = f'https://twitter.com/{username}/likes'
    self.page.goto(likes_url)
    # Block until at least one element matching 'article' is present.
    self.page.wait_for_selector('article')
8799
@@ -98,7 +110,7 @@ def _get_article_links_in_current_screen(self) -> list[str]:
98110 for i in range (article_length )
99111 ]
100112 break
101- except Error :
102- self .page .mouse .wheel (0 , 500 )
113+ except Error : # if articles in the page are not reachable
114+ self .page .mouse .wheel (0 , 500 ) # scrolling down to refresh the articles
103115
104116 return links
0 commit comments