@@ -72,6 +72,25 @@ def get_recent_liked_tweet(self, username: str) -> str:
7272 self ._goto_liked_tweets (username )
7373 return self ._get_tweets_in_current_screen ()[0 ]
7474
def _get_tweets_in_current_screen(self) -> list[str]:
    """Return absolute links for every `article` currently on the page.

    For each article the `href` of the anchor at zero-based index 3 among
    its `div a` matches is read (presumably the tweet permalink -- verify
    against the current page markup) and prefixed with the site origin.
    A missing `href` contributes the bare origin.

    The whole read is retried, without an upper bound, whenever `Error`
    is raised while reading the attributes.
    """
    origin = 'https://twitter.com'

    while True:
        tweet_articles = self.page.locator('article')
        total = tweet_articles.count()
        collected: list[str] = []
        try:
            for index in range(total):
                anchor = tweet_articles.nth(index).locator('div').locator('a').nth(3)
                href = anchor.get_attribute('href', timeout=500)
                collected.append(origin + (href or ''))
        except Error:
            # The articles could not be read: nudge the page down and then
            # back up by the same amount so they get refreshed, then retry.
            self.page.mouse.wheel(0, 500)
            self.page.mouse.wheel(0, -500)
        else:
            return collected
93+
7594 def get_video_of_tweet (self , link : str , timeout : Optional [float ] = 10000 ) -> list [tuple [str , str ]]:
7695 video_links : list [str ] = []
7796
@@ -97,30 +116,54 @@ def _goto_liked_tweets(self, username: str) -> None:
97116 self .page .goto (f'https://twitter.com/{ username } /likes' )
98117 self .page .wait_for_selector ('article' )
99118
100- def _get_video_tweets_in_current_screen (self ) -> list [str ]:
def get_all_media_tweets(self, username: str, scroll_timeout: float = 0.8) -> list[str]:
    """Collect the links of all media tweets of `username`.

    Delegates to `get_media_tweets_until` with a stop link that is never
    expected to appear among the collected links, so the "stop link found"
    exit is never taken.

    `scroll_timeout` is forwarded unchanged.
    Returns the list of media tweet links.
    """
    never_matching_link = 'nothing'  # deliberately not a real tweet link
    return self.get_media_tweets_until(
        username,
        until_link=never_matching_link,
        scroll_timeout=scroll_timeout,
    )
126+
def get_media_tweets_until(self, username: str, until_link: str, scroll_timeout: float = 0.8) -> list[str]:
    """Scroll through the media tweets of `username` until `until_link` is found.

    Opens the user's media timeline, then repeatedly reads the tweet links
    on the current screen and scrolls down. The scan stops as soon as
    `until_link` has been collected, or when scrolling no longer changes
    `self.page_current_height` (treated as the bottom of the page).

    Args:
        username: account whose media timeline is scanned.
        until_link: link at which to stop; it is included in the result
            when found.
        scroll_timeout: seconds to sleep after each wheel event so the
            page can finish scrolling.

    Returns:
        The collected links without duplicates, in the order they were
        first seen.
    """
    self._goto_media_tweets(username)

    # dict keys de-duplicate while keeping first-seen order; a set would
    # return the links in arbitrary order.
    seen: dict[str, None] = {}

    previous_height = self.page_current_height
    while True:
        # 1. read the links on the current screen (tweets that are not
        #    rendered are not reachable, so this happens on every screen,
        #    including the initial one before any scrolling)
        # 2. stop if the given `until_link` was found
        # 3. scroll down; stop if the page did not move (bottom reached)
        seen.update(dict.fromkeys(self._get_video_tweets_in_current_screen()))

        print(f'Found {len(seen)} media tweets.')

        if until_link in seen:
            break

        self.page.mouse.wheel(0, 1500)
        time.sleep(scroll_timeout)  # wait for mouse wheel to scroll down

        # Read the height once so the value compared is the value stored.
        current_height = self.page_current_height
        if current_height == previous_height:
            break
        previous_height = current_height

    return list(seen)
118157
119- def _get_tweets_in_current_screen (self ) -> list [str ]:
def _goto_media_tweets(self, username: str) -> None:
    """Navigate to the media timeline of `username`.

    Blocks until at least one `article` element is present on the page.
    """
    media_url = f'https://twitter.com/{username}/media'
    self.page.goto(media_url)
    self.page.wait_for_selector('article')
161+
162+ def _get_video_tweets_in_current_screen (self ) -> list [str ]:
120163 links : list [str ] = []
121164
122165 while True :
123- articles = self .page .locator ('article' )
166+ articles = self .page .locator ('article:has(video) ' )
124167 article_length = articles .count ()
125168 try :
126169 links = [
0 commit comments