Skip to content
This repository was archived by the owner on Apr 17, 2023. It is now read-only.

Commit fc6cf2f

Browse files
committed
Add PlatformVideoDownloader
1 parent c0f9876 commit fc6cf2f

File tree

1 file changed

+65
-0
lines changed

1 file changed

+65
-0
lines changed

utils/platform_video_downloader.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
from typing import Optional
2+
3+
from playwright.sync_api import sync_playwright
4+
from youtube_dl.utils import YoutubeDLError
5+
6+
from .monsnode_parser import MonsnodeParser
7+
from .twitter_crawler import TwitterCrawler
8+
from .youtube_dl_wrapper import youtube_dl_wrapper
9+
10+
11+
class PlatformVideoDownloader:
    """Download videos from Monsnode and Twitter via ``youtube_dl_wrapper``.

    Every download is written below ``video_output_path``. Monsnode links are
    resolved with ``monsnode_parser``; Twitter links are handed to
    ``youtube_dl_wrapper`` directly, with a logged-in browser fallback for
    tweets that the direct download reports as "not authorized".
    """

    monsnode_parser: 'MonsnodeParser'
    video_output_path: str = 'videos'

    def __init__(
        self,
        video_output_path: str = 'videos',
        monsnode_parser: Optional['MonsnodeParser'] = None,
    ):
        """Store the output directory and the Monsnode parser.

        Args:
            video_output_path: Directory prefix used in the output template.
            monsnode_parser: Parser used by ``download_monsnode_video``. When
                omitted, a new ``MonsnodeParser()`` is created for this
                instance.
        """
        self.video_output_path = video_output_path
        # Build the default lazily: a ``MonsnodeParser()`` default in the
        # signature would be constructed once at import time and shared by
        # every instance of this class.
        if monsnode_parser is None:
            monsnode_parser = MonsnodeParser()
        self.monsnode_parser = monsnode_parser

    def download_monsnode_video(self, link: str) -> None:
        """Resolve a Monsnode page link and download the video behind it.

        Args:
            link: Link passed to ``monsnode_parser.get_video``, which yields
                the output filename and the direct video link.
        """
        video_filename, video_link = self.monsnode_parser.get_video(link)
        youtube_dl_option = self._make_youtube_dl_option(video_filename)
        youtube_dl_wrapper.download([video_link], youtube_dl_option)

    def download_twitter_video(
        self,
        link: str,
        username: Optional[str] = None,
        password: Optional[str] = None,
    ) -> None:
        """Download the video(s) of a tweet, logging in when required.

        The link is first downloaded directly. If that fails with a
        ``YoutubeDLError`` whose message contains ``'not authorized'``, the
        tweet is opened in a logged-in browser session and every video found
        there is downloaded individually.

        Args:
            link: Link of the tweet.
            username: Twitter username; only needed for the login fallback.
            password: Twitter password; only needed for the login fallback.

        Raises:
            YoutubeDLError: The direct download failed for a reason other
                than authorization.
            Exception: The tweet needs a login but ``username`` or
                ``password`` is missing or empty.
        """
        # Built outside the ``try`` so only the download itself is guarded.
        youtube_dl_option = self._make_youtube_dl_option()
        try:
            youtube_dl_wrapper.download([link], youtube_dl_option)
        except YoutubeDLError as exception:
            if 'not authorized' not in str(exception):
                raise
            if not username or not password:
                raise Exception('Username and password are required to download private Twitter video') from exception

            videos_info = self._get_twitter_private_videos_info(link, username, password)
            for video_filename, video_link in videos_info:
                youtube_dl_option = self._make_youtube_dl_option(video_filename)
                youtube_dl_wrapper.download([video_link], youtube_dl_option)

    def _make_youtube_dl_option(self, video_filename: Optional[str] = None) -> dict[str, str]:
        """Build the option dict passed to ``youtube_dl_wrapper.download``.

        Args:
            video_filename: Output filename inside ``video_output_path``.
                When omitted or empty, the ``%(title)s.%(ext)s`` template is
                used instead.

        Returns:
            A dict with the ``format`` and ``outtmpl`` options.
        """
        if video_filename:
            youtube_dl_output_template = f'{self.video_output_path}/{video_filename}'
        else:
            youtube_dl_output_template = f'{self.video_output_path}/%(title)s.%(ext)s'
        return {'format': 'bestvideo/best', 'outtmpl': youtube_dl_output_template}

    def _get_twitter_private_videos_info(
        self,
        target_link: str,
        username: str,
        password: str,
    ) -> list[tuple[str, str]]:
        """Log in to Twitter in a browser and collect the tweet's videos.

        A WebKit browser is launched with ``headless=False`` and driven by
        ``TwitterCrawler``.

        Args:
            target_link: Link of the tweet to inspect.
            username: Twitter username used to log in.
            password: Twitter password used to log in.

        Returns:
            The result of ``TwitterCrawler.get_video_of_tweet``; the caller
            unpacks each entry as ``(video_filename, video_link)``.
        """
        with sync_playwright() as playwright_sync:
            browser = playwright_sync.webkit.launch(headless=False)
            try:
                page = browser.new_page()
                crawler = TwitterCrawler(page)
                crawler.login(username, password)
                return crawler.get_video_of_tweet(target_link)
            finally:
                # Always release the browser (and with it the page), even
                # when login or scraping raises.
                browser.close()

0 commit comments

Comments
 (0)