|
| 1 | +from typing import Optional |
| 2 | + |
| 3 | +from playwright.sync_api import sync_playwright |
| 4 | +from youtube_dl.utils import YoutubeDLError |
| 5 | + |
| 6 | +from .monsnode_parser import MonsnodeParser |
| 7 | +from .twitter_crawler import TwitterCrawler |
| 8 | +from .youtube_dl_wrapper import youtube_dl_wrapper |
| 9 | + |
| 10 | + |
class PlatformVideoDownloader:
    """Download videos from Monsnode and Twitter via ``youtube_dl_wrapper``.

    Downloaded files are written below ``video_output_path``. Monsnode links
    are first resolved to a direct video link by ``monsnode_parser``.
    """

    monsnode_parser: 'MonsnodeParser'
    video_output_path: str = 'videos'

    def __init__(
        self,
        video_output_path: str = 'videos',
        monsnode_parser: Optional['MonsnodeParser'] = None,
    ):
        """Create a downloader.

        Args:
            video_output_path: Directory prefix used in the output template.
            monsnode_parser: Parser used to resolve Monsnode links. When
                omitted (or ``None``), a new ``MonsnodeParser`` is created.
        """
        self.video_output_path = video_output_path
        # Build the default parser here rather than in the signature: a
        # default written as ``MonsnodeParser()`` is evaluated once, when the
        # function is defined, and would be shared by every instance.
        self.monsnode_parser = MonsnodeParser() if monsnode_parser is None else monsnode_parser

    def download_monsnode_video(self, link: str) -> None:
        """Resolve a Monsnode ``link`` to its video and download it.

        The file name returned by the parser is used as the output file name.
        """
        video_filename, video_link = self.monsnode_parser.get_video(link)
        youtube_dl_option = self._make_youtube_dl_option(video_filename)
        youtube_dl_wrapper.download([video_link], youtube_dl_option)

    def download_twitter_video(
        self,
        link: str,
        username: Optional[str] = None,
        password: Optional[str] = None,
    ) -> None:
        """Download the video(s) of a tweet.

        A direct download of ``link`` is attempted first. If that fails with
        a ``YoutubeDLError`` whose message contains ``'not authorized'``, the
        tweet is opened in a logged-in browser session instead and every
        video found there is downloaded.

        Args:
            link: URL of the tweet.
            username: Twitter user name, needed only for the fallback.
            password: Twitter password, needed only for the fallback.

        Raises:
            YoutubeDLError: If the direct download fails for any reason other
                than a 'not authorized' error.
            Exception: If the fallback is needed but ``username`` or
                ``password`` is missing.
        """
        # Building the options cannot raise YoutubeDLError, so it stays
        # outside the try block.
        youtube_dl_option = self._make_youtube_dl_option()
        try:
            youtube_dl_wrapper.download([link], youtube_dl_option)
        except YoutubeDLError as exception:
            if 'not authorized' not in str(exception):
                # Unrelated failure: re-raise with the original traceback.
                raise
            if not username or not password:
                raise Exception(
                    'Username and password are required to download private Twitter video'
                ) from exception

            videos_info = self._get_twitter_private_videos_info(link, username, password)
            for video_filename, video_link in videos_info:
                youtube_dl_option = self._make_youtube_dl_option(video_filename)
                youtube_dl_wrapper.download([video_link], youtube_dl_option)

    def _make_youtube_dl_option(self, video_filename: Optional[str] = None) -> dict[str, str]:
        """Build the option dict passed to ``youtube_dl_wrapper.download``.

        Args:
            video_filename: Output file name. When empty or ``None``, the
                ``%(title)s.%(ext)s`` template is used instead.

        Returns:
            A dict with the ``format`` and ``outtmpl`` options.
        """
        if video_filename:
            youtube_dl_output_template = f'{self.video_output_path}/{video_filename}'
        else:
            youtube_dl_output_template = f'{self.video_output_path}/%(title)s.%(ext)s'
        return {'format': 'bestvideo/best', 'outtmpl': youtube_dl_output_template}

    def _get_twitter_private_videos_info(
        self,
        target_link: str,
        username: str,
        password: str,
    ) -> list[tuple[str, str]]:
        """Log in to Twitter in a browser and collect the videos of a tweet.

        Launches a WebKit browser with ``headless=False``, logs in through
        ``TwitterCrawler`` and returns what ``get_video_of_tweet`` yields for
        ``target_link``; the caller unpacks each entry as
        ``(video_filename, video_link)``.

        The page and the browser are closed even if logging in or crawling
        raises.
        """
        with sync_playwright() as playwright_sync:
            browser = playwright_sync.webkit.launch(headless=False)
            try:
                page = browser.new_page()
                try:
                    crawler = TwitterCrawler(page)
                    crawler.login(username, password)
                    return crawler.get_video_of_tweet(target_link)
                finally:
                    page.close()
            finally:
                browser.close()
0 commit comments