diff --git a/.env b/.env deleted file mode 100644 index 939c71c..0000000 --- a/.env +++ /dev/null @@ -1 +0,0 @@ -GOOGLE_API_KEY = "Enter Your API Key here" diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..39bb49f --- /dev/null +++ b/.env.example @@ -0,0 +1 @@ +GOOGLE_API_KEY=your_google_api_key_here diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e2225bc --- /dev/null +++ b/.gitignore @@ -0,0 +1,32 @@ +# Environment variables +.env + +# Claude Code +CLAUDE.md +.claude/ + +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python + +# Virtual environments +venv/ +env/ +ENV/ +.venv + +# IDE +.vscode/ +.idea/ +*.swp +*.swo + +# OS +.DS_Store +Thumbs.db + +# Streamlit +.streamlit/secrets.toml diff --git a/README.md b/README.md index f210644..2e993fb 100644 --- a/README.md +++ b/README.md @@ -31,14 +31,8 @@ The YouTube Video Transcript Summarizer with GenAI is an innovative tool designe To run this project, you need to install the following packages: -```python -pip install python-dotenv -pip install streamlit -pip install streamlit-extras -pip install youtube-transcript-api -pip install google-generativeai -pip install langcodes -pip install language_data +```bash +pip install -r requirements.txt ```
@@ -49,9 +43,14 @@ To use this project, follow these steps: 1. Clone the repository: ```git clone https://github.com/gopiashokan/YouTube-Video-Transcript-Summarizer-with-GenAI.git``` 2. Install the required packages: ```pip install -r requirements.txt``` -3. Add your Google API key to the `.env` file. -4. Run the Streamlit app: ```streamlit run app.py``` -5. Access the app in your browser at ```http://localhost:8501``` +3. Create a `.env` file in the root directory (use `.env.example` as a template). +4. Add your Google API key to the `.env` file: + ``` + GOOGLE_API_KEY=your_actual_api_key_here + ``` + **⚠️ IMPORTANT:** Never commit your `.env` file to Git. It contains sensitive API keys. +5. Run the Streamlit app: ```streamlit run app.py``` +6. Access the app in your browser at ```http://localhost:8501```
diff --git a/app.py b/app.py index 615abd5..1778038 100644 --- a/app.py +++ b/app.py @@ -1,11 +1,12 @@ import os +import re import langcodes import google.generativeai as genai import streamlit as st from streamlit_extras.add_vertical_space import add_vertical_space from dotenv import load_dotenv from youtube_transcript_api import YouTubeTranscriptApi -from warnings import filterwarnings +from urllib.parse import urlparse, parse_qs @@ -39,71 +40,136 @@ def streamlit_config(): +def extract_video_id(video_link): + """ + Extract video ID from various YouTube URL formats. + Supports: + - https://www.youtube.com/watch?v=VIDEO_ID + - https://youtu.be/VIDEO_ID + - https://www.youtube.com/embed/VIDEO_ID + - https://www.youtube.com/v/VIDEO_ID + """ + try: + # Pattern for youtube.com URLs + if 'youtube.com' in video_link: + parsed_url = urlparse(video_link) + if parsed_url.path == '/watch': + video_id = parse_qs(parsed_url.query).get('v') + if video_id: + return video_id[0] + elif '/embed/' in parsed_url.path: + return parsed_url.path.split('/embed/')[1].split('?')[0] + elif '/v/' in parsed_url.path: + return parsed_url.path.split('/v/')[1].split('?')[0] + + # Pattern for youtu.be URLs + elif 'youtu.be' in video_link: + parsed_url = urlparse(video_link) + return parsed_url.path.lstrip('/') + + # If it's already just the video ID (11 characters) + elif re.match(r'^[A-Za-z0-9_-]{11}$', video_link.strip()): + return video_link.strip() + + return None + + except Exception as e: + return None + + def extract_languages(video_id): + """ + Extract available transcript languages for a YouTube video. + Returns tuple of (language_list, language_dict) or (None, None) on error. 
+ """ + try: + # Create YouTubeTranscriptApi instance + ytt_api = YouTubeTranscriptApi() - # Fetch the List of Available Transcripts for Given Video - transcript_list = YouTubeTranscriptApi.list_transcripts(video_id) + # Fetch the List of Available Transcripts for Given Video + transcript_list = ytt_api.list(video_id) - # Extract the Language Codes from List ---> ['en','ta'] - available_transcripts = [i.language_code for i in transcript_list] + # Extract the Language Codes from List ---> ['en','ta'] + available_transcripts = [i.language_code for i in transcript_list] - # Convert Language_codes to Human-Readable Language_names ---> 'en' into 'English' - language_list = list({langcodes.Language.get(i).display_name() for i in available_transcripts}) + # Convert Language_codes to Human-Readable Language_names ---> 'en' into 'English' + language_list = list({langcodes.Language.get(i).display_name() for i in available_transcripts}) - # Create a Dictionary Mapping Language_names to Language_codes - language_dict = {langcodes.Language.get(i).display_name():i for i in available_transcripts} + # Create a Dictionary Mapping Language_names to Language_codes + language_dict = {langcodes.Language.get(i).display_name():i for i in available_transcripts} - return language_list, language_dict + return language_list, language_dict + + except Exception as e: + st.error(f"Error fetching transcripts: {str(e)}") + return None, None def extract_transcript(video_id, language): - + """ + Extract transcript text for a YouTube video in specified language. + Returns transcript string or None on error. 
+ """ try: - # Request Transcript for YouTube Video using API - transcript_content = YouTubeTranscriptApi.get_transcript(video_id=video_id, languages=[language]) - + # Create YouTubeTranscriptApi instance + ytt_api = YouTubeTranscriptApi() + + # Get list of available transcripts + transcript_list = ytt_api.list(video_id) + + # Find transcript in the specified language + transcript = transcript_list.find_transcript([language]) + + # Fetch the actual transcript content + transcript_content = transcript.fetch() + # Extract Transcript Content from JSON Response and Join to Single Response - transcript = ' '.join([i['text'] for i in transcript_content]) + transcript_text = ' '.join([i.text for i in transcript_content]) + + return transcript_text - return transcript - - except Exception as e: - add_vertical_space(5) - st.markdown(f'
{e}
', unsafe_allow_html=True) + st.error(f"Error extracting transcript: {str(e)}") + return None def generate_summary(transcript_text): - + """ + Generate AI-powered summary using Google Gemini. + Returns summary string or None on error. + """ try: + # Check if API key exists + api_key = os.getenv('GOOGLE_API_KEY') + if not api_key: + st.error("Google API key not found. Please add GOOGLE_API_KEY to your .env file.") + return None + # Configures the genai Library - genai.configure(api_key=os.environ['GOOGLE_API_KEY']) + genai.configure(api_key=api_key) - # Initializes a Gemini-Pro Generative Model - model = genai.GenerativeModel(model_name = 'gemini-pro') + # Initializes a Gemini 2.5 Flash Generative Model + model = genai.GenerativeModel(model_name='gemini-2.5-flash') # Define a Prompt for AI Model - prompt = """You are a YouTube video summarizer. You will be taking the transcript text and summarizing the entire video, - providing the important points are proper sub-heading in a concise manner (within 500 words). + prompt = """You are a YouTube video summarizer. You will be taking the transcript text and summarizing the entire video, + providing the important points with proper sub-headings in a concise manner (within 500 words). Please provide the summary of the text given here: """ - + response = model.generate_content(prompt + transcript_text) return response.text except Exception as e: - add_vertical_space(5) - st.markdown(f'
{e}
', unsafe_allow_html=True) + st.error(f"Error generating summary: {str(e)}") + return None def main(): - # Filter the Warnings - filterwarnings(action='ignore') - # Load the Environment Variables load_dotenv() @@ -112,51 +178,61 @@ def main(): # Initialize the Button Variable button = False + video_id = None + language = None with st.sidebar: image_url = 'https://raw.githubusercontent.com/gopiashokan/YouTube-Video-Transcript-Summarizer-with-GenAI/main/image/youtube_banner.JPG' - st.image(image_url, use_column_width=True) + st.image(image_url, use_container_width=True) add_vertical_space(2) - # Get YouTube Video Link From User + # Get YouTube Video Link From User video_link = st.text_input(label='Enter YouTube Video Link') if video_link: # Extract the Video ID From URL - video_id = video_link.split('=')[1].split('&')[0] - - # Extract Language from Video_ID - language_list, language_dict = extract_languages(video_id) - - # User Select the Transcript Language - language_input = st.selectbox(label='Select Transcript Language', - options=language_list) - - # Get Language_code from Dict - language = language_dict[language_input] - - # Click Submit Button - add_vertical_space(1) - button = st.button(label='Submit') - + video_id = extract_video_id(video_link) + + if not video_id: + st.error("Invalid YouTube URL. 
Please enter a valid YouTube video link.") + else: + # Extract Language from Video_ID + language_list, language_dict = extract_languages(video_id) + + if language_list and language_dict: + # User Select the Transcript Language + language_input = st.selectbox(label='Select Transcript Language', + options=language_list) + + # Get Language_code from Dict + language = language_dict[language_input] + + # Click Submit Button + add_vertical_space(1) + button = st.button(label='Submit') + # User Enter the Video Link and Click Submit Button - if button and video_link: - + if button and video_link and video_id and language: + # UI Split into Columns _, col2, _ = st.columns([0.07,0.83,0.1]) # Display the Video Thumbnail Image with col2: - st.image(image=f'http://img.youtube.com/vi/{video_id}/0.jpg', - use_column_width=True) + st.image(image=f'http://img.youtube.com/vi/{video_id}/0.jpg', + use_container_width=True) # Extract Transcript from YouTube Video add_vertical_space(2) with st.spinner(text='Extracting Transcript...'): transcript_text = extract_transcript(video_id, language) + if not transcript_text: + st.error("Failed to extract transcript. Please try again.") + return + # Generating Summary using Gemini AI with st.spinner(text='Generating Summary...'): summary = generate_summary(transcript_text) @@ -164,15 +240,11 @@ def main(): # Display the Summary if summary: st.write(summary) + else: + st.error("Failed to generate summary. Please try again.") if __name__ == '__main__': - - try: - main() - - except Exception as e: - add_vertical_space(5) - st.markdown(f'
{e}
', unsafe_allow_html=True) + main() diff --git a/requirements.txt b/requirements.txt index 0e28e34..bbc5194 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ python-dotenv streamlit streamlit-extras -youtube-transcript-api +youtube-transcript-api>=1.0.0 google-generativeai langcodes language_data