gopiashokan · wiecon-dev · Nov 6, 2025
diff --git a/.env b/.env
diff --git a/.env.example b/.env.example
@@ -0,0 +1 @@
+GOOGLE_API_KEY=your_google_api_key_here
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,32 @@
+# Environment variables
+.env
+
+# Claude Code
+CLAUDE.md
+.claude/
+
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+
+# Virtual environments
+venv/
+env/
+ENV/
+.venv
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+
+# OS
+.DS_Store
+Thumbs.db
+
+# Streamlit
+.streamlit/secrets.toml
diff --git a/README.md b/README.md
@@ -31,14 +31,8 @@ The YouTube Video Transcript Summarizer with GenAI is an innovative tool designe
 
 To run this project, you need to install the following packages:
 
-```python
-pip install python-dotenv
-pip install streamlit
-pip install streamlit-extras
-pip install youtube-transcript-api
-pip install google-generativeai
-pip install langcodes
-pip install language_data
+```bash
+pip install -r requirements.txt
 ```
 
 <br />
@@ -49,9 +43,14 @@ To use this project, follow these steps:
 
 1. Clone the repository: ```git clone https://github.com/gopiashokan/YouTube-Video-Transcript-Summarizer-with-GenAI.git```
 2. Install the required packages: ```pip install -r requirements.txt```
-3. Add your Google API key to the `.env` file.
-4. Run the Streamlit app: ```streamlit run app.py```
-5. Access the app in your browser at ```http://localhost:8501```
+3. Create a `.env` file in the root directory (use `.env.example` as a template)
+4. Add your Google API key to the `.env` file:
+   ```
+   GOOGLE_API_KEY=your_actual_api_key_here
+   ```
+   **⚠️ IMPORTANT:** Never commit your `.env` file to Git. It contains sensitive API keys.
+5. Run the Streamlit app: ```streamlit run app.py```
+6. Access the app in your browser at ```http://localhost:8501```
 
 <br />
 

diff --git a/app.py b/app.py
@@ -1,11 +1,12 @@
 import os
+import re
 import langcodes
 import google.generativeai as genai
 import streamlit as st
 from streamlit_extras.add_vertical_space import add_vertical_space
 from dotenv import load_dotenv
 from youtube_transcript_api import YouTubeTranscriptApi
-from warnings import filterwarnings
+from urllib.parse import urlparse, parse_qs
 
 
 
@@ -39,71 +40,136 @@ def streamlit_config():
 
 
 
+def extract_video_id(video_link):
+    """
+    Extract the 11-character video ID from a YouTube URL or a bare video ID.
+    Supports watch?v=, youtu.be/, /embed/, /v/, /shorts/ and /live/ links, with or
+    without a scheme and on any youtube.com subdomain (www, m, music).
+    Returns the ID string, or None when no well-formed ID can be found.
+    """
+    if not isinstance(video_link, str):
+        return None
+    link = video_link.strip()
+    id_pattern = r'[A-Za-z0-9_-]{11}'
+    # A bare video ID needs no URL parsing
+    if re.fullmatch(id_pattern, link):
+        return link
+
+    # urlparse only fills in the host when a scheme is present
+    if not re.match(r'[A-Za-z][A-Za-z0-9+.-]*://', link):
+        link = 'https://' + link
+    try:
+        parsed_url = urlparse(link)
+        host = (parsed_url.hostname or '').lower()
+    except ValueError:
+        return None
+
+    segments = [part for part in parsed_url.path.split('/') if part]
+    candidate = None
+    if host == 'youtu.be' and segments:
+        candidate = segments[0]
+    elif host == 'youtube.com' or host.endswith('.youtube.com'):
+        if segments == ['watch']:
+            candidate = parse_qs(parsed_url.query).get('v', [None])[0]
+        elif len(segments) >= 2 and segments[0] in ('embed', 'v', 'shorts', 'live'):
+            candidate = segments[1]
+    # Validate before returning: the ID is later interpolated into a thumbnail URL
+    return candidate if candidate and re.fullmatch(id_pattern, candidate) else None
+
+
 def extract_languages(video_id):
+    """
+    Extract available transcript languages for a YouTube video.
+    Returns tuple of (language_list, language_dict) or (None, None) on error.
+    """
+    try:
+        # Create YouTubeTranscriptApi instance
+        ytt_api = YouTubeTranscriptApi()
 
-    # Fetch the List of Available Transcripts for Given Video
-    transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
+        # Fetch the List of Available Transcripts for Given Video
+        transcript_list = ytt_api.list(video_id)
 
-    # Extract the Language Codes from List ---> ['en','ta']
-    available_transcripts = [i.language_code for i in transcript_list]
+        # Extract the Language Codes from List ---> ['en','ta']
+        available_transcripts = [i.language_code for i in transcript_list]
 
-    # Convert Language_codes to Human-Readable Language_names ---> 'en' into 'English'
-    language_list = list({langcodes.Language.get(i).display_name() for i in available_transcripts})
+        # Convert Language_codes to Human-Readable Language_names ---> 'en' into 'English'
+        language_list = list({langcodes.Language.get(i).display_name() for i in available_transcripts})
 
-    # Create a Dictionary Mapping Language_names to Language_codes
-    language_dict = {langcodes.Language.get(i).display_name():i for i in available_transcripts}
+        # Create a Dictionary Mapping Language_names to Language_codes
+        language_dict = {langcodes.Language.get(i).display_name():i for i in available_transcripts}
 
-    return language_list, language_dict
+        return language_list, language_dict
+
+    except Exception as e:
+        st.error(f"Error fetching transcripts: {str(e)}")
+        return None, None
 
 
 
 def extract_transcript(video_id, language):
-
+    """
+    Extract transcript text for a YouTube video in specified language.
+    Returns transcript string or None on error.
+    """
     try:
-        # Request Transcript for YouTube Video using API
-        transcript_content = YouTubeTranscriptApi.get_transcript(video_id=video_id, languages=[language])
-
+        # Create YouTubeTranscriptApi instance
+        ytt_api = YouTubeTranscriptApi()
+
+        # Get list of available transcripts
+        transcript_list = ytt_api.list(video_id)
+
+        # Find transcript in the specified language
+        transcript = transcript_list.find_transcript([language])
+
+        # Fetch the actual transcript content
+        transcript_content = transcript.fetch()
+
         # Extract Transcript Content from JSON Response and Join to Single Response
-        transcript = ' '.join([i['text'] for i in transcript_content])
+        transcript_text = ' '.join([i.text for i in transcript_content])
+
+        return transcript_text
 
-        return transcript
-
-
     except Exception as e:
-        add_vertical_space(5)
-        st.markdown(f'<h5 style="text-position:center;color:orange;">{e}</h5>', unsafe_allow_html=True)
+        st.error(f"Error extracting transcript: {str(e)}")
+        return None
 
 
 
 def generate_summary(transcript_text):
-
+    """
+    Generate AI-powered summary using Google Gemini.
+    Returns summary string or None on error.
+    """
     try:
+        # Check if API key exists
+        api_key = os.getenv('GOOGLE_API_KEY')
+        if not api_key:
+            st.error("Google API key not found. Please add GOOGLE_API_KEY to your .env file.")
+            return None
+
         # Configures the genai Library
-        genai.configure(api_key=os.environ['GOOGLE_API_KEY'])
+        genai.configure(api_key=api_key)
 
-        # Initializes a Gemini-Pro Generative Model
-        model = genai.GenerativeModel(model_name = 'gemini-pro')  
+        # Initializes a Gemini 2.5 Flash Generative Model
+        model = genai.GenerativeModel(model_name='gemini-2.5-flash')
 
         # Define a Prompt for AI Model
-        prompt = """You are a YouTube video summarizer. You will be taking the transcript text and summarizing the entire video, 
-                    providing the important points are proper sub-heading in a concise manner (within 500 words). 
+        prompt = """You are a YouTube video summarizer. You will be taking the transcript text and summarizing the entire video,
+                    providing the important points with proper sub-headings in a concise manner (within 500 words).
                     Please provide the summary of the text given here: """
-        
+
         response = model.generate_content(prompt + transcript_text)
 
         return response.text
 
     except Exception as e:
-        add_vertical_space(5)
-        st.markdown(f'<h5 style="text-position:center;color:orange;">{e}</h5>', unsafe_allow_html=True)
+        st.error(f"Error generating summary: {str(e)}")
+        return None
 
 
 
 def main():
 
-    # Filter the Warnings
-    filterwarnings(action='ignore')
-
     # Load the Environment Variables
     load_dotenv()
 
@@ -112,67 +178,73 @@ def main():
 
     # Initialize the Button Variable
     button = False
+    video_id = None
+    language = None
 
     with st.sidebar:
 
         image_url = 'https://raw.githubusercontent.com/gopiashokan/YouTube-Video-Transcript-Summarizer-with-GenAI/main/image/youtube_banner.JPG'
-        st.image(image_url, use_column_width=True)
+        st.image(image_url, use_container_width=True)
         add_vertical_space(2)
 
-        # Get YouTube Video Link From User 
+        # Get YouTube Video Link From User
         video_link = st.text_input(label='Enter YouTube Video Link')
 
         if video_link:
             # Extract the Video ID From URL
-            video_id = video_link.split('=')[1].split('&')[0]
-
-            # Extract Language from Video_ID
-            language_list, language_dict = extract_languages(video_id)
-
-            # User Select the Transcript Language
-            language_input = st.selectbox(label='Select Transcript Language', 
-                                        options=language_list)
-
-            # Get Language_code from Dict
-            language = language_dict[language_input]
-
-            # Click Submit Button
-            add_vertical_space(1)
-            button = st.button(label='Submit')
-
+            video_id = extract_video_id(video_link)
+
+            if not video_id:
+                st.error("Invalid YouTube URL. Please enter a valid YouTube video link.")
+            else:
+                # Extract Language from Video_ID
+                language_list, language_dict = extract_languages(video_id)
+
+                if language_list and language_dict:
+                    # User Select the Transcript Language
+                    language_input = st.selectbox(label='Select Transcript Language',
+                                                options=language_list)
+
+                    # Get Language_code from Dict
+                    language = language_dict[language_input]
+
+                    # Click Submit Button
+                    add_vertical_space(1)
+                    button = st.button(label='Submit')
+
 
     # User Enter the Video Link and Click Submit Button
-    if button and video_link:
-        
+    if button and video_link and video_id and language:
+
         # UI Split into Columns
         _, col2, _ = st.columns([0.07,0.83,0.1])
 
         # Display the Video Thumbnail Image
         with col2:
-            st.image(image=f'http://img.youtube.com/vi/{video_id}/0.jpg', 
-                     use_column_width=True)
+            st.image(image=f'http://img.youtube.com/vi/{video_id}/0.jpg',
+                     use_container_width=True)
 
         # Extract Transcript from YouTube Video
         add_vertical_space(2)
         with st.spinner(text='Extracting Transcript...'):
             transcript_text = extract_transcript(video_id, language)
 
+        if not transcript_text:
+            st.error("Failed to extract transcript. Please try again.")
+            return
+
         # Generating Summary using Gemini AI
         with st.spinner(text='Generating Summary...'):
             summary = generate_summary(transcript_text)
 
         # Display the Summary
         if summary:
             st.write(summary)
+        else:
+            st.error("Failed to generate summary. Please try again.")
 
 
 
 if __name__ == '__main__':
-
-    try:
-        main()
-
-    except Exception as e:
-        add_vertical_space(5)
-        st.markdown(f'<h5 style="text-position:center;color:orange;">{e}</h5>', unsafe_allow_html=True)
+    main()
 
diff --git a/requirements.txt b/requirements.txt
@@ -1,7 +1,7 @@
 python-dotenv
 streamlit
 streamlit-extras
-youtube-transcript-api
+youtube-transcript-api>=1.0.0
 google-generativeai
 langcodes
 language_data