diff --git a/.env b/.env
deleted file mode 100644
index 939c71c..0000000
--- a/.env
+++ /dev/null
@@ -1 +0,0 @@
-GOOGLE_API_KEY = "Enter Your API Key here"
diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..39bb49f
--- /dev/null
+++ b/.env.example
@@ -0,0 +1 @@
+GOOGLE_API_KEY=your_google_api_key_here
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e2225bc
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,32 @@
+# Environment variables
+.env
+
+# Claude Code
+CLAUDE.md
+.claude/
+
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+
+# Virtual environments
+venv/
+env/
+ENV/
+.venv
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+
+# OS
+.DS_Store
+Thumbs.db
+
+# Streamlit
+.streamlit/secrets.toml
diff --git a/README.md b/README.md
index f210644..2e993fb 100644
--- a/README.md
+++ b/README.md
@@ -31,14 +31,8 @@ The YouTube Video Transcript Summarizer with GenAI is an innovative tool designe
To run this project, you need to install the following packages:
-```python
-pip install python-dotenv
-pip install streamlit
-pip install streamlit-extras
-pip install youtube-transcript-api
-pip install google-generativeai
-pip install langcodes
-pip install language_data
+```bash
+pip install -r requirements.txt
```
@@ -49,9 +43,14 @@ To use this project, follow these steps:
1. Clone the repository: ```git clone https://github.com/gopiashokan/YouTube-Video-Transcript-Summarizer-with-GenAI.git```
2. Install the required packages: ```pip install -r requirements.txt```
-3. Add your Google API key to the `.env` file.
-4. Run the Streamlit app: ```streamlit run app.py```
-5. Access the app in your browser at ```http://localhost:8501```
+3. Create a `.env` file in the root directory (use `.env.example` as a template).
+4. Add your Google API key to the `.env` file:
+ ```
+ GOOGLE_API_KEY=your_actual_api_key_here
+ ```
+ **⚠️ IMPORTANT:** Never commit your `.env` file to Git. It contains sensitive API keys.
+5. Run the Streamlit app: ```streamlit run app.py```
+6. Access the app in your browser at ```http://localhost:8501```
diff --git a/app.py b/app.py
index 615abd5..1778038 100644
--- a/app.py
+++ b/app.py
@@ -1,11 +1,12 @@
import os
+import re
import langcodes
import google.generativeai as genai
import streamlit as st
from streamlit_extras.add_vertical_space import add_vertical_space
from dotenv import load_dotenv
from youtube_transcript_api import YouTubeTranscriptApi
-from warnings import filterwarnings
+from urllib.parse import urlparse, parse_qs
@@ -39,71 +40,136 @@ def streamlit_config():
+def extract_video_id(video_link):
+    """
+    Extract the 11-character video ID from a YouTube URL or a bare video ID.
+
+    Supports (with or without a leading scheme, on youtube.com or any subdomain):
+    - https://www.youtube.com/watch?v=VIDEO_ID
+    - https://youtu.be/VIDEO_ID
+    - https://www.youtube.com/embed/VIDEO_ID
+    - https://www.youtube.com/v/VIDEO_ID
+    - https://www.youtube.com/shorts/VIDEO_ID
+    - VIDEO_ID on its own
+
+    Returns the video ID string, or None when the input is not a string,
+    does not point at a YouTube host, or carries no well-formed ID.
+    """
+    if not isinstance(video_link, str):
+        return None
+
+    link = video_link.strip()
+    id_pattern = r'[A-Za-z0-9_-]{11}'
+
+    # Already just the video ID
+    if re.fullmatch(id_pattern, link):
+        return link
+
+    # urlparse() only fills in the host when a scheme is present, so add one
+    # for inputs such as "youtu.be/VIDEO_ID" or "www.youtube.com/watch?v=...".
+    if not re.match(r'^[A-Za-z][A-Za-z0-9+.-]*://', link):
+        link = 'https://' + link.lstrip('/')
+
+    try:
+        parsed_url = urlparse(link)
+        host = (parsed_url.hostname or '').lower()
+    except ValueError:
+        # Malformed netloc (e.g. an unbalanced IPv6 bracket)
+        return None
+
+    # Compare the parsed host exactly; a substring test would also accept
+    # unrelated sites that merely mention "youtube.com" somewhere in the URL.
+    candidate = None
+    if host == 'youtu.be':
+        # First path segment only, so "/VIDEO_ID/" or "/VIDEO_ID/extra" still work
+        candidate = parsed_url.path.lstrip('/').split('/')[0]
+    elif host == 'youtube.com' or host.endswith('.youtube.com'):
+        if parsed_url.path == '/watch':
+            candidate = parse_qs(parsed_url.query).get('v', [None])[0]
+        else:
+            path_match = re.match(r'^/(?:embed|v|shorts)/([^/]+)', parsed_url.path)
+            if path_match:
+                candidate = path_match.group(1)
+
+    # Only hand back something that actually looks like a video ID
+    if candidate and re.fullmatch(id_pattern, candidate):
+        return candidate
+    return None
+
+
def extract_languages(video_id):
+ """
+ List the transcript languages available for a YouTube video, as display names.
+ Returns (language_list, language_dict), where language_dict maps display name -> language code; on any exception shows st.error and returns (None, None).
+ """
+ try:
+ # Create the YouTubeTranscriptApi instance used for the lookup below
+ ytt_api = YouTubeTranscriptApi()
- # Fetch the List of Available Transcripts for Given Video
- transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
+ # Fetch the List of Available Transcripts for Given Video
+ transcript_list = ytt_api.list(video_id)
- # Extract the Language Codes from List ---> ['en','ta']
- available_transcripts = [i.language_code for i in transcript_list]
+ # Extract the Language Codes from List ---> ['en','ta']
+ available_transcripts = [i.language_code for i in transcript_list]
- # Convert Language_codes to Human-Readable Language_names ---> 'en' into 'English'
- language_list = list({langcodes.Language.get(i).display_name() for i in available_transcripts})
+ # Convert Language_codes to Human-Readable Language_names ---> 'en' into 'English' (built via a set, so duplicate names collapse and order is not fixed)
+ language_list = list({langcodes.Language.get(i).display_name() for i in available_transcripts})
- # Create a Dictionary Mapping Language_names to Language_codes
- language_dict = {langcodes.Language.get(i).display_name():i for i in available_transcripts}
+ # Create a Dictionary Mapping Language_names to Language_codes (if two codes share a display name, the later code wins)
+ language_dict = {langcodes.Language.get(i).display_name():i for i in available_transcripts}
- return language_list, language_dict
+ return language_list, language_dict
+
+ except Exception as e:
+ st.error(f"Error fetching transcripts: {str(e)}")
+ return None, None
def extract_transcript(video_id, language):
-
+ """
+ Extract transcript text for a YouTube video in specified language.
+ Returns transcript string or None on error.
+ """
try:
- # Request Transcript for YouTube Video using API
- transcript_content = YouTubeTranscriptApi.get_transcript(video_id=video_id, languages=[language])
-
+ # Create YouTubeTranscriptApi instance
+ ytt_api = YouTubeTranscriptApi()
+
+ # Get list of available transcripts
+ transcript_list = ytt_api.list(video_id)
+
+ # Find transcript in the specified language
+ transcript = transcript_list.find_transcript([language])
+
+ # Fetch the actual transcript content
+ transcript_content = transcript.fetch()
+
# Extract Transcript Content from JSON Response and Join to Single Response
- transcript = ' '.join([i['text'] for i in transcript_content])
+ transcript_text = ' '.join([i.text for i in transcript_content])
+
+ return transcript_text
- return transcript
-
-
except Exception as e:
- add_vertical_space(5)
- st.markdown(f'