Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .env

This file was deleted.

1 change: 1 addition & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
GOOGLE_API_KEY=your_google_api_key_here
32 changes: 32 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Environment variables
.env

# Claude Code
CLAUDE.md
.claude/

# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python

# Virtual environments
venv/
env/
ENV/
.venv

# IDE
.vscode/
.idea/
*.swp
*.swo

# OS
.DS_Store
Thumbs.db

# Streamlit
.streamlit/secrets.toml
21 changes: 10 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,8 @@ The YouTube Video Transcript Summarizer with GenAI is an innovative tool designe

To run this project, you need to install the following packages:

```python
pip install python-dotenv
pip install streamlit
pip install streamlit-extras
pip install youtube-transcript-api
pip install google-generativeai
pip install langcodes
pip install language_data
```bash
pip install -r requirements.txt
```

<br />
Expand All @@ -49,9 +43,14 @@ To use this project, follow these steps:

1. Clone the repository: ```git clone https://github.com/gopiashokan/YouTube-Video-Transcript-Summarizer-with-GenAI.git```
2. Install the required packages: ```pip install -r requirements.txt```
3. Add your Google API key to the `.env` file.
4. Run the Streamlit app: ```streamlit run app.py```
5. Access the app in your browser at ```http://localhost:8501```
3. Create a `.env` file in the project root directory (use `.env.example` as a template).
4. Add your Google API key to the `.env` file:
```
GOOGLE_API_KEY=your_actual_api_key_here
```
**⚠️ IMPORTANT:** Never commit your `.env` file to Git. It contains sensitive API keys.
5. Run the Streamlit app: ```streamlit run app.py```
6. Access the app in your browser at ```http://localhost:8501```

<br />

Expand Down
194 changes: 133 additions & 61 deletions app.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import os
import re
import langcodes
import google.generativeai as genai
import streamlit as st
from streamlit_extras.add_vertical_space import add_vertical_space
from dotenv import load_dotenv
from youtube_transcript_api import YouTubeTranscriptApi
from warnings import filterwarnings
from urllib.parse import urlparse, parse_qs



Expand Down Expand Up @@ -39,71 +40,136 @@ def streamlit_config():



def extract_video_id(video_link):
    """
    Extract video ID from various YouTube URL formats.
    Supports:
    - https://www.youtube.com/watch?v=VIDEO_ID
    - https://youtu.be/VIDEO_ID
    - https://www.youtube.com/embed/VIDEO_ID
    - https://www.youtube.com/v/VIDEO_ID
    - https://www.youtube.com/shorts/VIDEO_ID
    - https://www.youtube.com/live/VIDEO_ID
    - Any of the above without the scheme (youtube.com/watch?v=VIDEO_ID)
    - A bare 11-character VIDEO_ID

    Returns the 11-character video ID string, or None when the input is not
    a string or no valid video ID can be found in it.
    """
    if not isinstance(video_link, str):
        return None

    candidate = video_link.strip()
    id_pattern = r'[A-Za-z0-9_-]{11}'

    # If it's already just the video ID (11 characters)
    if re.fullmatch(id_pattern, candidate):
        return candidate

    if not candidate:
        return None

    # urlparse only fills in the host when a scheme is present, so add one
    # for links pasted as "youtube.com/watch?v=..."
    if not re.match(r'^[A-Za-z][A-Za-z0-9+.-]*://', candidate):
        candidate = 'https://' + candidate.lstrip('/')

    try:
        parsed_url = urlparse(candidate)
        host = (parsed_url.hostname or '').lower()
    except ValueError:
        # Raised by urlparse for malformed hosts (e.g. an unclosed IPv6 bracket)
        return None

    # Non-empty path segments, so trailing or doubled slashes are harmless
    segments = [part for part in parsed_url.path.split('/') if part]
    video_id = None

    # Compare the real host rather than searching the whole link, so that
    # look-alike domains or a YouTube URL buried in a query string don't match
    if host == 'youtu.be':
        if segments:
            video_id = segments[0]

    elif host == 'youtube.com' or host.endswith('.youtube.com'):
        if segments and segments[0] == 'watch':
            values = parse_qs(parsed_url.query).get('v')
            if values:
                video_id = values[0]
        elif len(segments) >= 2 and segments[0] in ('embed', 'v', 'shorts', 'live'):
            video_id = segments[1]

    # Only hand back something that actually looks like a video ID
    if video_id and re.fullmatch(id_pattern, video_id):
        return video_id

    return None


def extract_languages(video_id):
    """
    Extract available transcript languages for a YouTube video.
    Returns tuple of (language_list, language_dict) or (None, None) on error.

    language_list holds the human-readable language names in the order the
    transcripts are listed; language_dict maps each name to its language code.
    """
    try:
        # Create YouTubeTranscriptApi instance
        ytt_api = YouTubeTranscriptApi()

        # Fetch the List of Available Transcripts for Given Video
        transcript_list = ytt_api.list(video_id)

        # Extract the Language Codes from List ---> ['en','ta']
        available_transcripts = [i.language_code for i in transcript_list]

        # Create a Dictionary Mapping Language_names to Language_codes ---> {'English': 'en'}
        language_dict = {langcodes.Language.get(i).display_name(): i for i in available_transcripts}

        # Take the names from the dict keys (not from a set) so the select box
        # options keep a stable order between reruns
        language_list = list(language_dict)

        return language_list, language_dict

    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing the app
        st.error(f"Error fetching transcripts: {str(e)}")
        return None, None



def extract_transcript(video_id, language):
    """
    Extract transcript text for a YouTube video in specified language.
    Returns transcript string or None on error.

    video_id is the YouTube video ID and language is the transcript language
    code (e.g. 'en'). Any failure is reported with st.error.
    """
    try:
        # Create YouTubeTranscriptApi instance
        ytt_api = YouTubeTranscriptApi()

        # Get list of available transcripts
        transcript_list = ytt_api.list(video_id)

        # Find transcript in the specified language
        transcript = transcript_list.find_transcript([language])

        # Fetch the actual transcript content
        transcript_content = transcript.fetch()

        # Join the Text of Every Snippet into a Single String
        transcript_text = ' '.join(snippet.text for snippet in transcript_content)

        return transcript_text

    except Exception as e:
        # UI boundary: st.error renders the message as plain text, so exception
        # content is never interpreted as HTML
        st.error(f"Error extracting transcript: {str(e)}")
        return None



def generate_summary(transcript_text):
    """
    Generate AI-powered summary using Google Gemini.
    Returns summary string or None on error.

    Reads the API key from the GOOGLE_API_KEY environment variable; when it is
    missing, or when the request fails, the problem is reported with st.error.
    """
    try:
        # Check if API key exists
        api_key = os.getenv('GOOGLE_API_KEY')
        if not api_key:
            st.error("Google API key not found. Please add GOOGLE_API_KEY to your .env file.")
            return None

        # Configures the genai Library
        genai.configure(api_key=api_key)

        # Initializes a Gemini 2.5 Flash Generative Model
        model = genai.GenerativeModel(model_name='gemini-2.5-flash')

        # Define a Prompt for AI Model
        prompt = """You are a YouTube video summarizer. You will be taking the transcript text and summarizing the entire video,
                    providing the important points with proper sub-headings in a concise manner (within 500 words).
                    Please provide the summary of the text given here: """

        response = model.generate_content(prompt + transcript_text)

        return response.text

    except Exception as e:
        # UI boundary: st.error renders the message as plain text, so exception
        # content is never interpreted as HTML
        st.error(f"Error generating summary: {str(e)}")
        return None



def main():

# Filter the Warnings
filterwarnings(action='ignore')

# Load the Environment Variables
load_dotenv()

Expand All @@ -112,67 +178,73 @@ def main():

# Initialize the Button Variable
button = False
video_id = None
language = None

with st.sidebar:

image_url = 'https://raw.githubusercontent.com/gopiashokan/YouTube-Video-Transcript-Summarizer-with-GenAI/main/image/youtube_banner.JPG'
st.image(image_url, use_column_width=True)
st.image(image_url, use_container_width=True)
add_vertical_space(2)

# Get YouTube Video Link From User
# Get YouTube Video Link From User
video_link = st.text_input(label='Enter YouTube Video Link')

if video_link:
# Extract the Video ID From URL
video_id = video_link.split('=')[1].split('&')[0]

# Extract Language from Video_ID
language_list, language_dict = extract_languages(video_id)

# User Select the Transcript Language
language_input = st.selectbox(label='Select Transcript Language',
options=language_list)

# Get Language_code from Dict
language = language_dict[language_input]

# Click Submit Button
add_vertical_space(1)
button = st.button(label='Submit')

video_id = extract_video_id(video_link)

if not video_id:
st.error("Invalid YouTube URL. Please enter a valid YouTube video link.")
else:
# Extract Language from Video_ID
language_list, language_dict = extract_languages(video_id)

if language_list and language_dict:
# User Select the Transcript Language
language_input = st.selectbox(label='Select Transcript Language',
options=language_list)

# Get Language_code from Dict
language = language_dict[language_input]

# Click Submit Button
add_vertical_space(1)
button = st.button(label='Submit')


# User Enter the Video Link and Click Submit Button
if button and video_link:
if button and video_link and video_id and language:

# UI Split into Columns
_, col2, _ = st.columns([0.07,0.83,0.1])

# Display the Video Thumbnail Image
with col2:
st.image(image=f'http://img.youtube.com/vi/{video_id}/0.jpg',
use_column_width=True)
st.image(image=f'http://img.youtube.com/vi/{video_id}/0.jpg',
use_container_width=True)

# Extract Transcript from YouTube Video
add_vertical_space(2)
with st.spinner(text='Extracting Transcript...'):
transcript_text = extract_transcript(video_id, language)

if not transcript_text:
st.error("Failed to extract transcript. Please try again.")
return

# Generating Summary using Gemini AI
with st.spinner(text='Generating Summary...'):
summary = generate_summary(transcript_text)

# Display the Summary
if summary:
st.write(summary)
else:
st.error("Failed to generate summary. Please try again.")



# Script entry point. Each step inside main() reports its own failures with
# st.error, so no extra wrapper that renders exception text as raw HTML is needed.
if __name__ == '__main__':
    main()

2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
python-dotenv
streamlit
streamlit-extras
youtube-transcript-api
youtube-transcript-api>=0.6.0
google-generativeai
langcodes
language_data