justyoung committed on
Commit
047fd82
·
verified ·
1 Parent(s): a3a25bd

Upload 8 files

Browse files
Files changed (8) hide show
  1. .gitattributes +35 -35
  2. .gitignore +2 -0
  3. README.md +3 -3
  4. packages.txt +1 -0
  5. requirements.txt +10 -0
  6. setup.py +33 -0
  7. tes.py +2 -0
  8. youtube-summarizer/app.py +155 -0
.gitattributes CHANGED
@@ -1,35 +1,35 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ .env
2
+ venv/
README.md CHANGED
@@ -1,3 +1,3 @@
1
- ---
2
- license: afl-3.0
3
- ---
 
1
+ # YouTube AI Summarizer
2
+
3
+
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # --extra-index-url https://download.pytorch.org/whl/cu118
2
+ # torch==2.3.1+cu118
3
+ torch
4
+ git+https://github.com/huggingface/transformers
5
+ gradio
6
+ python-dotenv==1.0.1
7
+ yt_dlp==2024.8.6
8
+ google-generativeai==0.7.2
9
+ numpy<2
10
+ spaces
setup.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Packaging script for the YouTube summarizer command-line tool."""
from pathlib import Path

from setuptools import setup, find_packages

# The application code is the single module app.py inside the
# "youtube-summarizer" directory. That directory name is not a valid Python
# identifier, so a plain find_packages() at the repository root cannot pick
# it up and the "app:main" console-script target would be missing after
# installation. Mapping the root package to that directory and listing
# app.py as a top-level module makes the entry point importable.
SOURCE_DIR = "youtube-summarizer"

# read_text() closes the file for us and pins the encoding instead of relying
# on the platform default.
LONG_DESCRIPTION = Path("README.md").read_text(encoding="utf-8")

setup(
    name="youtube-summarizer",
    version="0.1.0",
    package_dir={"": SOURCE_DIR},
    packages=find_packages(where=SOURCE_DIR),
    py_modules=["app"],
    install_requires=[
        "torch",
        "transformers",
        "gradio",
        "python-dotenv>=1.0.1",
        "yt_dlp>=2024.8.6",
        "google-generativeai>=0.7.2",
        "numpy<2",
    ],
    entry_points={
        "console_scripts": [
            "youtube-summarizer=app:main",
        ],
    },
    author="",
    author_email="",
    description="AI-powered YouTube video transcription and summarization tool",
    long_description=LONG_DESCRIPTION,
    long_description_content_type="text/markdown",
    url="",
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    python_requires=">=3.8",
)
tes.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
# Smoke check: print the version string of the installed PyTorch build.
import torch

installed_version = torch.__version__
print(installed_version)
youtube-summarizer/app.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import yt_dlp
3
+ from dotenv import load_dotenv
4
+ import os
5
+ import google.generativeai as genai
6
+ import re
7
+ import torch
8
+ from transformers import pipeline
9
+ from transformers.pipelines.audio_utils import ffmpeg_read
10
+ import time
11
+
12
# Load variables from a .env file (if any) before reading the environment.
load_dotenv()

# Fallback Gemini API key, used when none is given on the command line.
default_gemini_api_key = os.environ.get('gemini_api_key')

# Device handed to the transformers pipeline: CUDA device 0 when available,
# otherwise the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"
16
+
17
def load_pipeline(model_name):
    """Build a speech-recognition pipeline for ``model_name``.

    The pipeline is created on the module-level ``device`` and is configured
    with a chunk length of 30 seconds.

    Args:
        model_name: Model identifier passed to ``transformers.pipeline``.

    Returns:
        The object returned by ``transformers.pipeline``.
    """
    asr_options = {
        "task": "automatic-speech-recognition",
        "model": model_name,
        "chunk_length_s": 30,
        "device": device,
    }
    return pipeline(**asr_options)
24
+
25
def configure_genai(api_key, model_variant):
    """Configure the Gemini client and return a model handle.

    Args:
        api_key: API key forwarded to ``genai.configure``.
        model_variant: Model name forwarded to ``genai.GenerativeModel``.

    Returns:
        The ``genai.GenerativeModel`` instance for ``model_variant``.
    """
    genai.configure(api_key=api_key)
    gemini_model = genai.GenerativeModel(model_variant)
    return gemini_model
28
+
29
def extract_youtube_id(youtube_url):
    """Extract the 11-character video ID from a YouTube URL.

    The ID is recognised when it follows ``v=`` or a ``/`` and is exactly
    11 characters from ``[0-9A-Za-z_-]``; a longer token (for example a
    channel ID) is rejected rather than truncated to its first 11 characters.

    Args:
        youtube_url: The URL text to inspect. ``None``, an empty string or a
            non-string value yields ``None`` instead of raising.

    Returns:
        The video ID, or ``None`` when no ID can be found.
    """
    if not youtube_url or not isinstance(youtube_url, str):
        return None

    # The negative lookahead stops an 11-character prefix of a longer
    # identifier from being mistaken for a video ID.
    id_match = re.search(
        r'(?:v=|/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])', youtube_url
    )
    return id_match.group(1) if id_match else None
34
+
35
def download_youtube_audio(youtube_url, output_filename):
    """Download the audio of a YouTube video with yt-dlp, converted to MP3.

    Args:
        youtube_url: URL handed to ``YoutubeDL.download``.
        output_filename: Value used as yt-dlp's ``outtmpl`` output template.

    Returns:
        ``output_filename`` unchanged. The caller in this file appends
        ``.mp3`` to it to locate the converted audio.
        NOTE(review): presumably the FFmpegExtractAudio post-processor adds
        that extension — confirm against yt-dlp.

    Raises:
        Exception: If the download or post-processing fails. The original
            error is attached as ``__cause__``.
    """
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        'outtmpl': output_filename,
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([youtube_url])
        print(f"Downloaded audio from YouTube URL: {youtube_url}")
        return output_filename
    except Exception as e:
        print(f"Error downloading YouTube audio: {str(e)}")
        # Chain explicitly so the underlying yt-dlp failure stays visible in
        # tracebacks as the direct cause.
        raise Exception(f"Failed to download YouTube audio: {str(e)}") from e
54
+
55
def summarize_transcription(transcription, model, gemini_prompt):
    """Ask the model to summarize a transcription.

    The request text is ``gemini_prompt``, a colon, a blank line and then the
    transcription.

    Args:
        transcription: Transcript text to summarize.
        model: Object exposing ``generate_content(prompt)`` whose result has
            a ``text`` attribute.
        gemini_prompt: Instruction placed ahead of the transcript.

    Returns:
        The response text. On any exception, an
        ``"Error summarizing transcription: ..."`` message is printed and
        returned instead of raising.
    """
    request_text = f"{gemini_prompt}:\n\n{transcription}"
    try:
        # Reading .text stays inside the try so a failure there is reported
        # the same way as a failed request.
        return model.generate_content(request_text).text
    except Exception as e:
        failure_message = f"Error summarizing transcription: {str(e)}"
        print(failure_message)
        return failure_message
63
+
64
def process_audio(audio_file, language, whisper_model):
    """Transcribe an audio file and time the work.

    Args:
        audio_file: Path of the audio file; it is read in binary mode.
        language: Language passed to generation; when falsy no language is
            passed.
        whisper_model: Model name used when the module-level ``device`` is 0.
            On any other device ``openai/whisper-tiny`` is used instead.

    Returns:
        A ``(transcription, processing_time)`` tuple: the ``"text"`` entry of
        the pipeline result and the elapsed seconds rounded to two decimals
        (the timer includes loading the pipeline).
    """
    print("Starting transcription...")
    start_time = time.time()

    # Only the GPU path honours the requested model.
    chosen_model = whisper_model if device == 0 else "openai/whisper-tiny"
    pipe = load_pipeline(chosen_model)

    with open(audio_file, "rb") as audio_handle:
        raw_bytes = audio_handle.read()

    # Decode to the sampling rate the feature extractor reports.
    sampling_rate = pipe.feature_extractor.sampling_rate
    samples = ffmpeg_read(raw_bytes, sampling_rate)
    inputs = {"array": samples, "sampling_rate": sampling_rate}

    generate_kwargs = {"task": "transcribe"}
    if language:
        print(f"Using language: {language}")
        generate_kwargs["language"] = language
    else:
        print("No language defined, using default language")

    result = pipe(
        inputs,
        batch_size=8,
        generate_kwargs=generate_kwargs,
        return_timestamps=True,
    )
    transcription = result["text"]

    processing_time = round(time.time() - start_time, 2)
    return transcription, processing_time
89
+
90
def main():
    """Command-line entry point: transcribe audio and summarize it with Gemini.

    Takes either ``--youtube-url`` (the audio is downloaded first) or
    ``--audio-file``. The transcription and the summary are written to
    ``transcription_output.txt`` and ``summary_output.txt`` in the current
    directory and are also printed.

    Any exception is reported as ``Error: ...`` and the process exits with
    status 1.
    """
    # Local import: the exit status below is the only use of sys here.
    import sys

    parser = argparse.ArgumentParser(description='YouTube Video Transcriber and Summarizer')
    input_group = parser.add_mutually_exclusive_group(required=True)
    input_group.add_argument('--youtube-url', help='YouTube video URL to process')
    input_group.add_argument('--audio-file', help='Local audio file to process')
    parser.add_argument('--whisper-model', default='openai/whisper-large-v3',
                        choices=['openai/whisper-tiny', 'openai/whisper-base', 'openai/whisper-small',
                                 'openai/whisper-medium', 'openai/whisper-large-v3'],
                        help='Whisper model to use for transcription')
    parser.add_argument('--gemini-api-key', help='Gemini API key (optional)')
    parser.add_argument('--gemini-model', default='gemini-1.5-pro',
                        choices=['gemini-1.5-flash', 'gemini-1.5-pro'],
                        help='Gemini model variant to use')
    parser.add_argument('--language', help='Language code for transcription (e.g., en, es, fr)')
    parser.add_argument('--prompt', default='Create a resume from this transcript',
                        help='Prompt for Gemini summarization')

    args = parser.parse_args()

    try:
        print("Initializing...")
        # A key given on the command line wins over the one from the environment.
        gemini_api_key = args.gemini_api_key or default_gemini_api_key
        model = configure_genai(gemini_api_key, args.gemini_model)

        if args.youtube_url:
            print("Processing YouTube URL...")
            youtube_id = extract_youtube_id(args.youtube_url)
            output_filename = youtube_id if youtube_id else "unknown"
            print("Downloading YouTube audio...")
            audio_file = download_youtube_audio(args.youtube_url, output_filename)
            # The download helper returns the name without the extension.
            audio_file = f"{audio_file}.mp3"
            print(f"Audio file downloaded: {audio_file}")
        else:
            print("Processing local audio file...")
            audio_file = args.audio_file
            print(f"Using audio file: {audio_file}")

        print("Starting transcription...")
        try:
            transcription, processing_time = process_audio(audio_file, args.language, args.whisper_model)
        finally:
            # Remove the downloaded audio even when transcription fails, so a
            # failed run does not leave the temporary file behind. A
            # user-supplied --audio-file is never deleted.
            if args.youtube_url and os.path.exists(audio_file):
                os.remove(audio_file)
                print(f"Deleted temporary audio file: {audio_file}")

        print("Summarizing transcription...")
        summary = summarize_transcription(transcription, model, args.prompt)

        print("\nSaving outputs...")
        with open("transcription_output.txt", "w", encoding="utf-8") as f:
            f.write(transcription)
        with open("summary_output.txt", "w", encoding="utf-8") as f:
            f.write(summary)

        print("\nResults:")
        print("\nTranscription (saved to transcription_output.txt):")
        print(transcription)
        print("\nSummary (saved to summary_output.txt):")
        print(summary)
        print(f"\nProcessing time: {processing_time} seconds")

    except Exception as e:
        print(f"Error: {str(e)}")
        # sys.exit instead of the site-provided exit(), which is meant for
        # the interactive shell and is not guaranteed to exist.
        sys.exit(1)


if __name__ == "__main__":
    main()