Spaces:

BatuhanYilmaz
/

Whisper-Auto-Subtitled-Video-Generator

Running

App Files Files Community

BatuhanYilmaz committed on Aug 24

Commit

f13254e

•

1 Parent(s): 0a8bf2e

Update 01_🎥_Input_YouTube_Link.py

Browse files

Files changed (1) hide show

01_🎥_Input_YouTube_Link.py +92 -92

01_🎥_Input_YouTube_Link.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import whisper
-from pytube import YouTube
 import requests
 import time
 import streamlit as st
@@ -11,17 +12,39 @@ from io import StringIO
 from utils import write_vtt, write_srt
 import ffmpeg
 from languages import LANGUAGES
-st.set_page_config(page_title="Auto Subtitled Video Generator ", page_icon=":movie_camera:", layout="wide")
 # Define a function that we can use to load lottie files from a link.
-@st.cache()
 def load_lottieurl(url: str):
     """Download a Lottie animation and return its parsed JSON payload.

     Args:
         url: Address of the Lottie JSON file to fetch.

     Returns:
         The decoded JSON body, or ``None`` when the request fails, the
         server answers with a status other than 200, or the body is not
         valid JSON.
     """
     try:
         # Without a timeout, requests waits indefinitely on an unresponsive server.
         r = requests.get(url, timeout=10)
     except requests.RequestException:
         return None
     if r.status_code != 200:
         return None
     try:
         return r.json()
     except ValueError:
         # The JSON decode errors raised by requests derive from ValueError.
         return None
 col1, col2 = st.columns([1, 3])
 with col1:
     lottie = load_lottieurl("https://assets8.lottiefiles.com/packages/lf20_jh9gfdye.json")
@@ -34,24 +57,12 @@ with col2:
     ###### ➠ If you want to transcribe the video in its original language, select the task as "Transcribe"
     ###### ➠ If you want to translate the subtitles to English, select the task as "Translate"
     ###### I recommend starting with the base model and then experimenting with the larger models, the small and medium models often work well. """)
-@st.cache(allow_output_mutation=True)
-def populate_metadata(link):
-    yt = YouTube(link)
-    author = yt.author
-    title = yt.title
-    description = yt.description
-    thumbnail = yt.thumbnail_url
-    length = yt.length
-    views = yt.views
-    return author, title, description, thumbnail, length, views
-@st.cache(allow_output_mutation=True)
 def download_video(link):
-    yt = YouTube(link)
-    video = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first().download()
     return video
@@ -59,11 +70,6 @@ def convert(seconds):
     return time.strftime("%H:%M:%S", time.gmtime(seconds))
-loaded_model = whisper.load_model("base")
-current_size = "None"
-@st.cache(allow_output_mutation=True)
 def change_model(current_size, size):
     if current_size != size:
         loaded_model = whisper.load_model(size)
@@ -72,10 +78,10 @@ def change_model(current_size, size):
         raise Exception("Model size is the same as the current size.")
-@st.cache(allow_output_mutation=True)
 def inference(link, loaded_model, task):
-    yt = YouTube(link)
-    path = yt.streams.filter(only_audio=True)[0].download(filename="audio.mp3")
     if task == "Transcribe":
         options = dict(task="transcribe", best_of=5)
         results = loaded_model.transcribe(path, **options)
@@ -94,7 +100,6 @@ def inference(link, loaded_model, task):
         raise ValueError("Task not supported")
-@st.cache(allow_output_mutation=True)
 def getSubs(segments: Iterator[dict], format: str, maxLineWidth: int) -> str:
     segmentStream = StringIO()
@@ -120,8 +125,8 @@ def get_language_code(language):
 def generate_subtitled_video(video, audio, transcript):
     video_file = ffmpeg.input(video)
     audio_file = ffmpeg.input(audio)
-    ffmpeg.concat(video_file.filter("subtitles", transcript), audio_file, v=1, a=1).output("final.mp4").run(quiet=True, overwrite_output=True)
-    video_with_subs = open("final.mp4", "rb")
     return video_with_subs
@@ -130,25 +135,27 @@ def main():
     loaded_model = change_model(current_size, size)
     st.write(f"Model is {'multilingual' if loaded_model.is_multilingual else 'English-only'} "
         f"and has {sum(np.prod(p.shape) for p in loaded_model.parameters()):,} parameters.")
-    link = st.text_input("YouTube Link (The longer the video, the longer the processing time)")
     task = st.selectbox("Select Task", ["Transcribe", "Translate"], index=0)
     if task == "Transcribe":
         if st.button("Transcribe"):
-            author, title, description, thumbnail, length, views = populate_metadata(link)
-            results = inference(link, loaded_model, task)
             video = download_video(link)
             lang = results[3]
             detected_language = get_language_code(lang)
             col3, col4 = st.columns(2)
-            col5, col6, col7, col8 = st.columns(4)
-            col9, col10 = st.columns(2)
             with col3:
                 st.video(video)
-            # Write the results to a .txt file and download it.
             with open("transcript.txt", "w+", encoding='utf8') as f:
-                f.writelines(results[0])
                 f.close()
             with open(os.path.join(os.getcwd(), "transcript.txt"), "rb") as f:
                 datatxt = f.read()
@@ -164,50 +171,47 @@ def main():
                 f.close()
             with open(os.path.join(os.getcwd(), "transcript.srt"), "rb") as f:
                 datasrt = f.read()
-            with col5:
-                st.download_button(label="Download Transcript (.txt)",
-                                data=datatxt,
-                                file_name="transcript.txt")
-            with col6:
-                st.download_button(label="Download Transcript (.vtt)",
-                                    data=datavtt,
-                                    file_name="transcript.vtt")
-            with col7:
-                st.download_button(label="Download Transcript (.srt)",
-                                    data=datasrt,
-                                    file_name="transcript.srt")
-            with col9:
-                st.success("You can download the transcript in .srt format, edit it (if you need to) and upload it to YouTube to create subtitles for your video.")
-            with col10:
-                st.info("Streamlit refreshes after the download button is clicked. The data is cached so you can download the transcript again without having to transcribe the video again.")
             with col4:
-                with st.spinner("Generating Subtitled Video "):
-                    video_with_subs = generate_subtitled_video(video, "audio.mp3", "transcript.srt")
                 st.video(video_with_subs)
                 st.balloons()
-            with col8:
-                st.download_button(label="Download Subtitled Video",
-                                    data=video_with_subs,
-                                    file_name=f"{title} with subtitles.mp4")
     elif task == "Translate":
         if st.button("Translate to English"):
-            author, title, description, thumbnail, length, views = populate_metadata(link)
-            results = inference(link, loaded_model, task)
             video = download_video(link)
             lang = results[3]
             detected_language = get_language_code(lang)
             col3, col4 = st.columns(2)
-            col5, col6, col7, col8 = st.columns(4)
-            col9, col10 = st.columns(2)
             with col3:
                 st.video(video)
-            # Write the results to a .txt file and download it.
             with open("transcript.txt", "w+", encoding='utf8') as f:
-                f.writelines(results[0])
                 f.close()
             with open(os.path.join(os.getcwd(), "transcript.txt"), "rb") as f:
                 datatxt = f.read()
@@ -223,36 +227,32 @@ def main():
                 f.close()
             with open(os.path.join(os.getcwd(), "transcript.srt"), "rb") as f:
                 datasrt = f.read()
-            with col5:
-                st.download_button(label="Download Transcript (.txt)",
-                                data=datatxt,
-                                file_name="transcript.txt")
-            with col6:
-                st.download_button(label="Download Transcript (.vtt)",
-                                    data=datavtt,
-                                    file_name="transcript.vtt")
-            with col7:
-                st.download_button(label="Download Transcript (.srt)",
-                                    data=datasrt,
-                                    file_name="transcript.srt")
-            with col9:
-                st.success("You can download the transcript in .srt format, edit it (if you need to) and upload it to YouTube to create subtitles for your video.")
-            with col10:
-                st.info("Streamlit refreshes after the download button is clicked. The data is cached so you can download the transcript again without having to transcribe the video again.")
             with col4:
-                with st.spinner("Generating Subtitled Video "):
-                    video_with_subs = generate_subtitled_video(video, "audio.mp3", "transcript.srt")
                 st.video(video_with_subs)
                 st.balloons()
-            with col8:
-                st.download_button(label="Download Subtitled Video ",
-                                    data=video_with_subs,
-                                    file_name=f"{title} with subtitles.mp4")
     else:
-        st.error("Please select a task.")
 if __name__ == "__main__":
     main()
-    st.markdown("###### Made with :heart: by [@BatuhanYılmaz](https://github.com/BatuhanYilmaz26) [![this is an image link](https://i.imgur.com/thJhzOO.png)](https://www.buymeacoffee.com/batuhanylmz)")

 import whisper
+from pytubefix import YouTube
+from pytubefix.cli import on_progress
 import requests
 import time
 import streamlit as st
 from utils import write_vtt, write_srt
 import ffmpeg
 from languages import LANGUAGES
+import torch
+from zipfile import ZipFile
+from io import BytesIO
+import base64
+import pathlib
+import re
+st.set_page_config(page_title="Auto Subtitled Video Generator", page_icon=":movie_camera:", layout="wide")
+torch.cuda.is_available()
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+# Model options: tiny, base, small, medium, large
+loaded_model = whisper.load_model("small", device=DEVICE)
+current_size = "None"
 # Define a function that we can use to load lottie files from a link.
 def load_lottieurl(url: str):
     """Download a Lottie animation and return its parsed JSON payload.

     Args:
         url: Address of the Lottie JSON file to fetch.

     Returns:
         The decoded JSON body, or ``None`` when the request fails, the
         server answers with a status other than 200, or the body is not
         valid JSON.
     """
     try:
         # Without a timeout, requests waits indefinitely on an unresponsive server.
         r = requests.get(url, timeout=10)
     except requests.RequestException:
         return None
     if r.status_code != 200:
         return None
     try:
         return r.json()
     except ValueError:
         # The JSON decode errors raised by requests derive from ValueError.
         return None
+APP_DIR = pathlib.Path(__file__).parent.absolute()
+LOCAL_DIR = APP_DIR / "local_youtube"
+LOCAL_DIR.mkdir(exist_ok=True)
+save_dir = LOCAL_DIR / "output"
+save_dir.mkdir(exist_ok=True)
 col1, col2 = st.columns([1, 3])
 with col1:
     lottie = load_lottieurl("https://assets8.lottiefiles.com/packages/lf20_jh9gfdye.json")
     ###### ➠ If you want to transcribe the video in its original language, select the task as "Transcribe"
     ###### ➠ If you want to translate the subtitles to English, select the task as "Translate"
     ###### I recommend starting with the base model and then experimenting with the larger models, the small and medium models often work well. """)
 def download_video(link):
+    yt = YouTube(link, on_progress_callback=on_progress)
+    ys = yt.streams.get_highest_resolution()
+    video = ys.download(filename=f"{save_dir}/youtube_video.mp4")
     return video
     return time.strftime("%H:%M:%S", time.gmtime(seconds))
 def change_model(current_size, size):
     if current_size != size:
         loaded_model = whisper.load_model(size)
         raise Exception("Model size is the same as the current size.")
 def inference(link, loaded_model, task):
+    yt = YouTube(link, on_progress_callback=on_progress)
+    ys = yt.streams.get_audio_only()
+    path = ys.download(filename=f"{save_dir}/audio.mp3", mp3=True)
     if task == "Transcribe":
         options = dict(task="transcribe", best_of=5)
         results = loaded_model.transcribe(path, **options)
         raise ValueError("Task not supported")
 def getSubs(segments: Iterator[dict], format: str, maxLineWidth: int) -> str:
     segmentStream = StringIO()
 def generate_subtitled_video(video, audio, transcript):
     video_file = ffmpeg.input(video)
     audio_file = ffmpeg.input(audio)
+    ffmpeg.concat(video_file.filter("subtitles", transcript), audio_file, v=1, a=1).output("youtube_sub.mp4").run(quiet=True, overwrite_output=True)
+    video_with_subs = open("youtube_sub.mp4", "rb")
     return video_with_subs
     loaded_model = change_model(current_size, size)
     st.write(f"Model is {'multilingual' if loaded_model.is_multilingual else 'English-only'} "
         f"and has {sum(np.prod(p.shape) for p in loaded_model.parameters()):,} parameters.")
+    link = st.text_input("YouTube Link (The longer the video, the longer the processing time)", placeholder="Input YouTube link and press enter")
     task = st.selectbox("Select Task", ["Transcribe", "Translate"], index=0)
     if task == "Transcribe":
         if st.button("Transcribe"):
+            with st.spinner("Transcribing the video..."):
+                results = inference(link, loaded_model, task)
             video = download_video(link)
             lang = results[3]
             detected_language = get_language_code(lang)
             col3, col4 = st.columns(2)
             with col3:
                 st.video(video)
+            # Split results[0] on "!", "?" and ".", keeping each punctuation mark as its own item
+            sentences = re.split("([!?.])", results[0])
+            # Join the punctuation back to the sentences
+            sentences = ["".join(i) for i in zip(sentences[0::2], sentences[1::2])]
+            text = "\n\n".join(sentences)
             with open("transcript.txt", "w+", encoding='utf8') as f:
+                f.writelines(text)
                 f.close()
             with open(os.path.join(os.getcwd(), "transcript.txt"), "rb") as f:
                 datatxt = f.read()
                 f.close()
             with open(os.path.join(os.getcwd(), "transcript.srt"), "rb") as f:
                 datasrt = f.read()
             with col4:
+                with st.spinner("Generating Subtitled Video"):
+                    video_with_subs = generate_subtitled_video(video, f"{save_dir}/audio.mp3", "transcript.srt")
                 st.video(video_with_subs)
                 st.balloons()
+            zipObj = ZipFile("YouTube_transcripts_and_video.zip", "w")
+            zipObj.write("transcript.txt")
+            zipObj.write("transcript.vtt")
+            zipObj.write("transcript.srt")
+            zipObj.write("youtube_sub.mp4")
+            zipObj.close()
+            ZipfileDotZip = "YouTube_transcripts_and_video.zip"
+            with open(ZipfileDotZip, "rb") as f:
+                datazip = f.read()
+                b64 = base64.b64encode(datazip).decode()
+                href = f"<a href=\"data:file/zip;base64,{b64}\" download='{ZipfileDotZip}'>\
+        Download Transcripts and Video\
+    </a>"
+            st.markdown(href, unsafe_allow_html=True)
     elif task == "Translate":
         if st.button("Translate to English"):
+            with st.spinner("Translating to English..."):
+                results = inference(link, loaded_model, task)
             video = download_video(link)
             lang = results[3]
             detected_language = get_language_code(lang)
             col3, col4 = st.columns(2)
             with col3:
                 st.video(video)
+            # Split results[0] on "!", "?" and ".", keeping each punctuation mark as its own item
+            sentences = re.split("([!?.])", results[0])
+            # Join the punctuation back to the sentences
+            sentences = ["".join(i) for i in zip(sentences[0::2], sentences[1::2])]
+            text = "\n\n".join(sentences)
             with open("transcript.txt", "w+", encoding='utf8') as f:
+                f.writelines(text)
                 f.close()
             with open(os.path.join(os.getcwd(), "transcript.txt"), "rb") as f:
                 datatxt = f.read()
                 f.close()
             with open(os.path.join(os.getcwd(), "transcript.srt"), "rb") as f:
                 datasrt = f.read()
             with col4:
+                with st.spinner("Generating Subtitled Video"):
+                    video_with_subs = generate_subtitled_video(video, f"{save_dir}/audio.mp3", "transcript.srt")
                 st.video(video_with_subs)
                 st.balloons()
+            zipObj = ZipFile("YouTube_transcripts_and_video.zip", "w")
+            zipObj.write("transcript.txt")
+            zipObj.write("transcript.vtt")
+            zipObj.write("transcript.srt")
+            zipObj.write("youtube_sub.mp4")
+            zipObj.close()
+            ZipfileDotZip = "YouTube_transcripts_and_video.zip"
+            with open(ZipfileDotZip, "rb") as f:
+                datazip = f.read()
+                b64 = base64.b64encode(datazip).decode()
+                href = f"<a href=\"data:file/zip;base64,{b64}\" download='{ZipfileDotZip}'>\
+        Download Transcripts and Video\
+    </a>"
+            st.markdown(href, unsafe_allow_html=True)
     else:
+        st.info("Please select a task.")
 if __name__ == "__main__":
     main()