# Spaces: Sleeping
import os
import tempfile

import streamlit as st
from pytube import YouTube
from transformers import pipeline
# `whisper` is referenced below but was not imported anywhere in the visible
# file, which makes the script fail with NameError at start-up.
import whisper

# Speech-to-text model: the "base" Whisper checkpoint, loaded once at import.
model = whisper.load_model("base")
# Summarization pipeline; no model is named, so the library default is used.
summarizer = pipeline("summarization")
def get_audio_from_video(file_path):
    """Return the path to transcribe for a locally stored video file.

    No audio extraction happens here: the path is handed back unchanged, on
    the assumption that the transcriber can read the video file directly.

    Args:
        file_path: Path of the video file.

    Returns:
        The same ``file_path`` that was passed in.
    """
    # Placeholder hook: a dedicated extraction step (for example with moviepy
    # or ffmpeg) could be added here if the input format ever requires it.
    audio_source = file_path
    return audio_source
def get_audio(url):
    """Download the audio track of a YouTube video into the working directory.

    The first audio-only stream is downloaded and the resulting file is given
    an ``.mp3`` extension. Only the file name changes; the data itself is not
    transcoded.

    Args:
        url: URL of the YouTube video.

    Returns:
        Path of the downloaded file, carrying the ``.mp3`` extension.

    Raises:
        ValueError: If the video offers no audio-only stream.
    """
    yt = YouTube(url)
    stream = yt.streams.filter(only_audio=True).first()
    if stream is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(f"No audio-only stream available for {url!r}")

    out_file = stream.download(output_path=".")
    base, _ = os.path.splitext(out_file)
    new_file = base + '.mp3'
    # os.replace overwrites an existing target on every platform, whereas
    # os.rename raises on Windows when the same video was fetched before.
    os.replace(out_file, new_file)
    return new_file
def get_text(url=None, file_path=None):
    """Transcribe a YouTube video or a local video file to text.

    ``url`` takes precedence when both arguments are given.

    Args:
        url: URL of a YouTube video whose audio is downloaded first.
        file_path: Path of a local video file.

    Returns:
        The ``'text'`` entry of the transcription result.

    Raises:
        ValueError: If neither ``url`` nor ``file_path`` is provided (``None``
            or empty).
    """
    if url:
        audio_path = get_audio(url)
    elif file_path:
        audio_path = get_audio_from_video(file_path)
    else:
        # Previously this fell through and hit an unbound `result` variable.
        raise ValueError("Provide either a YouTube URL or a path to a video file.")

    result = model.transcribe(audio_path)
    return result['text']
def get_summary(url=None, file_path=None):
    """Transcribe a video and return a summary of the transcript.

    Args:
        url: URL of a YouTube video.
        file_path: Path of a local video file.

    Returns:
        The ``'summary_text'`` of the first summarization result.

    Note:
        The transcript is truncated to the summarization model's maximum
        input length, so only the beginning of a very long transcript
        contributes to the summary.
    """
    article = get_text(url, file_path)
    # Transcripts are often longer than the model accepts; without truncation
    # the pipeline can fail on such inputs.
    results = summarizer(article, truncation=True)
    return results[0]['summary_text']
# ---------------------------------------------------------------------------
# Streamlit page: the user picks one of three actions and the matching widgets
# are rendered below the selector.
# ---------------------------------------------------------------------------
st.title("Youtube video transcription with OpenAI's Whisper")
st.write("Enter the link of any youtube video or upload a video file to get the transcription and a summary in the form of text.")
option = st.radio(
    'Choose an option',
    ['Get the transcription of any Youtube video', 'Summary of Youtube video', 'Upload a video file'],
)

if option == 'Get the transcription of any Youtube video':
    url1 = st.text_input('Enter the Youtube video URL')
    if st.button('Get Transcription'):
        if url1.strip():
            transcription = get_text(url1.strip())
            st.text_area('Transcription of the video', transcription)
        else:
            # An empty input used to be passed straight into the pipeline.
            st.warning('Please enter a Youtube video URL.')
elif option == 'Summary of Youtube video':
    url2 = st.text_input('Enter the Youtube video URL')
    if st.button('Get Summary'):
        if url2.strip():
            summary = get_summary(url2.strip())
            st.text_area('Summary text of the Youtube Video', summary)
        else:
            st.warning('Please enter a Youtube video URL.')
elif option == 'Upload a video file':
    uploaded_file = st.file_uploader("Choose a video file", type=["mp4"])
    if uploaded_file:
        if st.button('Transcribe Uploaded Video'):
            # Write the upload to a unique temporary file only when it is
            # actually needed; a fixed file name in the working directory
            # would be shared by concurrent sessions and never cleaned up.
            with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
                tmp.write(uploaded_file.getvalue())
                video_path = tmp.name
            try:
                transcription = get_text(file_path=video_path)
            finally:
                os.remove(video_path)
            st.text_area('Transcription of the uploaded video file', transcription)