Spaces:
Runtime error
Runtime error
import whisper | |
import os | |
import ffmpeg | |
import textwrap | |
from flask import Flask | |
from pytube import YouTube | |
from youtube_transcript_api import YouTubeTranscriptApi | |
from youtube_transcript_api.formatters import SRTFormatter | |
from deep_translator import GoogleTranslator | |
'''
To run the API, paste "uvicorn milestone-2:app" in a terminal.
'''
def download_audio(url: str, download_path: str):
    """Download the first audio-only stream of a YouTube video.

    The file is written to ``download_path`` and named after the video
    title with an ``.mp3`` extension.

    Args:
        url: Address of the YouTube video.
        download_path: Directory the audio file is saved in.

    Returns:
        The path of the downloaded file on success, otherwise a
        ``(400, message)`` tuple describing what went wrong.
    """
    try:
        yt = YouTube(url)
        audio = yt.streams.filter(only_audio=True).first()
        if audio is None:
            # No audio-only stream matched the filter; report it with the
            # dedicated message instead of failing on `None.download`.
            return 400, "Error: audio souce not avaliable or cannot be download"
        # A path separator inside the title would make the file name point
        # into a (non-existent) sub-directory, so neutralise it.
        vid_title = yt.title
        for separator in (os.sep, os.altsep):
            if separator:
                vid_title = vid_title.replace(separator, "_")
        # NOTE(review): the extension is always '.mp3'; the container of the
        # selected stream is not checked here -- confirm this is intended.
        file_name = vid_title + '.mp3'
        audio.download(output_path=download_path, filename=file_name)
    except KeyError:
        return 400, "Error: audio souce not avaliable or cannot be download"
    except ValueError:
        return 400, "Error: invalide URL"
    except Exception as e:
        return 400, "Error downloading video: " + str(e)
    return os.path.join(download_path, file_name)
def download_captions(url: str, download_path: str):
    """Fetch the transcript of a YouTube video and save it as an SRT file.

    The file is written to ``download_path`` and named after the video
    title with an ``.srt`` extension.

    Args:
        url: Address of the YouTube video; the video id is taken from its
            ``v=`` query parameter.
        download_path: Directory the caption file is saved in.

    Returns:
        ``None`` on success, otherwise a ``(400, message)`` tuple describing
        what went wrong.
    """
    try:
        formatter = SRTFormatter()
        yt = YouTube(url)
        # Keep only the id itself: anything after it in the URL (further
        # query parameters or a fragment) is not part of the video id.
        vid_id = url.split("v=")[1].split("&")[0].split("#")[0]
        caption = YouTubeTranscriptApi.get_transcript(vid_id)
        srt_formatted = formatter.format_transcript(caption)
        # A path separator inside the title would make the file name point
        # into a (non-existent) sub-directory, so neutralise it.
        title = yt.title
        for separator in (os.sep, os.altsep):
            if separator:
                title = title.replace(separator, "_")
        file_path = os.path.join(download_path, title + '.srt')
        # The context manager closes the file; no explicit close() needed.
        with open(file_path, 'w', encoding='utf-8') as srt_file:
            srt_file.write(srt_formatted)
    except KeyError:
        return 400, "Error: video not avaliable or cannot be download"
    except ValueError:
        return 400, "Error: invalide URL"
    except Exception as e:
        return 400, "Error extracting transcript from: " + str(e)
    return None
def sep_audio(video: str, output_path):
    """Separate the audio track from a video file and save it as MP3.

    The output file is placed in ``output_path`` and keeps the video's file
    name with the extension replaced by ``.mp3``.

    Args:
        video: Path of the video file to read.
        output_path: Directory the audio file is written to.

    Returns:
        The path of the created audio file, or ``None`` when the video could
        not be accessed or the audio file could not be created (a message is
        printed in both cases).
    """
    # Check the input up front: the ffmpeg calls in the first try block only
    # describe the work, the file itself is first touched by run() below.
    if not os.path.isfile(video):
        print("%s file couldn't be accessed"%video)
        return None
    try:
        source = ffmpeg.input(video)
        audio = source.audio.filter("anull")
    except FileNotFoundError:
        print("%s file couldn't be accessed"%video)
        return None
    # basename/splitext keep dots inside the name ("a.b.mp4" -> "a.b") and
    # honour the platform's path separators.
    stem = os.path.splitext(os.path.basename(video))[0]
    file_path = os.path.join(output_path, stem + '.mp3')
    try:
        ffmpeg.output(audio, file_path).run()
    except Exception:
        # Best effort by design: report the failure and signal it with None.
        print("error creating audio file")
        return None
    return file_path
def transcribe_audio(input_file: str, output_path: str):
    """Transcribe an audio file with Whisper and save the text.

    The transcript is wrapped at 100 characters per line and written to
    ``output_path`` under the audio file's name with a ``.txt`` extension.

    Args:
        input_file: Path of the audio file to transcribe.
        output_path: Directory the transcript is written to.

    Returns:
        The path of the transcript file, or ``None`` when the input file was
        not found or the output directory could not be accessed (a message
        is printed in both cases).
    """
    # TODO: eventually add a check for if file is mp3
    # Fail fast on a missing input so the model is not loaded for nothing.
    if not os.path.isfile(input_file):
        print("%s file was not found " % input_file)
        return None
    try:
        model = whisper.load_model("base")
        result = model.transcribe(input_file)
    except FileNotFoundError:
        print("%s file was not found " % input_file)
        return None
    # basename/splitext keep dots inside the name ("a.b.mp3" -> "a.b") and
    # honour the platform's path separators.
    stem = os.path.splitext(os.path.basename(input_file))[0]
    file_path = os.path.join(output_path, stem) + ".txt"
    try:
        # The context manager closes the file; no explicit close() needed.
        with open(file_path, 'w', encoding='utf-8') as out_file:
            out_file.write(textwrap.fill(result["text"], width=100))
    except FileNotFoundError:
        print("%s this dir can't be accessed " % output_path)
        return None
    return file_path
def translate_text(input_file: str, output_path: str, lang: str):
    """Translate a text file line by line from English into ``lang``.

    The translation is written to ``output_path`` under the input file's
    name followed by `` translation.txt``.

    Args:
        input_file: Path of the text (caption) file to translate.
        output_path: Directory the translated file is written to.
        lang: Target language passed to ``GoogleTranslator``.

    Returns:
        ``None``. A message is printed when the input file is not found,
        when the output directory cannot be accessed, and on success.
    """
    try:  # try to open our caption file
        in_file = open(input_file, 'r', encoding="utf8")
    except FileNotFoundError:
        print("%s file was not found " % input_file)
        return None
    with in_file:
        translator = GoogleTranslator(source='english', target=lang)
        # basename/splitext handle absolute paths and keep dots inside the
        # name ("a.b.txt" -> "a.b").
        stem = os.path.splitext(os.path.basename(input_file))[0]
        out_file_path = os.path.join(output_path, stem + ' translation.txt')
        try:  # try to create a new file to store translation
            out_file = open(out_file_path, 'w', encoding='utf8')
        except FileNotFoundError:
            print("%s this dir can't be accessed " % output_path)
            return None
        with out_file:
            for line in in_file:
                if line.strip():
                    out_file.write(translator.translate(line) + '\n')
                else:
                    # Nothing to translate: keep the blank line as it is.
                    out_file.write('\n')
    print('%s has be sucessfully translate' % input_file)
    return None
### FRONT END ###
import streamlit as st
from transformers import pipeline

# Streamlit front end: asks for a video URL and shows the pipeline's output
# as JSON.
# NOTE(review): 'video-translation' does not appear among the tasks
# documented for transformers.pipeline -- confirm the intended task/model.
# Until then a failed load is shown in the page instead of crashing the app.
try:
    pipe = pipeline('video-translation')
except Exception as exc:
    pipe = None
    st.error("Could not load the 'video-translation' pipeline: " + str(exc))

text = st.text_area('enter a video url!')
if text and pipe is not None:
    out = pipe(text)
    st.json(out)