# JosefBirman's picture
# Update app.py
# b098f80
import whisper
import os
import ffmpeg
import textwrap
from flask import Flask
from pytube import YouTube
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import SRTFormatter
from deep_translator import GoogleTranslator
'''
To run the API, paste "uvicorn milestone-2:app" into a terminal.
'''
def download_audio(url: str, download_path: str):
    """Download the first audio-only stream of a YouTube video.

    Args:
        url: URL of the YouTube video.
        download_path: Directory the audio file is written to.

    Returns:
        The path of the downloaded file (``<download_path>/<title>.mp3``) on
        success, or a ``(400, message)`` tuple when the download fails.
    """
    try:
        yt = YouTube(url)
        audio = yt.streams.filter(only_audio=True).first()
        if audio is None:
            # No audio-only stream matched the filter; report it explicitly
            # instead of failing later on ``None.download``.
            return 400, "Error: audio souce not avaliable or cannot be download"
        # A path separator inside the title would be interpreted as a
        # directory when the name is joined onto download_path.
        safe_title = yt.title.replace('/', '_').replace('\\', '_')
        file_name = safe_title + '.mp3'
        # NOTE(review): the stream is saved under an .mp3 name without any
        # transcoding - confirm downstream tools do not rely on the extension.
        audio.download(output_path=download_path, filename=file_name)
    except KeyError:
        return 400, "Error: audio souce not avaliable or cannot be download"
    except ValueError:
        return 400, "Error: invalide URL"
    except Exception as e:
        return 400, "Error downloading video: " + str(e)
    return os.path.join(download_path, file_name)
def download_captions(url: str, download_path: str):
    """Fetch a YouTube video's transcript and save it as an SRT file.

    The file is written to ``<download_path>/<title>.srt``.

    Args:
        url: URL of the YouTube video.
        download_path: Directory the ``.srt`` file is written to.

    Returns:
        ``None`` on success, or a ``(400, message)`` tuple on failure.
    """
    try:
        yt = YouTube(url)
        # Use the id pytube already parsed from the URL. Splitting the URL on
        # "v=" raised IndexError when "v=" was absent and kept any trailing
        # "&..." query parameters as part of the id.
        vid_id = yt.video_id
        caption = YouTubeTranscriptApi.get_transcript(vid_id)
        srt_formatted = SRTFormatter().format_transcript(caption)
        # A path separator inside the title would be interpreted as a
        # directory when the name is joined onto download_path.
        safe_title = yt.title.replace('/', '_').replace('\\', '_')
        file_path = os.path.join(download_path, safe_title + '.srt')
        # The with-block closes the file; no explicit close() is needed.
        with open(file_path, 'w', encoding='utf-8') as srt_file:
            srt_file.write(srt_formatted)
    except KeyError:
        return 400, "Error: video not avaliable or cannot be download"
    except ValueError:
        return 400, "Error: invalide URL"
    except Exception as e:
        # The original built this tuple but never returned it.
        return 400, "Error extracting transcript from: " + str(e)
    return None
def sep_audio(video: str, output_path):  # separates audio from video file
    """Extract the audio track of a video file into an ``.mp3`` file.

    The output is named after the input file (extension replaced by ``.mp3``)
    and placed in ``output_path``.

    Args:
        video: Path of the video file to read.
        output_path: Directory the audio file is written to.

    Returns:
        The path of the created audio file, or ``None`` if the input file is
        missing or ffmpeg fails (a message is printed in both cases).
    """
    # Check up front so a missing input gets its own message and we never
    # continue with an undefined audio stream.
    if not os.path.isfile(video):
        print("%s file couldn't be accessed" % video)
        return None

    # basename/splitext keep dots inside the file name ("a.b.mp4" -> "a.b")
    # and honour the platform's path separator.
    stem = os.path.splitext(os.path.basename(video))[0]
    file_path = os.path.join(output_path, stem + '.mp3')

    try:
        source = ffmpeg.input(video)
        audio = source.audio.filter("anull")
        ffmpeg.output(audio, file_path).run()
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate.
        print("error creating audio file")
        return None
    return file_path
def transcribe_audio(input_file: str, output_path: str):  # eventually add a check for if file is mp3
    """Transcribe an audio file with Whisper and save the text to a file.

    The transcript is wrapped at 100 columns and written to
    ``<output_path>/<input file name without extension>.txt``.

    Args:
        input_file: Path of the audio file to transcribe.
        output_path: Directory the transcript is written to.

    Returns:
        The path of the transcript file, or ``None`` if the input file or the
        output directory cannot be found (a message is printed).
    """
    # Bail out early: continuing after a failed transcription would hit an
    # undefined ``result`` further down.
    if not os.path.isfile(input_file):
        print("%s file was not found " % input_file)
        return None
    try:
        model = whisper.load_model("base")
        result = model.transcribe(input_file)
    except FileNotFoundError:
        print("%s file was not found " % input_file)
        return None

    # basename/splitext keep dots inside the file name ("a.b.mp3" -> "a.b").
    stem = os.path.splitext(os.path.basename(input_file))[0]
    file_path = os.path.join(output_path, stem) + ".txt"
    wrapped_text = textwrap.fill(result["text"], width=100)

    try:
        # The with-block closes the file; no explicit close() is needed.
        with open(file_path, 'w', encoding='utf-8') as out_file:
            out_file.write(wrapped_text)
    except FileNotFoundError:
        print("%s this dir can't be accessed " % output_path)
        return None
    return file_path
def translate_text(input_file: str, output_path: str, lang: str):
    """Translate an English text file line by line into another language.

    The translation is written to
    ``<output_path>/<input file name without extension> translation.txt``.

    Args:
        input_file: Path of the English text file to translate.
        output_path: Directory the translated file is written to.
        lang: Target language, passed to ``GoogleTranslator`` as ``target``.

    Returns:
        ``None``. Progress and failures are reported with ``print``.
    """
    # Read the input first so a missing file neither creates an empty output
    # file nor leaves the function running with an undefined handle.
    try:
        with open(input_file, 'r', encoding="utf8") as in_file:
            lines = in_file.readlines()
    except FileNotFoundError:
        print("%s file was not found " % input_file)
        return None

    # basename/splitext keep dots inside the file name and honour the
    # platform's path separator when input_file is an absolute path.
    stem = os.path.splitext(os.path.basename(input_file))[0]
    out_file_path = os.path.join(output_path, stem + ' translation.txt')
    try:
        out_file = open(out_file_path, 'w', encoding='utf8')
    except FileNotFoundError:
        print("%s this dir can't be accessed " % output_path)
        return None

    # ``with`` guarantees the output file is closed even if a translation
    # request raises part-way through.
    with out_file:
        translator = GoogleTranslator(source='english', target=lang)
        for line in lines:
            if not line.strip():
                # Nothing to translate; keep the blank line so the layout of
                # the source file is preserved.
                out_file.write('\n')
                continue
            translated_line = translator.translate(line)
            out_file.write(translated_line + '\n')

    print('%s has be sucessfully translate' % input_file)
    return None
### FRONT END ###
import streamlit as st
from transformers import pipeline
# Build the pipeline once at start-up.
# NOTE(review): confirm that 'video-translation' is a task identifier the
# installed transformers version accepts; none of the helper functions
# defined earlier in this file are called from here.
pipe = pipeline('video-translation')

# Ask for a URL and, once one has been entered, show the pipeline output as JSON.
text = st.text_area('enter a video url!')
if text:
    st.json(pipe(text))