import whisper
import os
import ffmpeg
import textwrap
from flask import Flask
from pytube import YouTube
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import SRTFormatter
from deep_translator import GoogleTranslator

# NOTE(review): the instruction below refers to an `app` object, but none is
# defined in the code visible here -- confirm where it is supposed to live.
'''
to run api paste " uvicorn milestone-2:app " in terminal
'''


def _stem(path):
    """Return the file name of *path* without its directory or final extension.

    Only the last extension is removed, so a name that itself contains dots
    (e.g. "Vol. 2.mp3") is not truncated at the first dot.
    """
    return os.path.splitext(os.path.basename(path))[0]


def download_audio(url:str, download_path:str):
    """Download the first audio-only stream of a YouTube video.

    The file is saved in *download_path* and named after the video title with
    a '.mp3' suffix.

    Returns:
        The path of the downloaded file on success, or a ``(400, message)``
        tuple when the download fails.
    """
    try:
        yt = YouTube(url)
        audio = yt.streams.filter(only_audio=True).first()
        file_name = yt.title + '.mp3'
        audio.download(output_path=download_path, filename=file_name)
    except KeyError:
        return 400, "Error: audio souce not avaliable or cannot be download"
    except ValueError:
        return 400, "Error: invalide URL"
    except Exception as e:
        return 400, "Error downloading video: " + str(e)
    return os.path.join(download_path, file_name)


def download_captions(url:str, download_path:str):
    """Fetch a video's transcript and save it as an SRT file.

    The file is written to *download_path* and named after the video title
    with a '.srt' suffix.

    Returns:
        ``None`` on success, or a ``(400, message)`` tuple on failure.
    """
    formatter = SRTFormatter()
    try:
        yt = YouTube(url)
        # Keep only the id itself; anything after '&' is another query
        # parameter (e.g. '&t=30s', '&list=...') and is not part of the id.
        vid_id = url.split("v=")[1].split("&")[0]
        caption = YouTubeTranscriptApi.get_transcript(vid_id)
        srt_formatted = formatter.format_transcript(caption)
        file_name = yt.title + '.srt'
        file_path = os.path.join(download_path, file_name)
        # The `with` block closes the file; no explicit close() is needed.
        with open(file_path, 'w', encoding='utf-8') as srt_file:
            srt_file.write(srt_formatted)
    except KeyError:
        return 400, "Error: video not avaliable or cannot be download"
    except ValueError:
        return 400, "Error: invalide URL"
    except Exception as e:
        # Also reached when the URL has no "v=" part (IndexError above).
        return 400, "Error extracting transcript from: " + str(e)
    return None


def sep_audio(video:str, output_path):
    """Separate the audio track from a video file and save it as '.mp3'.

    The output file is placed in *output_path* and reuses the video's base
    name.

    Returns:
        The path of the created audio file, or ``None`` if the input could
        not be accessed or the audio file could not be created (a message is
        printed in both cases).
    """
    try:
        stream = ffmpeg.input(video)
        audio = stream.audio.filter("anull")
    except FileNotFoundError:
        print("%s file couldn't be accessed"%video)
        # Without an audio stream there is nothing to write.
        return None

    file_path = os.path.join(output_path, _stem(video) + '.mp3')
    try:
        ffmpeg.output(audio, file_path).run()
    except Exception:
        # Best-effort: report and give up, but let KeyboardInterrupt and
        # SystemExit propagate (a bare `except:` would swallow them).
        print("error creating audio file")
        return None
    return file_path


def transcribe_audio(input_file:str, output_path:str):
    """Transcribe an audio file with Whisper's "base" model into a text file.

    The transcript is wrapped at 100 columns and written to
    ``<output_path>/<input base name>.txt``.

    Returns:
        The path of the transcript file, or ``None`` if the input file was
        not found or the output directory could not be written to (a message
        is printed in both cases).
    """
    # eventually add a check for if file is mp3
    try:
        model = whisper.load_model("base")
        result = model.transcribe(input_file)
    except FileNotFoundError:
        print("%s file was not found " % input_file)
        # No transcription result exists, so stop here.
        return None

    file_path = os.path.join(output_path, _stem(input_file)) + ".txt"
    wrapped_text = textwrap.fill(result["text"], width=100)
    try:
        with open(file_path, 'w', encoding='utf-8') as out_file:
            out_file.write(wrapped_text)
    except FileNotFoundError:
        print("%s this dir can't be accessed " % output_path)
        return None
    return file_path


def translate_text(input_file:str, output_path:str, lang: str, source_lang: str = 'english'):
    """Translate a text file line by line with Google Translate.

    The translation is written to
    ``<output_path>/<input base name> translation.txt``.

    Args:
        input_file: Path of the text/caption file to translate.
        output_path: Directory in which the translated file is created.
        lang: Target language passed to ``GoogleTranslator``.
        source_lang: Source language passed to ``GoogleTranslator``;
            defaults to 'english', the previously hard-coded value.

    Returns:
        ``None``. Failure to open the input or create the output is reported
        with a printed message.
    """
    translator = GoogleTranslator(source=source_lang, target=lang)

    try:  # try to open our caption file
        in_file = open(input_file, 'r', encoding="utf8")
    except FileNotFoundError:
        print("%s file was not found " % input_file)
        return None

    with in_file:
        # Build the output name from the input's base name, in case
        # input_file is an absolute path.
        out_file_name = _stem(input_file) + ' translation.txt'
        out_file_path = os.path.join(output_path, out_file_name)
        try:  # try to create a new file to store translation
            out_file = open(out_file_path, 'w', encoding='utf8')
        except FileNotFoundError:
            print("%s this dir can't be accessed " % output_path)
            return None

        with out_file:
            for line in in_file:  # translate the input one line at a time
                if not line.strip():
                    # Keep blank lines (e.g. SRT block separators) as-is
                    # instead of sending empty text to the translator.
                    out_file.write('\n')
                    continue
                translated_line = translator.translate(line)
                out_file.write(translated_line+'\n')

    print('%s has be sucessfully translate' % input_file)
    return None


### FRONT END ###
import streamlit as st
from transformers import pipeline

# NOTE(review): 'video-translation' does not look like a built-in
# transformers pipeline task -- confirm this task name is registered.
pipe = pipeline('video-translation')
text = st.text_area('enter a video url!')

if text:
    out = pipe(text)
    st.json(out)