JosefBirman commited on
Commit
df07f44
·
1 Parent(s): 1718583

added functions for video download, captions, translations, and audio

Browse files
Files changed (1) hide show
  1. app.py +152 -2
app.py CHANGED
@@ -1,4 +1,154 @@
1
  import streamlit as st
 
 
 
 
 
 
 
 
 
2
 
3
- x = st.slider('Select a value')
4
- st.write(x, 'squared is', x * x)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import whisper
3
+ import os
4
+ import ffmpeg
5
+ import textwrap
6
+ from flask import Flask
7
+ from pytube import YouTube
8
+ from youtube_transcript_api import YouTubeTranscriptApi
9
+ from youtube_transcript_api.formatters import SRTFormatter
10
+ from deep_translator import GoogleTranslator
11
 
12
+
13
'''
To run the API locally, execute "python app.py" in a terminal
(this starts the Flask development server via app.run at the bottom of this file).
'''
16
+
17
def download_audio(url: str, download_path: str):
    """Download the first audio-only stream of a YouTube video.

    The stream is written into ``download_path`` under the video title with
    an ``.mp3`` suffix. No re-encoding is done here; the stream is saved as
    delivered and merely named ``.mp3``.

    Args:
        url: Address of the YouTube video.
        download_path: Directory the audio file is written to.

    Returns:
        The path of the downloaded file on success, otherwise a
        ``(400, message)`` tuple describing the failure.
    """
    try:
        yt = YouTube(url)
        audio = yt.streams.filter(only_audio=True).first()
        if audio is None:
            # No audio-only stream exists; report it explicitly instead of
            # failing later with an opaque attribute error on ``None``.
            return 400, "Error: audio souce not avaliable or cannot be download"

        # A path separator inside the title would be interpreted as a
        # sub-directory when the name is joined onto download_path.
        safe_title = yt.title
        for separator in (os.sep, os.altsep):
            if separator:
                safe_title = safe_title.replace(separator, '_')
        file_name = safe_title + '.mp3'

        audio.download(output_path=download_path, filename=file_name)
    except KeyError:
        return 400, "Error: audio souce not avaliable or cannot be download"
    except ValueError:
        return 400, "Error: invalide URL"
    except Exception as e:
        return 400, "Error downloading video: " + str(e)

    return os.path.join(download_path, file_name)
34
+
35
+
36
def download_captions(url: str, download_path: str):
    """Fetch a YouTube video's transcript and store it as an SRT file.

    The file is written into ``download_path`` and named after the video
    title with an ``.srt`` suffix.

    Args:
        url: Address of the YouTube video; the video id is taken from the
            ``v=`` query parameter.
        download_path: Directory the ``.srt`` file is written to.

    Returns:
        ``None`` on success, otherwise a ``(400, message)`` tuple describing
        the failure.
    """
    try:
        formatter = SRTFormatter()
        yt = YouTube(url)
        # Keep only the id itself: anything after "&" (e.g. "&t=42s") is a
        # separate query parameter and not part of the video id.
        vid_id = url.split("v=")[1].split("&")[0]
        caption = YouTubeTranscriptApi.get_transcript(vid_id)
        srt_formatted = formatter.format_transcript(caption)

        # A path separator inside the title would be interpreted as a
        # sub-directory when the name is joined onto download_path.
        safe_title = yt.title
        for separator in (os.sep, os.altsep):
            if separator:
                safe_title = safe_title.replace(separator, '_')
        file_path = os.path.join(download_path, safe_title + '.srt')

        # The with-statement closes the file; no explicit close() is needed.
        with open(file_path, 'w', encoding='utf-8') as srt_file:
            srt_file.write(srt_formatted)
    except KeyError:
        return 400, "Error: video not avaliable or cannot be download"
    except ValueError:
        return 400, "Error: invalide URL"
    except Exception as e:
        # The error tuple must actually be returned so the caller sees it.
        return 400, "Error extracting transcript from: " + str(e)

    return None
57
+
58
def sep_audio(video: str, output_path):  # separates audio from video file
    """Extract the audio track of a video file into an ``.mp3`` file.

    The output file is placed in ``output_path`` and carries the video's
    base name with its extension replaced by ``.mp3``.

    Args:
        video: Path of the video file to read.
        output_path: Directory the audio file is written to.

    Returns:
        The path of the created audio file, or ``None`` when the input file
        is missing or ffmpeg fails (a message is printed in both cases).
    """
    # Stop early: without a readable input there is nothing to convert.
    if not video or not os.path.isfile(video):
        print("%s file couldn't be accessed" % video)
        return None

    # basename/splitext keep dotted names intact ("a.b.mp4" -> "a.b") and
    # work with either path separator.
    base_name = os.path.splitext(os.path.basename(video))[0]
    file_path = os.path.join(output_path, base_name + '.mp3')

    try:
        source = ffmpeg.input(video)
        audio = source.audio.filter("anull")
        output = ffmpeg.output(audio, file_path)
        # Overwrite a file left by a previous run; otherwise ffmpeg refuses
        # to replace the existing output and the conversion fails.
        output.run(overwrite_output=True)
    except Exception as exc:
        print("error creating audio file: %s" % exc)
        return None

    return file_path
76
+
77
+
78
def transcribe_audio(input_file: str, output_path: str):  # eventually add a check for if file is mp3
    """Transcribe an audio file with Whisper and save the text to disk.

    The transcript is wrapped at 100 characters per line and written to
    ``output_path`` under the audio file's base name with a ``.txt`` suffix.

    Args:
        input_file: Path of the audio file to transcribe.
        output_path: Directory the transcript is written to.

    Returns:
        The path of the transcript file, or ``None`` when the input file is
        missing or the transcript cannot be written (a message is printed).
    """
    # Also covers a None path handed over by a failed earlier step.
    if not input_file or not os.path.isfile(input_file):
        print("%s file was not found " % input_file)
        return None

    model = whisper.load_model("base")
    result = model.transcribe(input_file)

    # basename/splitext keep dotted names intact and are separator-agnostic.
    base_name = os.path.splitext(os.path.basename(input_file))[0]
    file_path = os.path.join(output_path, base_name + ".txt")

    try:
        # The with-statement closes the file; no explicit close() is needed.
        with open(file_path, 'w', encoding='utf-8') as out_file:
            out_file.write(textwrap.fill(result["text"], width=100))
    except OSError:
        print("%s this dir can't be accessed " % output_path)
        return None

    return file_path
98
+
99
def translate_text(input_file: str, output_path: str, lang: str):
    """Translate a text file line by line from English into ``lang``.

    The result is written to ``output_path`` as
    ``"<input base name> translation.txt"``.

    Args:
        input_file: Path of the text file to translate.
        output_path: Directory the translated file is written to.
        lang: Target language passed to ``GoogleTranslator``.

    Returns:
        ``None``. Progress and failures are reported with ``print``.
    """
    # Also covers a None path handed over by a failed earlier step.
    if not input_file or not os.path.isfile(input_file):
        print("%s file was not found " % input_file)
        return

    translator = GoogleTranslator(source='english', target=lang)

    # Reuse the input's base name; basename/splitext handle absolute paths
    # and dotted file names.
    base_name = os.path.splitext(os.path.basename(input_file))[0]
    out_file_path = os.path.join(output_path, base_name + ' translation.txt')

    try:
        out_file = open(out_file_path, 'w', encoding='utf8')
    except OSError:
        print("%s this dir can't be accessed " % output_path)
        return

    # Both files are closed by the with-statement, even if translation fails.
    with out_file, open(input_file, 'r', encoding="utf8") as in_file:
        for line in in_file:
            if not line.strip():
                # Nothing to translate; keep the blank line in the output.
                out_file.write('\n')
                continue
            translated_line = translator.translate(line)
            # Guard against an empty result so the concatenation cannot fail.
            out_file.write((translated_line or '') + '\n')

    print('%s has be sucessfully translate' % input_file)
122
+
123
+
124
+
125
app = Flask(__name__)


@app.route("/")
def main():
    """Run the audio -> transcript -> translation pipeline on one video.

    Picks the first file (in sorted order) from the ``videos`` directory
    under the current working directory, extracts its audio into ``audio``,
    transcribes it into ``whisper_transcripts`` and writes a German
    translation into the current working directory.

    Returns:
        A ``(body, status)`` tuple, which is the order Flask expects from a
        view function.
    """
    current_dir = os.getcwd()
    audio_dir_path = os.path.join(current_dir, 'audio')
    video_dir_path = os.path.join(current_dir, 'videos')
    whisper_dir_path = os.path.join(current_dir, 'whisper_transcripts')

    os.makedirs(audio_dir_path, exist_ok=True)
    os.makedirs(whisper_dir_path, exist_ok=True)

    if not os.path.isdir(video_dir_path):
        return "Error: video directory not found: " + video_dir_path, 404

    # Consider regular files in the video dir only; sort so the pick is stable.
    files = sorted(
        name for name in os.listdir(video_dir_path)
        if os.path.isfile(os.path.join(video_dir_path, name))
    )
    if not files:
        return "Error: no video files found in " + video_dir_path, 404
    in_file = os.path.join(video_dir_path, files[0])

    audio_file = sep_audio(in_file, audio_dir_path)
    if audio_file is None:
        return "Error: audio could not be extracted from " + in_file, 500

    transcript_file = transcribe_audio(audio_file, whisper_dir_path)
    if transcript_file is None:
        return "Error: audio could not be transcribed", 500

    translate_text(transcript_file, current_dir, 'german')

    return "audio has been transcribed successfully", 200


if __name__ == "__main__":
    # NOTE: debug=True enables Flask's interactive debugger and reloader;
    # use it for local development only.
    app.run(debug=True)