kamau1 committed on
Commit
4d49ee8
·
verified ·
1 Parent(s): 2b61740

Update pages/04_🔊_Upload_Audio_File.py

Browse files
Files changed (1) hide show
  1. pages/04_🔊_Upload_Audio_File.py +40 -7
pages/04_🔊_Upload_Audio_File.py CHANGED
@@ -2,6 +2,7 @@ import whisper
2
  import streamlit as st
3
  from streamlit_lottie import st_lottie
4
  from utils import write_vtt, write_srt
 
5
  import ffmpeg
6
  import requests
7
  from typing import Iterator
@@ -36,11 +37,11 @@ with col1:
36
 
37
  with col2:
38
  st.write("""
39
- ## Auto Transcriber
40
  ##### Input an audio file and get a transcript.
41
  ###### ➠ If you want to transcribe the audio in its original language, select the task as "Transcribe"
42
- ###### ➠ If you want to translate the transcription to English, select the task as "Translate"
43
- ###### I recommend starting with the base model and then experimenting with the larger models, the small and medium models often work well. """)
44
 
45
  loaded_model = whisper.load_model("base")
46
  current_size = "None"
@@ -92,6 +93,32 @@ def getSubs(segments: Iterator[dict], format: str, maxLineWidth: int) -> str:
92
  segmentStream.seek(0)
93
  return segmentStream.read()
94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
  def main():
97
  size = st.selectbox("Select Model Size (The larger the model, the more accurate the transcription will be, but it will take longer)", ["tiny", "base", "small", "medium", "large"], index=1)
@@ -103,7 +130,7 @@ def main():
103
  filename = input_file.name[:-4]
104
  else:
105
  filename = None
106
- task = st.selectbox("Select Task", ["Transcribe", "Translate"], index=0)
107
  if task == "Transcribe":
108
  if st.button("Transcribe"):
109
  results = inferecence(loaded_model, input_file, task)
@@ -150,7 +177,7 @@ def main():
150
  with col10:
151
  st.info("Streamlit refreshes after the download button is clicked. The data is cached so you can download the transcript again without having to transcribe the video again.")
152
 
153
- elif task == "Translate":
154
  if st.button("Translate to English"):
155
  results = inferecence(loaded_model, input_file, task)
156
  col3, col4 = st.columns(2)
@@ -195,11 +222,17 @@ def main():
195
  st.success("You can download the transcript in .srt format, edit it (if you need to) and upload it to YouTube to create subtitles for your video.")
196
  with col10:
197
  st.info("Streamlit refreshes after the download button is clicked. The data is cached so you can download the transcript again without having to transcribe the video again.")
198
-
 
 
 
 
 
199
  else:
200
  st.error("Please select a task.")
201
 
202
 
203
  if __name__ == "__main__":
204
  main()
205
- st.markdown("###### Made with :heart: by [@BatuhanYılmaz](https://github.com/BatuhanYilmaz26) [![this is an image link](https://i.imgur.com/thJhzOO.png)](https://www.buymeacoffee.com/batuhanylmz)")
 
 
2
  import streamlit as st
3
  from streamlit_lottie import st_lottie
4
  from utils import write_vtt, write_srt
5
+ from flores200_codes import flores_codes
6
  import ffmpeg
7
  import requests
8
  from typing import Iterator
 
37
 
38
  with col2:
39
  st.write("""
40
+ ## Sema Audio Transcriber
41
  ##### Input an audio file and get a transcript.
42
  ###### ➠ If you want to transcribe the audio in its original language, select the task as "Transcribe"
43
+ ###### ➠ If you want to translate the subtitles to English, select the task as "Translate with Whisper"
44
+ ###### If you want to translate the subtitles from English to any of the 200 supported languages, select the task as "Translate with Sema" """)
45
 
46
  loaded_model = whisper.load_model("base")
47
  current_size = "None"
 
93
  segmentStream.seek(0)
94
  return segmentStream.read()
95
 
96
def translate(userinput, target_lang, source_lang=None, *, timeout=120):
    """Translate ``userinput`` through the remote translation service.

    When ``source_lang`` is truthy, the text is posted to the
    ``translate_enter`` endpoint together with the source and target
    languages. Otherwise it is posted to ``translate_detect`` with only the
    target language, and the source language is read from the response.

    Args:
        userinput: Text to translate.
        target_lang: Target language identifier sent as ``target_lang``.
        source_lang: Optional source language identifier. When omitted or
            empty, the service's ``source_language`` response field is used.
        timeout: Seconds to wait for the service before giving up. Without
            a timeout ``requests`` waits indefinitely on a stalled server.

    Returns:
        A ``(source_language, translated_text)`` tuple.

    Raises:
        requests.exceptions.HTTPError: The service answered with a 4xx/5xx
            status.
        requests.exceptions.Timeout: No answer within ``timeout`` seconds.
        KeyError: The JSON response lacks an expected field.
    """
    # NOTE(review): Public_Url is not defined in the visible part of this
    # file; it is assumed to be a module-level base URL -- confirm.
    if source_lang:
        endpoint = "translate_enter"
        payload = {
            "userinput": userinput,
            "source_lang": source_lang,
            "target_lang": target_lang,
        }
    else:
        endpoint = "translate_detect"
        payload = {
            "userinput": userinput,
            "target_lang": target_lang,
        }

    response = requests.post(
        f"{Public_Url}/{endpoint}/", json=payload, timeout=timeout
    )
    # Surface HTTP failures explicitly instead of letting them show up later
    # as a confusing KeyError / JSON decode error on an error body.
    response.raise_for_status()
    result = response.json()

    # A caller-supplied source language wins; otherwise use the one the
    # detection endpoint reported.
    detected_lang = source_lang if source_lang else result["source_language"]
    return detected_lang, result["translated_text"]
122
 
123
  def main():
124
  size = st.selectbox("Select Model Size (The larger the model, the more accurate the transcription will be, but it will take longer)", ["tiny", "base", "small", "medium", "large"], index=1)
 
130
  filename = input_file.name[:-4]
131
  else:
132
  filename = None
133
+ task = st.selectbox("Select Task", ["Transcribe", "Translate with Whisper", "Translate with Sema"], index=0)
134
  if task == "Transcribe":
135
  if st.button("Transcribe"):
136
  results = inferecence(loaded_model, input_file, task)
 
177
  with col10:
178
  st.info("Streamlit refreshes after the download button is clicked. The data is cached so you can download the transcript again without having to transcribe the video again.")
179
 
180
+ elif task == "Translate with Whisper":
181
  if st.button("Translate to English"):
182
  results = inferecence(loaded_model, input_file, task)
183
  col3, col4 = st.columns(2)
 
222
  st.success("You can download the transcript in .srt format, edit it (if you need to) and upload it to YouTube to create subtitles for your video.")
223
  with col10:
224
  st.info("Streamlit refreshes after the download button is clicked. The data is cached so you can download the transcript again without having to transcribe the video again.")
225
+
226
+ elif task == "Translate with Sema":
227
+ default_language = "French"
228
+ target = st.selectbox("Select Language", list(flores_codes.keys()), index=list(flores_codes.keys()).index(default_language))
229
+ target_code = flores_codes[target]
230
+
231
  else:
232
  st.error("Please select a task.")
233
 
234
 
235
  if __name__ == "__main__":
236
  main()
237
+ st.markdown("###### ")
238
+ st.markdown("###### Powered by [sema © 2024](https://www.sema.wiki)")