Spaces:
Build error
Build error
Commit
·
f508160
1
Parent(s):
c18bad5
Changed app_file name.
Browse files- Interface.py +0 -75
Interface.py
DELETED
@@ -1,75 +0,0 @@
|
|
1 |
-
import glob
|
2 |
-
import random
|
3 |
-
import os
|
4 |
-
import soundfile as sf
|
5 |
-
import streamlit as st
|
6 |
-
from pydub import AudioSegment
|
7 |
-
|
8 |
-
from modules.diarization.nemo_diarization import diarization
|
9 |
-
|
10 |
-
# ---------------------------------------------------------------------------
# Streamlit front-end for the call-transcription demo.
#
# The page offers two ways to feed audio to ``diarization``:
#   1. a button that picks one random ``.wav`` from the bundled sample folder;
#   2. a file uploader accepting mp3 / ogg / wav, converted to mono WAV first.
# Both paths share ``_transcribe_and_report`` below.
# ---------------------------------------------------------------------------

SAMPLES_DIR = "data/datasets/crema_d_diarization_chunks"
UPLOAD_DIR = "data/user_data/"
TRANSCRIPTS_DIR = "info/transcripts/pred_rttms/"


def _transcribe_and_report(audio_path, speedup, widget_key, show_minutes=False):
    """Play *audio_path*, run diarization on it and render the results.

    Args:
        audio_path: Path of the audio file on disk.
        speedup: Divisor applied to the audio duration to estimate the
            processing time (the sample branch uses 5, the upload branch 3).
        widget_key: Unique Streamlit key for the download button. Both page
            branches can run in the same rerun, and two identical buttons
            without distinct keys would collide.
        show_minutes: When true, print the estimate as minutes and seconds
            instead of fractional seconds.
    """
    # The transcript file and the result dict are both looked up by the part
    # of the file name before the first dot, exactly as the original script did.
    file_name = os.path.basename(audio_path).split(".")[0]

    with open(audio_path, 'rb') as audio_file:
        st.audio(audio_file.read())

    with sf.SoundFile(audio_path) as audio_info:
        estimate = audio_info.frames / (audio_info.samplerate * speedup)

    if show_minutes:
        # Round the total first so the seconds field can never print as "60".
        minutes, seconds = divmod(int(round(estimate)), 60)
        st.write("Starting transcription. Estimated processing time: %d minutes and %02d seconds"
                 % (minutes, seconds))
    else:
        st.write("Starting transcription. Estimated processing time: %0.1f seconds" % estimate)

    result = diarization(audio_path)
    with open(TRANSCRIPTS_DIR + file_name + ".txt") as transcript_file:
        transcript = transcript_file.read()

    st.write("Transcription completed.")
    st.write("Number of speakers: %s" % result[file_name]["speaker_count"])
    st.write("Sentences: %s" % len(result[file_name]["sentences"]))
    st.write("Words: %s" % len(result[file_name]["words"]))
    st.download_button(
        label="Download audio transcript",
        data=transcript,
        file_name='transcript.txt',
        mime='text/plain',
        key=widget_key,
    )


st.title('Call Transcription demo')
st.subheader('This simple demo shows the possibilities of the ASR and NLP in the task of '
             'automatic speech recognition and diarization. It works with mp3, ogg and wav files. You can randomly '
             'pick a recording from the built-in database or try uploading your own files.')


if st.button('Try random samples from the database'):
    os.makedirs(SAMPLES_DIR, exist_ok=True)
    sample_paths = glob.glob(SAMPLES_DIR + "/*.wav")
    if not sample_paths:
        # The folder may have just been created above, so it can be empty.
        st.error("No sample recordings were found in %s." % SAMPLES_DIR)
    else:
        _transcribe_and_report(random.choice(sample_paths), speedup=5,
                               widget_key="download_sample_transcript")

uploaded_file = st.file_uploader("Choose your recording with a speech",
                                 accept_multiple_files=False, type=["mp3", "wav", "ogg"])
if uploaded_file is not None:
    os.makedirs(UPLOAD_DIR, exist_ok=True)
    # Keep only the most recent upload on disk.
    for stale_path in glob.glob(UPLOAD_DIR + '*'):
        if os.path.isfile(stale_path):
            os.remove(stale_path)

    # The name comes from the client: strip any directory part before using it
    # in a path, and pick the decoder from the extension (case-insensitive).
    upload_name = os.path.basename(uploaded_file.name)
    stem, extension = os.path.splitext(upload_name)
    extension = extension.lower()
    if extension == ".mp3":
        sound = AudioSegment.from_mp3(uploaded_file)
    elif extension == ".ogg":
        sound = AudioSegment.from_ogg(uploaded_file)
    else:
        sound = AudioSegment.from_wav(uploaded_file)

    # The export is always mono WAV, so store it under a .wav name.
    save_path = UPLOAD_DIR + stem + ".wav"
    sound.export(save_path, format="wav", parameters=["-ac", "1"])

    _transcribe_and_report(save_path, speedup=3,
                           widget_key="download_upload_transcript", show_minutes=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|