pustozerov
committed on
Commit
·
c18bad5
1
Parent(s):
ceaa240
Temporary remove samples.
Browse files- .gitattributes +5 -0
- Interface.py +75 -0
.gitattributes
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
*.bin filter=lfs diff=lfs merge=lfs -text
|
@@ -29,3 +30,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
29 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
30 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
31 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
1 |
+
<<<<<<< HEAD
|
2 |
*.7z filter=lfs diff=lfs merge=lfs -text
|
3 |
*.arrow filter=lfs diff=lfs merge=lfs -text
|
4 |
*.bin filter=lfs diff=lfs merge=lfs -text
|
|
|
30 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
31 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
32 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
33 |
+
=======
|
34 |
+
*.wav filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*.ogg filter=lfs diff=lfs merge=lfs -text
|
36 |
+
>>>>>>> 02dca0a (Temporary remove samples.)
|
Interface.py
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import glob
|
2 |
+
import random
|
3 |
+
import os
|
4 |
+
import soundfile as sf
|
5 |
+
import streamlit as st
|
6 |
+
from pydub import AudioSegment
|
7 |
+
|
8 |
+
from modules.diarization.nemo_diarization import diarization
|
9 |
+
|
10 |
+
# Page header: the title plus a short description of what the demo offers.
# The description previously advertised "a set of images", although the demo
# works on audio recordings; the wording now matches what the page does.
st.title('Call Transcription demo')
st.subheader('This simple demo shows the possibilities of the ASR and NLP in the task of '
             'automatic speech recognition and diarization. It works with mp3, ogg and wav files. You can '
             'pick a random recording from the built-in database or try uploading your own files.')
|
14 |
+
|
15 |
+
|
16 |
+
# Demo path: transcribe one randomly chosen WAV file from the bundled dataset.
if st.button('Try random samples from the database'):
    folder = "data/datasets/crema_d_diarization_chunks"
    os.makedirs(folder, exist_ok=True)
    list_all_audio = glob.glob(folder + "/*.wav")
    if not list_all_audio:
        # Picking from an empty list raises; this happens whenever the dataset
        # folder was only just created above or ships without samples.
        st.warning("No sample recordings were found in '%s'." % folder)
    else:
        chosen_file = random.choice(list_all_audio)
        # Base name up to the first dot; used below as the key into `result`
        # and as the stem of the transcript file.
        file_name = os.path.basename(chosen_file).split(".")[0]

        # Let the user listen to the sample while it is being processed.
        with open(chosen_file, 'rb') as audio_file:
            st.audio(audio_file.read())

        # Rough estimate: one fifth of the recording's duration.
        with sf.SoundFile(chosen_file) as sound_info:
            estimated_seconds = sound_info.frames / (sound_info.samplerate * 5)
        st.write("Starting transcription. Estimated processing time: %0.1f seconds" % estimated_seconds)

        result = diarization(chosen_file)
        # NOTE(review): the transcript file is presumably produced by
        # diarization() -- confirm against modules.diarization.nemo_diarization.
        with open("info/transcripts/pred_rttms/" + file_name + ".txt") as transcript_file:
            transcript = transcript_file.read()

        st.write("Transcription completed.")
        st.write("Number of speakers: %s" % result[file_name]["speaker_count"])
        st.write("Sentences: %s" % len(result[file_name]["sentences"]))
        st.write("Words: %s" % len(result[file_name]["words"]))
        st.download_button(
            label="Download audio transcript",
            data=transcript,
            file_name='transcript.txt',
            mime='text/plain',  # the download is a plain-text file, not CSV
        )
|
40 |
+
|
41 |
+
# Upload path: convert the user's recording to mono WAV and transcribe it.
uploaded_file = st.file_uploader("Choose your recording with a speech",
                                 accept_multiple_files=False, type=["mp3", "wav", "ogg"])
if uploaded_file is not None:
    folder = "data/user_data/"
    os.makedirs(folder, exist_ok=True)
    # Only one upload is kept at a time: remove whatever a previous run left.
    for stale_path in glob.glob(folder + '*'):
        os.remove(stale_path)

    # Decide the decoder from the uploaded file's *name*. Testing membership
    # against the file-like object itself never matches and iterates (drains)
    # the stream before it can be decoded.
    upload_name = os.path.basename(uploaded_file.name)
    extension = os.path.splitext(upload_name)[1].lower()
    # Base name up to the first dot; used below as the key into `result`
    # and as the stem of the transcript file.
    file_name = upload_name.split(".")[0]
    # The audio is always exported as WAV, so store it under a .wav name
    # instead of reusing the original .mp3/.ogg extension.
    save_path = folder + file_name + ".wav"

    if extension == ".mp3":
        sound = AudioSegment.from_mp3(uploaded_file)
    elif extension == ".ogg":
        sound = AudioSegment.from_ogg(uploaded_file)
    else:
        sound = AudioSegment.from_wav(uploaded_file)
    # "-ac 1" asks the encoder for a single (mono) channel.
    sound.export(save_path, format="wav", parameters=["-ac", "1"])

    # Let the user listen to the converted recording while it is processed.
    with open(save_path, 'rb') as audio_file:
        st.audio(audio_file.read())

    # Rough estimate: one third of the recording's duration.
    with sf.SoundFile(save_path) as sound_info:
        estimated_seconds = sound_info.frames / (sound_info.samplerate * 3)
    minutes, seconds = divmod(estimated_seconds, 60)
    st.write("Starting transcription. Estimated processing time: %0.0f minutes and %02.0f seconds"
             % (minutes, seconds))

    result = diarization(save_path)
    # NOTE(review): the transcript file is presumably produced by
    # diarization() -- confirm against modules.diarization.nemo_diarization.
    with open("info/transcripts/pred_rttms/" + file_name + ".txt") as transcript_file:
        transcript = transcript_file.read()

    st.write("Transcription completed.")
    st.write("Number of speakers: %s" % result[file_name]["speaker_count"])
    st.write("Sentences: %s" % len(result[file_name]["sentences"]))
    st.write("Words: %s" % len(result[file_name]["words"]))
    st.download_button(
        label="Download audio transcript",
        data=transcript,
        file_name='transcript.txt',
        mime='text/plain',  # the download is a plain-text file, not CSV
    )
|