pustozerov committed on
Commit
c18bad5
·
1 Parent(s): ceaa240

Temporarily remove samples.

Browse files
Files changed (2) hide show
  1. .gitattributes +5 -0
  2. Interface.py +75 -0
.gitattributes CHANGED
@@ -1,3 +1,4 @@
 
1
  *.7z filter=lfs diff=lfs merge=lfs -text
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
@@ -29,3 +30,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
29
  *.zip filter=lfs diff=lfs merge=lfs -text
30
  *.zst filter=lfs diff=lfs merge=lfs -text
31
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  *.7z filter=lfs diff=lfs merge=lfs -text
3
  *.arrow filter=lfs diff=lfs merge=lfs -text
4
  *.bin filter=lfs diff=lfs merge=lfs -text
 
30
  *.zip filter=lfs diff=lfs merge=lfs -text
31
  *.zst filter=lfs diff=lfs merge=lfs -text
32
  *tfevents* filter=lfs diff=lfs merge=lfs -text
33
+ =======
34
+ *.wav filter=lfs diff=lfs merge=lfs -text
35
+ *.ogg filter=lfs diff=lfs merge=lfs -text
36
+ >>>>>>> 02dca0a (Temporary remove samples.)
Interface.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import glob
2
+ import random
3
+ import os
4
+ import soundfile as sf
5
+ import streamlit as st
6
+ from pydub import AudioSegment
7
+
8
+ from modules.diarization.nemo_diarization import diarization
9
+
10
# Page header: the title plus a short description of what the demo does and
# which audio formats it accepts. The description mentions recordings (not
# images) because the built-in database consists of .wav files.
st.title('Call Transcription demo')
st.subheader('This simple demo shows the possibilities of the ASR and NLP in the task of '
             'automatic speech recognition and diarization. It works with mp3, ogg and wav files. You can randomly '
             'pick a sample recording from the built-in database or try uploading your own files.')
14
+
15
+
16
# Demo path: when the button is pressed, pick one random .wav file from the
# bundled dataset folder, play it back, run diarization on it and offer the
# resulting transcript for download.
if st.button('Try random samples from the database'):
    folder = "data/datasets/crema_d_diarization_chunks"
    os.makedirs(folder, exist_ok=True)
    list_all_audio = glob.glob(folder + "/*.wav")
    if not list_all_audio:
        # Sampling from an empty list raises ValueError; the folder is created
        # on demand above, so it can legitimately be empty. Tell the user
        # instead of crashing the app.
        st.warning("No sample recordings were found in '%s'. "
                   "Please upload your own file instead." % folder)
    else:
        chosen_file = random.choice(list_all_audio)
        # Text before the first dot of the file name; used below both for the
        # transcript path and as the key into the diarization result.
        file_name = os.path.basename(chosen_file).split(".")[0]

        # Read the raw bytes for the audio player and release the handle.
        with open(chosen_file, 'rb') as audio_file:
            audio_bytes = audio_file.read()
        st.audio(audio_bytes)

        # Rough estimate: one fifth of the recording's duration.
        with sf.SoundFile(chosen_file) as sound_info:
            estimated_seconds = sound_info.frames / (sound_info.samplerate * 5)
        st.write("Starting transcription. Estimated processing time: %0.1f seconds" % estimated_seconds)

        result = diarization(chosen_file)
        # NOTE(review): assumes diarization() writes this transcript file and
        # keys its result by `file_name` - confirm against modules.diarization.
        with open("info/transcripts/pred_rttms/" + file_name + ".txt") as transcript_file:
            transcript = transcript_file.read()

        st.write("Transcription completed.")
        st.write("Number of speakers: %s" % result[file_name]["speaker_count"])
        st.write("Sentences: %s" % len(result[file_name]["sentences"]))
        st.write("Words: %s" % len(result[file_name]["words"]))
        st.download_button(
            label="Download audio transcript",
            data=transcript,
            file_name='transcript.txt',
            mime='text/csv',
        )
40
+
41
# Upload path: accept one mp3/wav/ogg recording, convert it to a WAV file
# under data/user_data/, play it back, run diarization on it and offer the
# resulting transcript for download.
uploaded_file = st.file_uploader("Choose your recording with a speech",
                                 accept_multiple_files=False, type=["mp3", "wav", "ogg"])
if uploaded_file is not None:
    folder = "data/user_data/"
    os.makedirs(folder, exist_ok=True)
    # Remove whatever earlier uploads left behind in the folder.
    for stale_path in glob.glob(folder + '*'):
        os.remove(stale_path)

    # Use only the final path component of the client-supplied name.
    upload_name = os.path.basename(uploaded_file.name)
    # Text before the first dot of the file name; used below both for the
    # transcript path and as the key into the diarization result.
    file_name = upload_name.split(".")[0]
    # The audio is always exported as WAV, so store it with a .wav extension
    # rather than the extension of the uploaded file.
    save_path = folder + file_name + ".wav"

    # Choose the decoder from the uploaded file's NAME. Applying `in` to the
    # upload object itself does not inspect its name, so it cannot be used to
    # detect the format.
    extension = os.path.splitext(upload_name)[1].lower()
    if extension == ".mp3":
        sound = AudioSegment.from_mp3(uploaded_file)
    elif extension == ".ogg":
        sound = AudioSegment.from_ogg(uploaded_file)
    else:
        sound = AudioSegment.from_wav(uploaded_file)
    # "-ac 1" is passed through to the converter to request a single channel.
    sound.export(save_path, format="wav", parameters=["-ac", "1"])

    # Read the raw bytes for the audio player and release the handle.
    with open(save_path, 'rb') as audio_file:
        audio_bytes = audio_file.read()
    st.audio(audio_bytes)

    # Rough estimate: one third of the recording's duration. Round to whole
    # seconds BEFORE splitting so the message can never read "60 seconds".
    with sf.SoundFile(save_path) as sound_info:
        estimated_seconds = sound_info.frames / (sound_info.samplerate * 3)
    minutes, seconds = divmod(round(estimated_seconds), 60)
    st.write("Starting transcription. Estimated processing time: %0.0f minutes and %02.0f seconds"
             % (minutes, seconds))

    result = diarization(save_path)
    # NOTE(review): assumes diarization() writes this transcript file and keys
    # its result by `file_name` - confirm against modules.diarization.
    with open("info/transcripts/pred_rttms/" + file_name + ".txt") as transcript_file:
        transcript = transcript_file.read()

    st.write("Transcription completed.")
    st.write("Number of speakers: %s" % result[file_name]["speaker_count"])
    st.write("Sentences: %s" % len(result[file_name]["sentences"]))
    st.write("Words: %s" % len(result[file_name]["words"]))
    st.download_button(
        label="Download audio transcript",
        data=transcript,
        file_name='transcript.txt',
        mime='text/csv',
    )