pustozerov committed on
Commit
f508160
·
1 Parent(s): c18bad5

Changed app_file name.

Browse files
Files changed (1) hide show
  1. Interface.py +0 -75
Interface.py DELETED
@@ -1,75 +0,0 @@
"""Streamlit demo: transcribe and diarize a call recording.

The page offers two ways to pick a recording:

* a button that takes a random ``.wav`` file from the built-in sample folder;
* a file uploader accepting ``mp3``, ``wav`` and ``ogg`` files, which are
  converted to mono WAV before processing.

In both cases the recording is played back, an estimated processing time is
shown, ``diarization`` is run on the file and the resulting statistics plus a
transcript download button are displayed.
"""
import glob
import os
import random

import soundfile as sf
import streamlit as st
from pydub import AudioSegment

from modules.diarization.nemo_diarization import diarization

SAMPLES_FOLDER = "data/datasets/crema_d_diarization_chunks"
USER_DATA_FOLDER = "data/user_data/"
# The transcript for "<name>.<ext>" is read from "<name>.txt" in this folder
# after diarization has run (presumably written there by `diarization` --
# TODO confirm against modules.diarization.nemo_diarization).
TRANSCRIPTS_FOLDER = "info/transcripts/pred_rttms/"


def _audio_duration_seconds(path):
    """Return the duration of the audio file at *path* in seconds."""
    # The context manager closes the underlying file handle once read.
    with sf.SoundFile(path) as audio:
        return audio.frames / audio.samplerate


def _play_audio(path):
    """Render an audio player for the file at *path*."""
    with open(path, 'rb') as audio_file:
        st.audio(audio_file.read())


def _transcribe_and_report(path, download_key):
    """Run diarization on *path* and display its results.

    Args:
        path: Path of the audio file to process.
        download_key: Unique Streamlit widget key for the download button.
            Both page sections can run in the same script pass, and two
            download buttons with identical arguments would otherwise clash.
    """
    # Results and transcripts are looked up by the file name up to its first
    # dot, exactly as the original script did.
    file_name = os.path.basename(path).split(".")[0]
    result = diarization(path)
    with open(TRANSCRIPTS_FOLDER + file_name + ".txt") as transcript_file:
        transcript = transcript_file.read()
    st.write("Transcription completed.")
    st.write("Number of speakers: %s" % result[file_name]["speaker_count"])
    st.write("Sentences: %s" % len(result[file_name]["sentences"]))
    st.write("Words: %s" % len(result[file_name]["words"]))
    # NOTE(review): the transcript is plain text but is served as 'text/csv';
    # kept as-is, consider 'text/plain'.
    st.download_button(
        label="Download audio transcript",
        data=transcript,
        file_name='transcript.txt',
        mime='text/csv',
        key=download_key,
    )


st.title('Call Transcription demo')
st.subheader('This simple demo shows the possibilities of the ASR and NLP in the task of '
             'automatic speech recognition and diarization. It works with mp3, ogg and wav files. You can randomly '
             'pick up a sample recording from the built-in database or try uploading your own files.')


if st.button('Try random samples from the database'):
    os.makedirs(SAMPLES_FOLDER, exist_ok=True)
    sample_paths = glob.glob(SAMPLES_FOLDER + "/*.wav")
    if not sample_paths:
        # The folder may have just been created and be empty; sampling from
        # an empty list would raise ValueError.
        st.warning("No sample recordings were found in the built-in database.")
    else:
        sample_path = random.choice(sample_paths)
        _play_audio(sample_path)
        # The estimate is the recording duration divided by 5.
        st.write("Starting transcription. Estimated processing time: %0.1f seconds"
                 % (_audio_duration_seconds(sample_path) / 5))
        _transcribe_and_report(sample_path, download_key="download_sample_transcript")

uploaded_file = st.file_uploader("Choose your recording with a speech",
                                 accept_multiple_files=False, type=["mp3", "wav", "ogg"])
if uploaded_file is not None:
    os.makedirs(USER_DATA_FOLDER, exist_ok=True)
    # Keep only the latest upload on disk; skip anything that is not a file.
    for stale_path in glob.glob(USER_DATA_FOLDER + '*'):
        if os.path.isfile(stale_path):
            os.remove(stale_path)
    # The name comes from the user: strip any directory components.
    upload_name = os.path.basename(uploaded_file.name)
    save_path = USER_DATA_FOLDER + upload_name
    # Decide the decoder from the file NAME. Testing membership on the
    # uploaded file object itself iterates its bytes, never matches and
    # leaves the stream consumed.
    extension = os.path.splitext(upload_name)[1].lower()
    if extension == ".mp3":
        sound = AudioSegment.from_mp3(uploaded_file)
    elif extension == ".ogg":
        sound = AudioSegment.from_ogg(uploaded_file)
    else:
        sound = AudioSegment.from_wav(uploaded_file)
    # Always stored as mono WAV data; the path keeps the uploaded file name.
    sound.export(save_path, format="wav", parameters=["-ac", "1"])
    _play_audio(save_path)
    # The estimate is the recording duration divided by 3. Rounding once
    # before splitting avoids printing "60 seconds".
    minutes, seconds = divmod(round(_audio_duration_seconds(save_path) / 3), 60)
    st.write("Starting transcription. Estimated processing time: %d minutes and %02d seconds"
             % (minutes, seconds))
    _transcribe_and_report(save_path, download_key="download_upload_transcript")