piecurus committed on
Commit
cab3a0b
1 Parent(s): da0005f

added functionality for long text; removed comments

Files changed (1)
  1. app.py +6 -74
app.py CHANGED
@@ -4,13 +4,6 @@
 # In[ ]:
 
 
-# convert mp3 to wav
-# ffmpeg -i test_5.mp3 -b:a 16000 test_5.wav
-
-
-# In[1]:
-
-
 #Importing all the necessary packages
 import nltk
 import librosa
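The removed note above pointed at an ffmpeg command for converting MP3 input. One caution: ffmpeg's -b:a flag sets the audio bitrate, while resampling to the 16 kHz rate that wav2vec2-base-960h expects is normally done with -ar. Since app.py already relies on librosa, a purely hypothetical helper doing the same conversion in Python (to_16k_wav is not part of this commit) could look like this:

import librosa
import soundfile as sf

def to_16k_wav(src_path, dst_path="converted_16k.wav"):
    # Hypothetical helper, not in this commit: load any audio file,
    # downmix to mono and resample to 16 kHz in one step via librosa.load,
    # then write the result as WAV for the ASR pipeline.
    speech, _ = librosa.load(src_path, sr=16000, mono=True)
    sf.write(dst_path, speech, 16000)
    return dst_path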
@@ -21,7 +14,7 @@ from transformers import Wav2Vec2Tokenizer, Wav2Vec2ForCTC
 nltk.download("punkt")
 
 
-# In[2]:
+# In[ ]:
 
 
 #Loading the model and the tokenizer
@@ -32,7 +25,7 @@ tokenizer = Wav2Vec2Tokenizer.from_pretrained(model_name)#omdel_name
 model = Wav2Vec2ForCTC.from_pretrained(model_name)
 
 
-# In[3]:
+# In[ ]:
 
 
 def load_data(input_file):
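The hunk above keeps loading the checkpoint through Wav2Vec2Tokenizer; recent transformers releases flag that class as deprecated for raw-audio input and suggest Wav2Vec2Processor instead. A hedged alternative loading snippet, assuming model_name refers to the facebook/wav2vec2-base-960h checkpoint mentioned in the comments elsewhere in this file:

from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC

# Assumption: the Space uses the base 960h checkpoint referenced in the file's comments.
model_name = "facebook/wav2vec2-base-960h"
processor = Wav2Vec2Processor.from_pretrained(model_name)  # feature extractor + CTC tokenizer
model = Wav2Vec2ForCTC.from_pretrained(model_name)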
@@ -50,7 +43,7 @@ def load_data(input_file):
     return speech
 
 
-# In[4]:
+# In[ ]:
 
 
 def correct_casing(input_sentence):
@@ -60,7 +53,7 @@ def correct_casing(input_sentence):
     return (' '.join([s.replace(s[0],s[0].capitalize(),1) for s in sentences]))
 
 
-# In[5]:
+# In[ ]:
 
 
 def asr_transcript(input_file):
@@ -80,7 +73,7 @@ def asr_transcript(input_file):
     return transcription
 
 
-# In[6]:
+# In[ ]:
 
 
 def asr_transcript_long(input_file,tokenizer=tokenizer, model=model ):
@@ -112,32 +105,7 @@ def asr_transcript_long(input_file,tokenizer=tokenizer, model=model ):
 
     return transcript
 
-from pydub import AudioSegment
-from pydub.silence import split_on_silence
-from pydub.playback import play
-
-sound = AudioSegment.from_file("./test_2.wav", format="wav")
-chunks = split_on_silence(
-    sound,
-
-    # split on silences longer than 5000 ms (5 sec)
-    min_silence_len=5000,
-
-    # anything under -32 dBFS is considered silence
-    silence_thresh=-32,
-
-    # keep 500 ms of leading/trailing silence
-    keep_silence=500
-) #read the file
-speech, sample_rate = librosa.load('./test_2.wav')
-#make it 1-D
-if len(speech.shape) > 1:
-    speech = speech[:,0] + speech[:,1]
-#Resampling at 16 kHz since wav2vec2-base-960h is pretrained and fine-tuned on speech audio sampled at 16 kHz.
-if sample_rate != 16000:
-    speech = librosa.resample(speech, sample_rate, 16000)
-part_of_speech = librosa.effects.split(speech)
-idx = -1
-IPython.display.Audio(data=speech[part_of_speech[idx,0]:part_of_speech[idx,1]], rate=16000)
+
 # In[ ]:
 
 
@@ -149,39 +117,3 @@ gr.Interface(asr_transcript_long,
 description = "This application displays transcribed text for given audio input",
 examples = [["Test_File1.wav"], ["Test_File2.wav"], ["Test_File3.wav"]], theme="grass").launch()
 
-
-# In[ ]:
-
-
-
-
-
-# In[ ]:
-
-
-
-
-
-# In[ ]:
-
-
-
-
-
-# In[7]:
-
-
-#temp = asr_transcript_long('./test_2.wav')
-
-
-# In[ ]:
-
-
-
-
-
-# In[ ]:
-
-
-
-
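The body of the new asr_transcript_long function sits outside the context lines shown in this diff, so only its signature and return value are visible. For orientation, here is a minimal, hypothetical sketch of how chunked transcription of long audio with this tokenizer and model could be structured; the names transcribe_long and chunk_s are illustrative and not taken from the commit:

import librosa
import torch

def transcribe_long(input_file, tokenizer, model, chunk_s=20):
    # Illustrative sketch only: split the recording into fixed-length chunks,
    # run wav2vec2 CTC decoding on each chunk, and join the partial transcripts.
    speech, _ = librosa.load(input_file, sr=16000, mono=True)
    step = chunk_s * 16000  # samples per chunk at 16 kHz
    pieces = []
    for start in range(0, len(speech), step):
        chunk = speech[start:start + step]
        if len(chunk) < 1600:  # skip fragments shorter than 0.1 s
            continue
        input_values = tokenizer(chunk, return_tensors="pt").input_values
        with torch.no_grad():
            logits = model(input_values).logits
        predicted_ids = torch.argmax(logits, dim=-1)
        pieces.append(tokenizer.decode(predicted_ids[0]).lower())
    return " ".join(pieces)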
 