csukuangfj committed on
Commit
cfd7673
·
1 Parent(s): 16e9291

output all texts

Browse files
Files changed (3) hide show
  1. .gitattributes +1 -0
  2. app.py +15 -5
  3. decode.py +12 -1
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.wav filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -106,8 +106,8 @@ def process_uploaded_video_file(
106
 
107
  logging.info(f"Processing uploaded file: {in_filename}")
108
 
109
- ans = process(language, repo_id, add_punctuation, in_filename)
110
- return (in_filename, ans[0]), ans[0], ans[1], ans[2]
111
 
112
 
113
  def process_uploaded_audio_file(
@@ -142,8 +142,9 @@ def process(language: str, repo_id: str, add_punctuation: str, in_filename: str)
142
  else:
143
  punct = None
144
 
145
- result = decode(recognizer, vad, punct, in_filename)
146
  logging.info(result)
 
147
 
148
  srt_filename = Path(in_filename).with_suffix(".srt")
149
  with open(srt_filename, "w", encoding="utf-8") as f:
@@ -156,6 +157,7 @@ def process(language: str, repo_id: str, add_punctuation: str, in_filename: str)
156
  str(srt_filename),
157
  build_html_output("Done! Please download the SRT file", "result_item_success"),
158
  result,
 
159
  )
160
 
161
 
@@ -205,7 +207,10 @@ with demo:
205
 
206
  output_info_video = gr.HTML(label="Info")
207
  output_textbox_video = gr.Textbox(
208
- label="Recognized speech from uploaded video file"
 
 
 
209
  )
210
 
211
  with gr.TabItem("Upload audio from disk"):
@@ -222,7 +227,10 @@ with demo:
222
 
223
  output_info_audio = gr.HTML(label="Info")
224
  output_textbox_audio = gr.Textbox(
225
- label="Recognized speech from uploaded audio file"
 
 
 
226
  )
227
 
228
  upload_video_button.click(
@@ -238,6 +246,7 @@ with demo:
238
  output_srt_file_video,
239
  output_info_video,
240
  output_textbox_video,
 
241
  ],
242
  )
243
 
@@ -253,6 +262,7 @@ with demo:
253
  output_srt_file_audio,
254
  output_info_audio,
255
  output_textbox_audio,
 
256
  ],
257
  )
258
 
 
106
 
107
  logging.info(f"Processing uploaded file: {in_filename}")
108
 
109
+ ans, all_text = process(language, repo_id, add_punctuation, in_filename)
110
+ return (in_filename, ans[0]), ans[0], ans[1], ans[2], all_text
111
 
112
 
113
  def process_uploaded_audio_file(
 
142
  else:
143
  punct = None
144
 
145
+ result, all_text = decode(recognizer, vad, punct, in_filename)
146
  logging.info(result)
147
+ logging.info(all_text)
148
 
149
  srt_filename = Path(in_filename).with_suffix(".srt")
150
  with open(srt_filename, "w", encoding="utf-8") as f:
 
157
  str(srt_filename),
158
  build_html_output("Done! Please download the SRT file", "result_item_success"),
159
  result,
160
+ all_text,
161
  )
162
 
163
 
 
207
 
208
  output_info_video = gr.HTML(label="Info")
209
  output_textbox_video = gr.Textbox(
210
+ label="Recognized speech from uploaded video file (srt format)"
211
+ )
212
+ all_output_textbox_video = gr.Textbox(
213
+ label="Recognized speech from uploaded video file (all in one)"
214
  )
215
 
216
  with gr.TabItem("Upload audio from disk"):
 
227
 
228
  output_info_audio = gr.HTML(label="Info")
229
  output_textbox_audio = gr.Textbox(
230
+ label="Recognized speech from uploaded audio file (srt format)"
231
+ )
232
+ all_output_textbox_audio = gr.Textbox(
233
+ label="Recognized speech from uploaded audio file (all in one)"
234
  )
235
 
236
  upload_video_button.click(
 
246
  output_srt_file_video,
247
  output_info_video,
248
  output_textbox_video,
249
+ all_output_textbox_video,
250
  ],
251
  )
252
 
 
262
  output_srt_file_audio,
263
  output_info_audio,
264
  output_textbox_audio,
265
+ all_output_textbox_audio,
266
  ],
267
  )
268
 
decode.py CHANGED
@@ -81,6 +81,8 @@ def decode(
81
 
82
  logging.info("Started!")
83
 
 
 
84
  while True:
85
  # *2 because int16_t has two bytes
86
  data = process.stdout.read(frames_per_read * 2)
@@ -116,8 +118,17 @@ def decode(
116
 
117
  for seg, stream in zip(segments, streams):
118
  seg.text = stream.result.text.strip()
 
 
 
 
 
 
119
  if punct is not None:
120
  seg.text = punct.add_punctuation(seg.text)
121
  segment_list.append(seg)
 
 
 
122
 
123
- return "\n\n".join(f"{i}\n{seg}" for i, seg in enumerate(segment_list, 1))
 
81
 
82
  logging.info("Started!")
83
 
84
+ all_text = []
85
+
86
  while True:
87
  # *2 because int16_t has two bytes
88
  data = process.stdout.read(frames_per_read * 2)
 
118
 
119
  for seg, stream in zip(segments, streams):
120
  seg.text = stream.result.text.strip()
121
+ if not all_text:
122
+ all_text.append(seg.text)
123
+ elif len(all_text[-1][0].encode()) == 1 and len(seg.text[0].encode()) == 1:
124
+ all_text.append(" ")
125
+ all_text.append(seg.text)
126
+
127
  if punct is not None:
128
  seg.text = punct.add_punctuation(seg.text)
129
  segment_list.append(seg)
130
+ all_text = " ".join(all_text)
131
+ if punct is not None:
132
+ all_text = punct.add_punctuation(all_text)
133
 
134
+ return "\n\n".join(f"{i}\n{seg}" for i, seg in enumerate(segment_list, 1)), all_text