Commit
·
1c97ed5
1
Parent(s):
6934a38
feat: Functional gradio app
Browse files- .gitignore +1 -0
- Dockerfile +4 -1
- app/app.py +9 -4
- app/gradio_app.py +77 -53
.gitignore
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
# ODTP dev
|
2 |
odtp-input
|
3 |
odtp-output
|
|
|
4 |
|
5 |
# Mac crap
|
6 |
.DS_Store
|
|
|
1 |
# ODTP dev
|
2 |
odtp-input
|
3 |
odtp-output
|
4 |
+
odtp-logs
|
5 |
|
6 |
# Mac crap
|
7 |
.DS_Store
|
Dockerfile
CHANGED
@@ -66,4 +66,7 @@ RUN sed -i 's/\r$//' /odtp/odtp-component-client/odtp-app.sh
|
|
66 |
RUN sed -i 's/\r$//' /odtp/odtp-component-client/startup.sh
|
67 |
RUN sed -i 's/\r$//' /odtp/odtp-app/app.sh
|
68 |
|
69 |
-
ENTRYPOINT ["bash", "/odtp/odtp-component-client/startup.sh"]
|
|
|
|
|
|
|
|
66 |
RUN sed -i 's/\r$//' /odtp/odtp-component-client/startup.sh
|
67 |
RUN sed -i 's/\r$//' /odtp/odtp-app/app.sh
|
68 |
|
69 |
+
#ENTRYPOINT ["bash", "/odtp/odtp-component-client/startup.sh"]
|
70 |
+
ENTRYPOINT [ "python3", "/odtp/odtp-app/gradio_app.py" ]
|
71 |
+
|
72 |
+
# TODO: Create a command that runs the app through an entrypoint (basically the startup mode). Also, in order to work with an API, do I need some interface with an S3 bucket to make it work?
|
app/app.py
CHANGED
@@ -321,7 +321,8 @@ def main(args):
|
|
321 |
if args.language:
|
322 |
whisper_options["language"] = args.language
|
323 |
writer_options = {"max_line_width":55, "max_line_count":2, "word_timestamps": False}
|
324 |
-
|
|
|
325 |
|
326 |
# Group consecutive segments of the same speaker
|
327 |
grouped_segments = []
|
@@ -330,9 +331,11 @@ def main(args):
|
|
330 |
current_end = None
|
331 |
|
332 |
for turn, _, speaker in diarization.itertracks(yield_label=True):
|
333 |
-
|
|
|
334 |
if turn.end - turn.start < 0.5: # Suppress short utterances (pyannote artifact)
|
335 |
-
|
|
|
336 |
continue
|
337 |
|
338 |
if speaker == current_speaker:
|
@@ -354,7 +357,8 @@ def main(args):
|
|
354 |
clip_audio(args.input_file, sample_rate, start, end, clip_path)
|
355 |
result = model.transcribe(start=start, end=end, options=whisper_options)
|
356 |
language = result['language']
|
357 |
-
|
|
|
358 |
writer(result, args.output_file, speaker, start, writer_options)
|
359 |
writer_json(generate_segments(result['segments'], speaker, language), args.output_json_file)
|
360 |
writer_json.finalize()
|
@@ -369,6 +373,7 @@ if __name__ == '__main__':
|
|
369 |
parser.add_argument('--input-file', type=str, required=True, help="Input audio file")
|
370 |
parser.add_argument('--output-file', type=str, required=True, help="Output file for the results (SRT or VTT)")
|
371 |
parser.add_argument('--output-json-file', type=str, required=True, help="Output file for the results (SRT or VTT)")
|
|
|
372 |
|
373 |
args = parser.parse_args()
|
374 |
main(args)
|
|
|
321 |
if args.language:
|
322 |
whisper_options["language"] = args.language
|
323 |
writer_options = {"max_line_width":55, "max_line_count":2, "word_timestamps": False}
|
324 |
+
if args.verbose=="True":
|
325 |
+
print("Process diarized blocks")
|
326 |
|
327 |
# Group consecutive segments of the same speaker
|
328 |
grouped_segments = []
|
|
|
331 |
current_end = None
|
332 |
|
333 |
for turn, _, speaker in diarization.itertracks(yield_label=True):
|
334 |
+
if args.verbose=="True":
|
335 |
+
print(speaker)
|
336 |
if turn.end - turn.start < 0.5: # Suppress short utterances (pyannote artifact)
|
337 |
+
if args.verbose=="True":
|
338 |
+
print(f"start={turn.start:.1f}s stop={turn.end:.1f}s IGNORED")
|
339 |
continue
|
340 |
|
341 |
if speaker == current_speaker:
|
|
|
357 |
clip_audio(args.input_file, sample_rate, start, end, clip_path)
|
358 |
result = model.transcribe(start=start, end=end, options=whisper_options)
|
359 |
language = result['language']
|
360 |
+
if args.verbose=="True":
|
361 |
+
print(f"start={start:.1f}s stop={end:.1f}s lang={language} {speaker}")
|
362 |
writer(result, args.output_file, speaker, start, writer_options)
|
363 |
writer_json(generate_segments(result['segments'], speaker, language), args.output_json_file)
|
364 |
writer_json.finalize()
|
|
|
373 |
parser.add_argument('--input-file', type=str, required=True, help="Input audio file")
|
374 |
parser.add_argument('--output-file', type=str, required=True, help="Output file for the results (SRT or VTT)")
|
375 |
parser.add_argument('--output-json-file', type=str, required=True, help="Output file for the results (SRT or VTT)")
|
376 |
+
parser.add_argument('--verbose', type=str, required=False, help="Printing status")
|
377 |
|
378 |
args = parser.parse_args()
|
379 |
main(args)
|
app/gradio_app.py
CHANGED
@@ -3,8 +3,8 @@ import tempfile
|
|
3 |
import os
|
4 |
import shutil
|
5 |
import subprocess
|
6 |
-
|
7 |
-
import
|
8 |
|
9 |
def create_temp_structure():
|
10 |
"""Create temporary ODTP folder structure"""
|
@@ -13,6 +13,11 @@ def create_temp_structure():
|
|
13 |
os.makedirs(os.path.join(temp_dir, "odtp-output"))
|
14 |
return temp_dir
|
15 |
|
|
|
|
|
|
|
|
|
|
|
16 |
def cleanup_temp(temp_dir):
|
17 |
"""Remove temporary folder structure"""
|
18 |
shutil.rmtree(temp_dir)
|
@@ -22,54 +27,64 @@ def process_audio(audio_file, model, task, language, hf_token):
|
|
22 |
# Create temp structure
|
23 |
temp_dir = create_temp_structure()
|
24 |
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
"
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
# Create BytesIO objects for downloads
|
62 |
-
srt_bytes = io.BytesIO(srt_content.encode('utf-8'))
|
63 |
-
srt_bytes.name = "output.srt"
|
64 |
-
json_bytes = io.BytesIO(json_content.encode('utf-8'))
|
65 |
-
json_bytes.name = "output.json"
|
66 |
-
|
67 |
-
# Return contents and BytesIO objects
|
68 |
-
return srt_content, json_content, srt_bytes, json_bytes
|
69 |
|
70 |
-
|
71 |
-
|
72 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
|
74 |
# Define Gradio interface
|
75 |
with gr.Blocks() as demo:
|
@@ -103,6 +118,9 @@ with gr.Blocks() as demo:
|
|
103 |
submit_btn = gr.Button("Process Audio")
|
104 |
|
105 |
with gr.Column():
|
|
|
|
|
|
|
106 |
srt_output = gr.Textbox(
|
107 |
label="SRT Output",
|
108 |
lines=10
|
@@ -113,16 +131,18 @@ with gr.Blocks() as demo:
|
|
113 |
)
|
114 |
# Add download buttons
|
115 |
srt_download = gr.File(
|
116 |
-
label="Download SRT File"
|
|
|
117 |
)
|
118 |
json_download = gr.File(
|
119 |
-
label="Download JSON File"
|
|
|
120 |
)
|
121 |
|
122 |
submit_btn.click(
|
123 |
fn=process_audio,
|
124 |
inputs=[audio_input, model, task, language, hf_token],
|
125 |
-
outputs=[srt_output, json_output, srt_download, json_download]
|
126 |
)
|
127 |
|
128 |
if __name__ == "__main__":
|
@@ -132,4 +152,8 @@ if __name__ == "__main__":
|
|
132 |
share=False, # Disable temporary public URL
|
133 |
show_error=True, # Show detailed error messages
|
134 |
debug=True # Enable debug mode for development
|
135 |
-
)
|
|
|
|
|
|
|
|
|
|
3 |
import os
|
4 |
import shutil
|
5 |
import subprocess
|
6 |
+
import threading
|
7 |
+
import time
|
8 |
|
9 |
def create_temp_structure():
|
10 |
"""Create temporary ODTP folder structure"""
|
|
|
13 |
os.makedirs(os.path.join(temp_dir, "odtp-output"))
|
14 |
return temp_dir
|
15 |
|
16 |
+
def remove_later(path, delay):
|
17 |
+
time.sleep(delay)
|
18 |
+
if os.path.exists(path):
|
19 |
+
shutil.rmtree(path, ignore_errors=True)
|
20 |
+
|
21 |
def cleanup_temp(temp_dir):
|
22 |
"""Remove temporary folder structure"""
|
23 |
shutil.rmtree(temp_dir)
|
|
|
27 |
# Create temp structure
|
28 |
temp_dir = create_temp_structure()
|
29 |
|
30 |
+
start_time = time.time()
|
31 |
+
print(f"Processing started at: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))}")
|
32 |
+
|
33 |
+
# Copy input file
|
34 |
+
input_path = os.path.join(temp_dir, "odtp-input", "input.wav")
|
35 |
+
shutil.copy2(audio_file, input_path)
|
36 |
+
|
37 |
+
# Prepare output paths. TODO: Add a UUID to the output file names
|
38 |
+
output_base = audio_file.split("/")[-1].replace(".wav", "")
|
39 |
+
output_srt = os.path.join(temp_dir, "odtp-output", #temp_dir
|
40 |
+
f"{output_base}_{task}.srt")
|
41 |
+
output_json = os.path.join(temp_dir, "odtp-output",
|
42 |
+
f"{output_base}_{task}.json")
|
43 |
+
|
44 |
+
# Use HF_TOKEN from environment if not provided
|
45 |
+
if not hf_token:
|
46 |
+
hf_token = os.getenv("HF_TOKEN")
|
47 |
+
if not hf_token:
|
48 |
+
raise ValueError("Hugging Face token is required but not provided.")
|
49 |
+
|
50 |
+
# Build command
|
51 |
+
cmd = [
|
52 |
+
"python3", "/odtp/odtp-app/app.py",
|
53 |
+
"--model", model,
|
54 |
+
"--quantize",
|
55 |
+
"--hf-token", hf_token,
|
56 |
+
"--task", task,
|
57 |
+
"--input-file", input_path,
|
58 |
+
"--output-file", output_srt,
|
59 |
+
"--output-json-file", output_json,
|
60 |
+
"--verbose", "False"
|
61 |
+
]
|
62 |
+
|
63 |
+
if language != "auto":
|
64 |
+
cmd.extend(["--language", language])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
|
66 |
+
# Run transcription
|
67 |
+
subprocess.run(cmd, check=True)
|
68 |
+
|
69 |
+
# Read results
|
70 |
+
with open(output_srt, 'r', encoding='utf-8') as f:
|
71 |
+
srt_content = f.read()
|
72 |
+
with open(output_json, 'r', encoding='utf-8') as f:
|
73 |
+
json_content = f.read()
|
74 |
+
|
75 |
+
# Code to delete files after 300 seconds
|
76 |
+
threading.Thread(target=remove_later, args=(temp_dir, 300), daemon=True).start()
|
77 |
+
|
78 |
+
end_time = time.time()
|
79 |
+
print(f"Processing ended at: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))}")
|
80 |
+
|
81 |
+
total_duration = end_time - start_time
|
82 |
+
hours, remainder = divmod(total_duration, 3600)
|
83 |
+
minutes, seconds = divmod(remainder, 60)
|
84 |
+
total_duration_str = f"{int(hours)}h {int(minutes)}m {int(seconds)}s"
|
85 |
+
print(f"Total processing time: {total_duration_str}")
|
86 |
+
|
87 |
+
return total_duration_str, srt_content, json_content, output_srt, output_json
|
88 |
|
89 |
# Define Gradio interface
|
90 |
with gr.Blocks() as demo:
|
|
|
118 |
submit_btn = gr.Button("Process Audio")
|
119 |
|
120 |
with gr.Column():
|
121 |
+
information = gr.Text(
|
122 |
+
label="Information"
|
123 |
+
)
|
124 |
srt_output = gr.Textbox(
|
125 |
label="SRT Output",
|
126 |
lines=10
|
|
|
131 |
)
|
132 |
# Add download buttons
|
133 |
srt_download = gr.File(
|
134 |
+
label="Download SRT File",
|
135 |
+
type="binary"
|
136 |
)
|
137 |
json_download = gr.File(
|
138 |
+
label="Download JSON File",
|
139 |
+
type="binary"
|
140 |
)
|
141 |
|
142 |
submit_btn.click(
|
143 |
fn=process_audio,
|
144 |
inputs=[audio_input, model, task, language, hf_token],
|
145 |
+
outputs=[information, srt_output, json_output, srt_download, json_download]
|
146 |
)
|
147 |
|
148 |
if __name__ == "__main__":
|
|
|
152 |
share=False, # Disable temporary public URL
|
153 |
show_error=True, # Show detailed error messages
|
154 |
debug=True # Enable debug mode for development
|
155 |
+
)
|
156 |
+
|
157 |
+
|
158 |
+
|
159 |
+
# TODO: Output is printed slowly on the command line.
|