Shmuel committed on
Commit
63c9889
•
1 Parent(s): d1d60d4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +194 -1
app.py CHANGED
@@ -1,3 +1,196 @@
 
 
 
 
1
  import gradio as gr
 
 
 
2
 
3
- gr.load("models/ivrit-ai/whisper-13-v2-e2").launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import time
3
+ import moviepy.editor as mp
4
+ import psutil
5
  import gradio as gr
6
+ import spaces
7
+ from transformers import pipeline
8
+ from transformers.pipelines.audio_utils import ffmpeg_read
9
 
10
# Hub id of the model that is loaded at startup and pre-filled in the UI.
DEFAULT_MODEL_NAME = "ivrit-ai/whisper-13-v2-e2"
# Batch size passed to the ASR pipeline on every transcription call.
BATCH_SIZE = 8

print('start app')

# The pipeline `device` argument: CUDA device index 0 when available, else CPU.
# NOTE: the previous CPU-only branch re-assigned DEFAULT_MODEL_NAME to the
# exact same value, so it had no effect and has been dropped.
device = 0 if torch.cuda.is_available() else "cpu"
18
+
19
def load_pipeline(model_name):
    """Build an automatic-speech-recognition pipeline for *model_name*.

    The pipeline is created with 30-second chunking and placed on the
    module-level ``device``.
    """
    asr_options = {
        "task": "automatic-speech-recognition",
        "model": model_name,
        "chunk_length_s": 30,
        "device": device,
    }
    return pipeline(**asr_options)
26
+
27
# Pre-load the two models that transcribe() can switch between without a
# reload. The default model is loaded only once; `pipe` (the currently
# active pipeline, rebound by transcribe()) starts out as an alias of it
# instead of a second, identical copy of the same weights.
default_pipe = load_pipeline(DEFAULT_MODEL_NAME)
openai_pipe = load_pipeline("openai/whisper-large-v3")
pipe = default_pipe
30
+
31
+ #pipe = None
32
+
33
+
34
+ from gpustat import GPUStatCollection
35
+
36
def update_gpu_status():
    """Return a one-line, human-readable status string for the GPU.

    Returns "No Nvidia Device" when CUDA is unavailable. Otherwise the
    first GPU reported by gpustat is described; if gpustat fails or reports
    no devices, the torch-based status from torch_update_gpu_status() is
    returned instead, so the result is always a string.
    """
    if not torch.cuda.is_available():
        return "No Nvidia Device"
    try:
        gpu_stats = GPUStatCollection.new_query()
        for gpu in gpu_stats:
            # Only the first GPU is reported: we return on the first iteration.
            gpu_id = gpu.index
            gpu_name = gpu.name
            gpu_utilization = gpu.utilization
            memory_used = gpu.memory_used
            memory_total = gpu.memory_total
            memory_utilization = (memory_used / memory_total) * 100
            return (f"GPU {gpu_id}: {gpu_name}, Utilization: {gpu_utilization}%, Memory Used: {memory_used}MB, Memory Total: {memory_total}MB, Memory Utilization: {memory_utilization:.2f}%")
    except Exception as e:
        # Best-effort: any gpustat problem falls back to the torch query.
        print(f"Error getting GPU stats: {e}")
        return torch_update_gpu_status()

    # gpustat returned an empty collection; never hand None to the caller,
    # which concatenates this value with other strings.
    return torch_update_gpu_status()
55
+
56
def torch_update_gpu_status():
    """Return a status string for CUDA device 0 using only torch.

    Reports the device name plus free, total and used memory in MB (all
    rounded to two decimals), or "No GPU available" when CUDA is not
    available.
    """
    if not torch.cuda.is_available():
        return "No GPU available"

    bytes_per_mb = 1024 * 1024
    gpu_info = torch.cuda.get_device_name(0)
    # mem_get_info yields (free, total); index 0 was free and 1 was total
    # in the original indexing as well.
    free_bytes, total_bytes = torch.cuda.mem_get_info(0)
    free_memory = free_bytes / bytes_per_mb
    total_memory = total_bytes / bytes_per_mb
    used_memory = (total_bytes - free_bytes) / bytes_per_mb

    # Free memory is now rounded like the other two figures.
    return f"GPU: {gpu_info} Free Memory:{free_memory:.2f}MB Total Memory: {total_memory:.2f} MB Used Memory: {used_memory:.2f} MB"
68
+
69
def update_cpu_status():
    """Return the current CPU usage percentage followed by the local time.

    The time is formatted as HH:MM:SS.
    """
    from datetime import datetime

    # Take the timestamp first, then sample CPU usage (same order as before).
    timestamp = datetime.now().strftime("%H:%M:%S")
    usage = psutil.cpu_percent()
    return f"CPU Usage: {usage}% {timestamp}"
79
+
80
def update_status():
    """Return the GPU status and CPU status joined by a blank line."""
    # The tuple is evaluated left to right: GPU first, then CPU.
    parts = (update_gpu_status(), update_cpu_status())
    return "\n\n".join(parts)
85
+
86
def refresh_status():
    """Return a freshly computed system status string (see update_status)."""
    current_status = update_status()
    return current_status
88
+
89
+
90
@spaces.GPU
def transcribe(audio_path, model_name):
    """Transcribe the audio file at *audio_path* with the model *model_name*.

    The module-level ``pipe`` is rebound to the requested model: the two
    pre-loaded pipelines (``openai_pipe`` and ``default_pipe``) are reused
    when their model is requested; any other name is loaded on demand.

    Args:
        audio_path: Path to the audio file, or None if nothing was submitted.
        model_name: Hub id of the model to use; surrounding whitespace is
            ignored.

    Returns:
        A ``(text, info)`` tuple: the transcription and a multi-line summary
        (transcription time, audio duration, model and device used).

    Raises:
        gr.Error: If *audio_path* is None.
    """
    global pipe

    print(str(time.time())+' start transcribe ')

    if audio_path is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    audio_path = audio_path.strip()
    model_name = model_name.strip()

    if model_name != pipe.model.name_or_path:
        print("old model is:"+ pipe.model.name_or_path )
        # Fix: the original compared against the ivrit-ai model name here
        # while assigning openai_pipe, which selected the wrong model and
        # made the default-model branch below unreachable.
        if model_name == "openai/whisper-large-v3":
            pipe = openai_pipe
            print(str(time.time())+" use openai model " + pipe.model.name_or_path)
        elif model_name == DEFAULT_MODEL_NAME:
            pipe = default_pipe
            print(str(time.time())+" use default model " + pipe.model.name_or_path)
        else:
            print(str(time.time())+' start load model ' + model_name)
            pipe = load_pipeline(model_name)
            print(str(time.time())+' finished load model ' + model_name)

    start_time = time.time()  # Record the start time
    print(str(time.time())+' start processing and set recording start time point')

    # Open the clip only to read its duration, and always release the
    # underlying reader afterwards (it was previously left open).
    audio = mp.AudioFileClip(audio_path)
    try:
        audio_duration = audio.duration
    finally:
        audio.close()

    print(str(time.time())+' start pipe ')
    text = pipe(audio_path, batch_size=BATCH_SIZE, generate_kwargs={"task": "transcribe"}, return_timestamps=True)["text"]
    end_time = time.time()  # Record the end time

    transcription_time = end_time - start_time

    # Human-readable summary shown next to the transcription.
    transcription_time_output = (
        f"Transcription Time: {transcription_time:.2f} seconds\n"
        f"Audio Duration: {audio_duration:.2f} seconds\n"
        f"Model Used: {model_name}\n"
        f"Device Used: {'GPU' if torch.cuda.is_available() else 'CPU'}"
    )

    print(str(time.time())+' return transcribe '+ text )

    return text, transcription_time_output
136
+
137
@spaces.GPU
def handle_upload_audio(audio_path,model_name,old_transcription=''):
    """Transcribe a new audio file and put its text above the previous text.

    Returns a ``(combined_text, info)`` tuple where ``combined_text`` is the
    new transcription, a blank line, then *old_transcription*, and ``info``
    is the summary string produced by transcribe().
    """
    print('old_trans:' + old_transcription)
    new_text, info = transcribe(audio_path, model_name)
    combined_text = '\n\n'.join((new_text, old_transcription))
    return combined_text, info
142
+
143
# Audio input; the handler receives a path to the recorded/uploaded file.
graudio = gr.Audio(type="filepath", show_download_button=True)

# Free-text model selector, pre-filled with the default model name.
grmodel_textbox = gr.Textbox(
    label="Model Name",
    value=DEFAULT_MODEL_NAME,
    placeholder="Enter the model name",
    info="Some available models: distil-whisper/distil-large-v3 distil-whisper/distil-medium.en Systran/faster-distil-whisper-large-v3 Systran/faster-whisper-large-v3 Systran/faster-whisper-medium openai/whisper-tiny, openai/whisper-base, openai/whisper-medium, openai/whisper-large-v3",
)

# Output widgets: [0] is the transcription text, [1] is the run summary.
_transcription_area = gr.TextArea(
    label="Transcription",
    elem_id="transcription_textarea",
    interactive=True,
    lines=20,
    show_copy_button=True,
)
_transcription_info_area = gr.TextArea(
    label="Transcription Info",
    interactive=True,
    show_copy_button=True,
)
groutputs = [_transcription_area, _transcription_info_area]
152
+
153
# Text shown under the interface title.
_interface_description = (
    "Scroll to Bottom to show system status. "
    "Transcribe long-form microphone or audio file after uploaded audio! "
)

# Main transcription interface. Only the audio and model-name inputs are
# wired here, so handle_upload_audio's old_transcription parameter takes
# its default value for calls made through this interface.
mf_transcribe = gr.Interface(
    fn=handle_upload_audio,
    inputs=[graudio, grmodel_textbox],
    outputs=groutputs,
    theme="huggingface",
    title="Whisper Transcription",
    description=_interface_description,
    allow_flagging="never",
)
169
+
170
+
171
# NOTE(review): the nesting below was reconstructed from a flattened diff;
# confirm the Row contents against the real app.py.
with gr.Blocks() as demo:
    gr.TabbedInterface([mf_transcribe], ["Audio"])

    with gr.Row():
        refresh_button = gr.Button("Refresh Status")

    sys_status_output = gr.Textbox(label="System Status", interactive=False)

    # Manual refresh of the system status box.
    refresh_button.click(refresh_status, None, [sys_status_output])

    # Populate the status box on page load and keep refreshing it.
    demo.load(
        update_status,
        inputs=None,
        outputs=[sys_status_output],
        every=2,
        queue=False,
    )

    # Both finishing a recording and uploading a file trigger a
    # transcription; the current transcription text is passed in as the
    # third input so the handler can keep it below the new text.
    _upload_inputs = [graudio, grmodel_textbox, groutputs[0]]
    graudio.stop_recording(handle_upload_audio, inputs=_upload_inputs, outputs=groutputs)
    graudio.upload(handle_upload_audio, inputs=_upload_inputs, outputs=groutputs)

# Launch the Gradio app
demo.launch(share=True)

print('launched\n\n')