Spaces:

Add-Vishnu
/

Whisper_CPP_ASR_CLI

Runtime error

App Files Files Community

Whisper_CPP_ASR_CLI / app.py

Add-Vishnu

Changed audio to wav

7998ae9 over 1 year ago

raw

history blame

2.66 kB

	import gradio as gr
	import soundfile as sf
	import tempfile
	import shutil
	import os
	import librosa
	import time
	import numpy as np
	import subprocess

	# command = r"""wine './whisper_blas_bin_v1_3_0/main.exe' -h"""
	# wine_command = """sudo apt-get install wine"""
	command2 = """chmod +777 ./whisper_blas_bin_v1_3_0/main.exe"""
	# wine_c = subprocess.run(wine_command,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE, text=True)
	perm = subprocess.run(command2, shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE, text=True)
	# result = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
	# print("Wine Installation: ",wine_c)
	print("Access Installation: ",perm)


	def resample_to_16k(audio, orig_sr):
	y_resampled = librosa.resample(y=audio, orig_sr=orig_sr, target_sr = 16000)
	return y_resampled

	def transcribe(audio):
	sr,y = audio
	y = y.astype(np.float32)
	y /= np.max(np.abs(y))
	y_resampled = resample_to_16k(y, sr)


	with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
	temp_audio_path = temp_audio.name
	sf.write(temp_audio_path, y_resampled, 16000)



	# command = rf"""wine './whisper_blas_bin_v1_3_0/main.exe' -m './whisper_blas_bin_v1_3_0/models/ggml-model-whisper-small.en.bin' -osrt -f '{temp_audio_path}' -nt""" # English only
	command = rf"""wine './whisper_blas_bin_v1_3_0/main.exe' -m './whisper_blas_bin_v1_3_0/models/ggml-model-whisper-base.bin' -osrt -f '{temp_audio_path}' -nt""" # Multilingual

	start_time = time.time()
	result = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
	end_time = time.time()
	print("Output",result.stdout)
	print("Error",result.stderr)
	transcription = result.stdout
	print(transcription)

	print("--------------------------")
	print(f"Execution time: {end_time - start_time} seconds")
	return transcription, (end_time - start_time)



	demo = gr.Interface(
	transcribe,
	inputs = "microphone",
	# gr.Audio(sources=["microphone"]),
	outputs = [gr.Textbox(label="CLI_Transcription"),gr.Textbox(label="Time taken for Transcription")],
	examples=["./Samples/Hindi_1.mp3","./Samples/Hindi_2.mp3","./Samples/Tamil_1.mp3","./Samples/Tamil_2.mp3","./Samples/Marathi_1.mp3","./Samples/Marathi_2.mp3","./Samples/Nepal_1.mp3","./Samples/Nepal_2.mp3","./Samples/Telugu_1.wav","./Samples/Telugu_2.wav","./Samples/Malayalam_1.wav","./Samples/Malayalam_2.wav","./Samples/Gujarati_1.wav","./Samples/Gujarati_2.wav","./Samples/Bengali_1.wav","./Samples/Bengali_2.wav"]

	)

	demo.launch()