import os
import threading
import wave

import gradio as gr
import pyaudio
import requests
import soundfile as sf

# API URL and headers
API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
# The token is read from the environment so the module can be imported without
# a hard-coded secret; an empty token makes the API reject the request instead
# of crashing here with a NameError.
HF_TOKEN = os.environ.get("HF_TOKEN", "")
headers = {"Authorization": f"Bearer {HF_TOKEN}"}

# Audio configuration
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000  # Whisper models expect 16kHz
CHUNK = 1024


class AudioRecorder:
    """Capture microphone audio with PyAudio and export it as WAV/FLAC.

    Recording runs on a background thread between ``start_recording`` and
    ``stop_recording``; the captured raw chunks accumulate in ``frames``.
    """

    def __init__(self):
        self.is_recording = False
        self.frames = []
        self.audio = pyaudio.PyAudio()
        # No stream exists until start_recording() opens one.
        self.stream = None
        self._worker = None
        # Serializes stream reads between the capture thread and any direct
        # record_chunk() caller.
        self._lock = threading.Lock()

    def start_recording(self):
        """Starts audio recording.

        Opens the input stream and launches a background thread that keeps
        reading chunks until ``stop_recording`` is called. Calling this while
        a recording is already in progress is a no-op.
        """
        if self.is_recording:
            return
        self.frames = []
        self.stream = self.audio.open(
            format=FORMAT,
            channels=CHANNELS,
            rate=RATE,
            input=True,
            frames_per_buffer=CHUNK,
        )
        # Flip the flag only after the stream opened successfully so a failed
        # open does not leave the recorder claiming to be active.
        self.is_recording = True
        self._worker = threading.Thread(target=self._capture_loop, daemon=True)
        self._worker.start()

    def _capture_loop(self):
        """Read chunks until recording is stopped or the device fails."""
        while self.is_recording:
            try:
                self.record_chunk()
            except OSError:
                # The input device failed; keep what was captured so far and
                # end the loop instead of spinning on a broken stream.
                self.is_recording = False

    def record_chunk(self):
        """Records a chunk of audio.

        Does nothing unless a recording is active and a stream is open.
        """
        with self._lock:
            if self.is_recording and self.stream is not None:
                data = self.stream.read(CHUNK, exception_on_overflow=False)
                self.frames.append(data)

    def stop_recording(self):
        """Stops the audio recording.

        Safe to call when no recording was ever started.
        """
        self.is_recording = False
        worker, self._worker = self._worker, None
        if worker is not None:
            # One blocking read lasts about CHUNK / RATE seconds, so the
            # thread notices the cleared flag almost immediately.
            worker.join(timeout=1.0)
        with self._lock:
            stream, self.stream = self.stream, None
            if stream is not None:
                try:
                    stream.stop_stream()
                finally:
                    stream.close()

    def save_audio(self, filename="output.wav"):
        """Saves the recorded audio to a WAV file and converts it to FLAC.

        Args:
            filename: Path of the intermediate WAV file.

        Returns:
            Path of the FLAC file, which is ``filename`` with its extension
            replaced by ``.flac`` ("output.flac" for the default).
        """
        with wave.open(filename, "wb") as wf:
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(self.audio.get_sample_size(FORMAT))
            wf.setframerate(RATE)
            wf.writeframes(b"".join(self.frames))

        # Convert to FLAC
        flac_filename = os.path.splitext(filename)[0] + ".flac"
        data, samplerate = sf.read(filename)
        sf.write(flac_filename, data, samplerate, format="FLAC")
        return flac_filename

    def close(self):
        """Stop any active recording and release the PyAudio instance."""
        self.stop_recording()
        self.audio.terminate()


recorder = AudioRecorder()


def start_recording():
    """Start capturing on the shared recorder and return a status message."""
    recorder.start_recording()
    return "Recording started."


def record_audio():
    """Record one chunk on the shared recorder and return a status message."""
    recorder.record_chunk()
    return "Recording in progress..."
# Upper bound, in seconds, on a single transcription request so a stalled
# connection cannot block the UI handler forever.
TRANSCRIBE_TIMEOUT_SECONDS = 60


def stop_and_transcribe():
    """Stop recording, upload the audio, and return the transcription.

    Returns:
        The transcribed text on success; otherwise a human-readable message
        describing the failure. Errors are reported as text rather than raised
        because the result is displayed directly in the UI.
    """
    try:
        recorder.stop_recording()
        if not recorder.frames:
            # Nothing was captured; uploading an empty file would only
            # produce an API error.
            return "No audio was recorded."

        flac_file = recorder.save_audio()
        with open(flac_file, "rb") as f:
            payload = f.read()

        response = requests.post(
            API_URL,
            # Declare the payload type explicitly alongside the auth header.
            headers={**headers, "Content-Type": "audio/flac"},
            data=payload,
            timeout=TRANSCRIBE_TIMEOUT_SECONDS,
        )

        if response.status_code == 200:
            result = response.json()
            return result.get("text", "No transcription available.")
        return f"API error: {response.status_code}"
    except Exception as e:
        return f"Error: {str(e)}"
    finally:
        # Remove the temporary files written by save_audio(), if any.
        for leftover in ("output.wav", "output.flac"):
            if os.path.exists(leftover):
                os.remove(leftover)


# Define Gradio interface
def build_interface():
    """Build the Gradio Blocks UI with start/stop buttons and text outputs.

    Returns:
        The constructed ``gr.Blocks`` app, not yet launched.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# Speech-to-Text Transcription with Whisper")

        with gr.Row():
            start_button = gr.Button("Start Recording")
            stop_button = gr.Button("Stop and Transcribe")

        # start_recording returns a status string; route it to a component so
        # the user sees it instead of the value being discarded.
        status_output = gr.Textbox(label="Status", interactive=False)
        transcription_output = gr.Textbox(label="Transcription")

        start_button.click(start_recording, outputs=status_output)
        stop_button.click(stop_and_transcribe, outputs=transcription_output)

    return demo


if __name__ == "__main__":
    interface = build_interface()
    interface.launch()