import os
import threading
import wave

import gradio as gr
import pyaudio
import requests
import soundfile as sf


# API URL and headers.
# The Hugging Face access token is read from the HF_TOKEN environment variable
# so that the module can be imported without a hard-coded (or undefined) secret.
API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
HF_TOKEN = os.environ.get("HF_TOKEN", "")
# Only send an Authorization header when a token is actually configured;
# an empty "Bearer " value would be a malformed credential.
headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}

# Audio configuration: these values are used both when opening the PyAudio
# input stream and when writing the WAV header, so the two always agree.
FORMAT = pyaudio.paInt16  # 16-bit integer samples
CHANNELS = 1  # single (mono) channel
RATE = 16000  # Whisper models expect 16kHz
CHUNK = 1024  # frames per buffer, and frames requested per stream read

class AudioRecorder:
    """Capture microphone audio through PyAudio and export it as WAV/FLAC.

    ``start_recording`` opens an input stream and starts a background thread
    that keeps reading chunks into ``self.frames`` until ``stop_recording``
    is called. Without that thread nothing would ever pull data from the
    stream and the saved file would be empty.
    """

    def __init__(self):
        # True while a capture is in progress; the capture loop polls it.
        self.is_recording = False
        # Raw byte chunks read from the input stream, in arrival order.
        self.frames = []
        self.audio = pyaudio.PyAudio()
        # Open input stream, or None when no capture is active.
        self.stream = None
        # Background reader thread started by start_recording().
        self._thread = None
        # Serialises stream reads against closing the stream.
        self._lock = threading.Lock()

    def start_recording(self):
        """Open the input stream and begin capturing in the background.

        Any capture that is still active is stopped first so its stream is
        not leaked. Previously recorded frames are discarded.

        Raises:
            OSError: propagated from PyAudio if the input device cannot be
                opened.
        """
        if self.stream is not None:
            self.stop_recording()

        self.frames = []
        self.stream = self.audio.open(
            format=FORMAT,
            channels=CHANNELS,
            rate=RATE,
            input=True,
            frames_per_buffer=CHUNK,
        )
        # Only flag the recorder as active once the stream really exists.
        self.is_recording = True
        # Daemon thread: it must never keep the process alive on exit.
        self._thread = threading.Thread(target=self._capture_loop, daemon=True)
        self._thread.start()

    def _capture_loop(self):
        """Read chunks until recording is stopped or the device fails."""
        try:
            while self.is_recording:
                self.record_chunk()
        except OSError:
            # The device went away mid-capture: stop reading but keep the
            # frames gathered so far so they can still be saved.
            self.is_recording = False

    def record_chunk(self):
        """Read one chunk from the stream and append it to ``self.frames``.

        Does nothing when no recording is active.
        """
        with self._lock:
            if self.is_recording and self.stream is not None:
                data = self.stream.read(CHUNK, exception_on_overflow=False)
                self.frames.append(data)

    def stop_recording(self):
        """Stop capturing and close the input stream.

        Safe to call when no recording was started; in that case it only
        clears the recording flag.
        """
        self.is_recording = False

        # Wait for the reader to finish its current chunk before touching
        # the stream it is reading from.
        worker, self._thread = self._thread, None
        if worker is not None:
            worker.join()

        with self._lock:
            stream, self.stream = self.stream, None
            if stream is not None:
                stream.stop_stream()
                stream.close()

    def save_audio(self, filename="output.wav", flac_filename="output.flac"):
        """Write the recorded frames to a WAV file and convert it to FLAC.

        Args:
            filename: Path of the intermediate WAV file to write.
            flac_filename: Path of the FLAC file to write.

        Returns:
            The path of the FLAC file (``flac_filename``).
        """
        with wave.open(filename, 'wb') as wf:
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(self.audio.get_sample_size(FORMAT))
            wf.setframerate(RATE)
            wf.writeframes(b''.join(self.frames))

        # Convert to FLAC by re-reading the WAV that was just written.
        data, samplerate = sf.read(filename)
        sf.write(flac_filename, data, samplerate, format='FLAC')
        return flac_filename

    def close(self):
        """Stop any active capture and terminate the PyAudio instance."""
        if self.stream is not None:
            self.stop_recording()
        self.audio.terminate()

# Module-level recorder instance used by start_recording(), record_audio()
# and stop_and_transcribe().
recorder = AudioRecorder()

def start_recording():
    """Start a capture on the module-level recorder and return a status text."""
    status = "Recording started."
    recorder.start_recording()
    return status

def record_audio():
    """Ask the module-level recorder for one chunk and return a status text."""
    status = "Recording in progress..."
    recorder.record_chunk()
    return status

def stop_and_transcribe():
    """Stop recording, send the audio to the inference API, return the text.

    Returns:
        The transcription text on success. On failure a human-readable
        string is returned instead of raising, so the UI always has
        something to display: ``"API error: <status>"`` for a non-200
        response, or ``"Error: <details>"`` for any exception (including a
        request timeout).
    """
    try:
        recorder.stop_recording()
        flac_file = recorder.save_audio()

        with open(flac_file, "rb") as f:
            response = requests.post(
                API_URL,
                headers=headers,
                data=f.read(),
                # (connect, read) seconds. Without a timeout a stalled
                # connection would block this callback forever.
                timeout=(10, 300),
            )

        if response.status_code != 200:
            return f"API error: {response.status_code}"

        result = response.json()
        return result.get("text", "No transcription available.")
    except Exception as e:
        # Top-level UI boundary: report the problem instead of crashing.
        return f"Error: {e}"
    finally:
        # Always remove the temporary audio files, even after a failure.
        for leftover in ("output.wav", "output.flac"):
            if os.path.exists(leftover):
                os.remove(leftover)

# Define Gradio interface
def build_interface():
    """Build the Gradio Blocks UI for recording and transcription.

    The layout has a start button, a stop-and-transcribe button, a read-only
    status box that shows the message returned by ``start_recording``, and a
    textbox for the transcription result.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).
    """
    with gr.Blocks() as demo:
        gr.Markdown("# Speech-to-Text Transcription with Whisper")

        with gr.Row():
            start_button = gr.Button("Start Recording")
            stop_button = gr.Button("Stop and Transcribe")

        # Show the start callback's message so the user can see that
        # recording has actually begun instead of it being discarded.
        status_output = gr.Textbox(label="Status", interactive=False)
        transcription_output = gr.Textbox(label="Transcription")

        start_button.click(start_recording, outputs=status_output)
        stop_button.click(stop_and_transcribe, outputs=transcription_output)

    return demo

# Script entry point: build the UI and start the Gradio server only when this
# file is executed directly, not when it is imported.
if __name__ == "__main__":
    interface = build_interface()
    interface.launch()