import cv2
import easyocr
import numpy as np
from gtts import gTTS
import os
import pygame
import gradio as gr

# Build the EasyOCR reader for Turkish once at import time so that every
# request reuses it, and initialise the pygame mixer used for audio playback.
reader = easyocr.Reader(lang_list=['tr'])
pygame.mixer.init()

def _turkish_lower(text):
    """Return *text* lowercased with Turkish rules for dotted/dotless I."""
    # str.lower() is locale-independent: it turns "I" into "i" and
    # "\u0130" (capital dotted I) into "i" + U+0307. Turkish needs
    # "I" -> "\u0131" (dotless i) and "\u0130" -> "i", so map those first.
    return text.replace("I", "\u0131").replace("\u0130", "i").lower()


def _speak(text, audio_path="output.mp3"):
    """Synthesize *text* as Turkish speech and block until playback ends."""
    gTTS(text=_turkish_lower(text), lang='tr').save(audio_path)
    pygame.mixer.music.load(audio_path)
    pygame.mixer.music.play()
    try:
        # One clock for the whole wait; tick(10) caps polling at ~10 Hz.
        clock = pygame.time.Clock()
        while pygame.mixer.music.get_busy():
            clock.tick(10)
    finally:
        # Release the file so the next call can overwrite it; a still-loaded
        # file may be locked on some platforms. unload() needs pygame >= 2.0.
        unload = getattr(pygame.mixer.music, "unload", None)
        if unload is not None:
            unload()


def capture_and_process():
    """Grab one webcam frame, OCR it, and read each detected text aloud.

    The frame is written to ``captured_image.png`` and passed to the
    module-level EasyOCR ``reader``. Every non-blank detection is spoken
    in turn (blocking until playback finishes) and collected.

    Returns:
        ``(detected_text, rgb_frame)`` where ``detected_text`` is the list of
        recognised strings and ``rgb_frame`` is the captured frame with its
        channel axis reversed (BGR -> RGB) for display, or
        ``("Failed to capture image", None)`` when no frame could be read.
    """
    capture = cv2.VideoCapture(0)
    try:
        ret, frame = capture.read()
    finally:
        # Always hand the camera back, even if read() raises.
        capture.release()

    if not ret:
        return "Failed to capture image", None

    # Save the captured image and run OCR on the saved file
    filename = 'captured_image.png'
    cv2.imwrite(filename, frame)
    results = reader.readtext(filename)

    detected_text = []
    for result in results:
        # The recognised string is at index 1 of each OCR result.
        text = result[1]
        if not text.strip():
            continue
        detected_text.append(text)
        _speak(text)

    # Reverse the channel axis: OpenCV frames are BGR, the display wants RGB.
    return detected_text, frame[..., ::-1]

# Output widgets, listed in the same order as the values returned by
# capture_and_process: the text first, then the image.
_output_widgets = [
    gr.outputs.Textbox(label="Detected Text"),
    gr.outputs.Image(type="numpy", label="Captured Image"),
]

# Input-less Gradio interface wrapping the capture/OCR/speech pipeline.
interface = gr.Interface(
    fn=capture_and_process,
    inputs=None,
    outputs=_output_widgets,
    live=True,
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()