"""Webcam OCR reader with Turkish text-to-speech, exposed through Gradio.

A single webcam frame is captured, run through EasyOCR (Turkish model),
and every non-blank detected text fragment is spoken aloud with gTTS and
pygame before the results are returned to the Gradio interface.
"""
import os

import cv2
import easyocr
import gradio as gr
import numpy as np
import pygame
from gtts import gTTS

# File names written to the current working directory on every run.
IMAGE_FILENAME = 'captured_image.png'
AUDIO_FILENAME = "output.mp3"

# Initialize OCR reader and TTS system
reader = easyocr.Reader(['tr'])
pygame.mixer.init()


def _turkish_lower(text):
    """Lower-case *text* using Turkish dotted/dotless-i rules.

    ``str.lower()`` is locale-independent: it maps "I" to "i" and "İ" to
    "i" followed by a combining dot. Turkish needs "I" -> "ı" and
    "İ" -> "i", so those two letters are mapped explicitly first.
    """
    return text.replace("İ", "i").replace("I", "ı").lower()


def _speak(text):
    """Synthesize *text* as Turkish speech and block until playback ends."""
    tts = gTTS(text=_turkish_lower(text), lang='tr')
    tts.save(AUDIO_FILENAME)
    pygame.mixer.music.load(AUDIO_FILENAME)
    pygame.mixer.music.play()

    # Poll roughly ten times per second; one Clock is enough for the loop.
    clock = pygame.time.Clock()
    while pygame.mixer.music.get_busy():
        clock.tick(10)

    # Release the file so the next call can overwrite it. ``unload`` only
    # exists in pygame 2.0+, hence the guarded lookup.
    unload = getattr(pygame.mixer.music, "unload", None)
    if unload is not None:
        unload()


def capture_and_process():
    """Capture one webcam frame, OCR it, and read the detected text aloud.

    Returns:
        A 2-tuple ``(detected_text, image)``. On success ``detected_text``
        is the list of non-blank strings found by OCR (in the order EasyOCR
        returned them) and ``image`` is the captured frame with its channel
        axis reversed (BGR -> RGB) for display. If the frame could not be
        read, returns ``("Failed to capture image", None)``.

    Side effects:
        Writes ``captured_image.png`` and ``output.mp3`` to the working
        directory and plays audio through the pygame mixer, blocking until
        each fragment has finished playing.
    """
    # Open webcam and grab a single frame; always release the device,
    # even if reading raises.
    capture = cv2.VideoCapture(0)
    try:
        ret, frame = capture.read()
    finally:
        capture.release()

    if not ret:
        return "Failed to capture image", None

    # Save the captured image and run OCR on the saved file
    cv2.imwrite(IMAGE_FILENAME, frame)
    results = reader.readtext(IMAGE_FILENAME)

    # Speak each detected text fragment; the fragment's text is element 1
    # of each OCR result.
    detected_text = []
    for result in results:
        text = result[1]
        if not text.strip():
            continue
        detected_text.append(text)
        _speak(text)

    # Return the detected text and the captured image
    return detected_text, frame[..., ::-1]  # Convert BGR to RGB for display in Gradio


# Gradio interface
# NOTE(review): ``gr.outputs.*`` looks like the legacy Gradio component
# namespace -- confirm it is still available in the installed version.
interface = gr.Interface(
    fn=capture_and_process,
    inputs=None,
    outputs=[
        gr.outputs.Textbox(label="Detected Text"),
        gr.outputs.Image(type="numpy", label="Captured Image"),
    ],
    live=True
)

# Launch the app
if __name__ == "__main__":
    interface.launch()