# app/app.py
# Origin: Hugging Face repository file by kedimestan,
# commit 36cf9cf ("Create app.py"), 1.62 kB.
import cv2
import easyocr
import numpy as np
from gtts import gTTS
import os
import pygame
import gradio as gr
# Module-level singletons, created once at import time and shared by every
# call to capture_and_process().
# OCR engine restricted to Turkish ('tr').
reader = easyocr.Reader(lang_list=['tr'])
# Audio backend used to play the synthesized speech.
pygame.mixer.init()
def _turkish_lower(text):
    """Return *text* lowercased with Turkish rules for the letter I."""
    # str.lower() is locale-independent: it maps "I" to "i" and "İ" to
    # "i" followed by U+0307.  Turkish needs "I" -> "ı" and "İ" -> "i",
    # so those two letters are rewritten before the generic lowering.
    return text.replace("I", "ı").replace("İ", "i").lower()


def _speak(text):
    """Synthesize *text* as Turkish speech and block until playback ends."""
    gTTS(text=_turkish_lower(text), lang='tr').save("output.mp3")
    pygame.mixer.music.load("output.mp3")
    pygame.mixer.music.play()

    # Poll about ten times per second; one Clock is reused for the whole
    # wait instead of building a new one on every iteration.
    clock = pygame.time.Clock()
    while pygame.mixer.music.get_busy():
        clock.tick(10)

    # Let go of output.mp3 so the next utterance can overwrite it.
    # unload() only exists in pygame 2.0+, hence the guarded lookup.
    unload = getattr(pygame.mixer.music, "unload", None)
    if unload is not None:
        unload()


def capture_and_process():
    """Grab one webcam frame, OCR it, and read every detected text aloud.

    The frame is written to ``captured_image.png``, passed to the
    module-level ``reader``, and each non-blank recognized string is
    spoken via gTTS and pygame before the function returns.

    Returns:
        A 2-tuple ``(text, image)``.  On success ``text`` is the list of
        recognized strings in detection order and ``image`` is the frame
        with its channel axis reversed (BGR -> RGB).  If no frame could
        be read, ``("Failed to capture image", None)`` is returned.
    """
    # Open the default webcam and always release it, even if read() raises.
    capture = cv2.VideoCapture(0)
    try:
        ret, frame = capture.read()
    finally:
        capture.release()

    if not ret or frame is None:
        return "Failed to capture image", None

    # Save the captured image
    filename = 'captured_image.png'
    cv2.imwrite(filename, frame)

    # Run OCR on the saved image
    results = reader.readtext(filename)

    detected_text = []
    for result in results:
        # The recognized string sits at index 1 of each OCR result.
        text = result[1]
        if not text.strip():
            continue
        detected_text.append(text)
        _speak(text)

    # Convert BGR to RGB for display in Gradio
    return detected_text, frame[..., ::-1]
# Gradio interface: takes no inputs; each run shows the detected text and
# the captured frame.  Components come from the top-level ``gr`` namespace
# because the legacy ``gr.outputs`` module is not available in current
# Gradio releases.
interface = gr.Interface(
    fn=capture_and_process,
    inputs=None,
    outputs=[
        gr.Textbox(label="Detected Text"),
        gr.Image(type="numpy", label="Captured Image"),
    ],
    live=True,
)

# Launch the app only when executed as a script, not when imported.
if __name__ == "__main__":
    interface.launch()