kedimestan committed on
Commit
36cf9cf
•
1 Parent(s): b0d1a94

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -0
app.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import easyocr
3
+ import numpy as np
4
+ from gtts import gTTS
5
+ import os
6
+ import pygame
7
+ import gradio as gr
8
+
9
# Module-level singletons, created once at import time and reused by every
# call to capture_and_process():
#   * `reader` is the EasyOCR reader configured for Turkish ('tr').
#   * the pygame mixer is initialised so speech clips can be played back.
reader = easyocr.Reader(
    ['tr'],
)
pygame.mixer.init()
12
+
13
def _turkish_lower(text):
    """Return *text* lowercased according to Turkish casing rules.

    ``str.lower()`` is locale-independent: it maps "I" to "i" (Turkish
    expects the dotless "ı") and "İ" to "i" followed by a combining dot.
    Both special letters are therefore mapped by hand before the generic
    lowering handles the rest of the string.
    """
    return text.replace("I", "ı").replace("İ", "i").lower()


def _speak(text):
    """Synthesize *text* as Turkish speech and block until playback ends."""
    tts = gTTS(text=_turkish_lower(text), lang='tr')
    tts.save("output.mp3")
    pygame.mixer.music.load("output.mp3")
    pygame.mixer.music.play()

    # Poll roughly ten times per second until the clip has finished.
    while pygame.mixer.music.get_busy():
        pygame.time.wait(100)

    # Release the handle on output.mp3 so the next clip can overwrite it
    # (the file may otherwise stay locked on some platforms). `unload` is
    # only available on newer pygame releases, hence the guarded lookup.
    unload = getattr(pygame.mixer.music, "unload", None)
    if unload is not None:
        unload()


def capture_and_process():
    """Grab one webcam frame, OCR it, and read every detected text aloud.

    The frame is written to ``captured_image.png`` and passed to the
    module-level EasyOCR ``reader``. Each non-blank text found is spoken
    with gTTS/pygame, one after another, before the function returns.

    Returns:
        A ``(text, image)`` pair matching the two Gradio outputs:
        on success, the list of detected strings and the frame with its
        last axis reversed (BGR -> RGB); on failure, an error message
        string and ``None``.
    """
    # Open the default camera (index 0) and always release it, even if the
    # read itself raises.
    capture = cv2.VideoCapture(0)
    try:
        ret, frame = capture.read()
    finally:
        capture.release()

    if not ret:
        return "Failed to capture image", None

    # Save the captured image; bail out rather than OCR a stale or missing
    # file when the write fails.
    filename = 'captured_image.png'
    if not cv2.imwrite(filename, frame):
        return "Failed to save captured image", None

    # Run OCR on the saved image
    results = reader.readtext(filename)

    detected_text = []
    for result in results:
        text = result[1]  # the recognised string of this OCR result
        if not text.strip():
            continue
        detected_text.append(text)
        _speak(text)

    # Convert BGR to RGB for display in Gradio
    return detected_text, frame[..., ::-1]
49
+
50
# Gradio UI: no input widgets; the callback's (text, image) return pair is
# rendered as a text box followed by the captured frame.
# NOTE(review): the `gr.outputs` namespace appears to exist only in older
# Gradio releases - confirm against the pinned Gradio version.
_output_components = [
    gr.outputs.Textbox(label="Detected Text"),
    gr.outputs.Image(type="numpy", label="Captured Image"),
]

interface = gr.Interface(
    fn=capture_and_process,
    inputs=None,
    outputs=_output_components,
    live=True,
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()