Sarath0x8f committed on
Commit
d69917c
·
verified ·
1 Parent(s): 84247c4

Upload 9 files

Browse files
Files changed (9) hide show
  1. Audio/output.wav +0 -0
  2. Audio/translate.wav +0 -0
  3. ObjCharRec.py +25 -0
  4. SpllingChecker.py +8 -0
  5. app.py +53 -0
  6. demo_app.py +48 -0
  7. main.py +15 -0
  8. requirements.txt +0 -0
  9. translate_speak.py +50 -0
Audio/output.wav ADDED
Binary file (168 kB). View file
 
Audio/translate.wav ADDED
Binary file (221 kB). View file
 
ObjCharRec.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from paddleocr import PaddleOCR
2
+
3
+ import translate_speak
4
+
5
+
6
def ocr_with_paddle(img):
    """Recognize English text in an image with PaddleOCR and synthesize speech for it.

    Args:
        img: Image to read; passed unchanged to ``PaddleOCR.ocr`` (the
            ``__main__`` demo below passes a file path).

    Returns:
        A ``(text, audio_path)`` tuple. On success ``text`` is every recognized
        line joined with a leading space and ``audio_path`` is the file
        returned by ``translate_speak.audio_streaming``. On any failure the
        tuple is ``("An err occurred upload image", None)`` so that callers
        expecting two values keep working on the error path as well.
    """
    import logging

    try:
        ocr = PaddleOCR(lang='en', use_angle_cls=True)
        result = ocr.ocr(img)

        # The first element holds the detected lines of the (single) image;
        # treat a missing/None page as "no lines" instead of crashing on len().
        lines = (result[0] if result else None) or []

        # Each line entry carries its recognized string at [1][0].
        finaltext = ''.join(' ' + line[1][0] for line in lines)

        audio_path = translate_speak.audio_streaming(txt=finaltext, to=1)
        return finaltext, audio_path
    except Exception:
        # UI boundary: report a friendly message, but keep the traceback in the
        # log rather than discarding it (and let KeyboardInterrupt/SystemExit
        # propagate, which a bare ``except:`` would not).
        logging.getLogger(__name__).exception("OCR failed")
        return "An err occurred upload image", None


if __name__ == "__main__":
    print(ocr_with_paddle('Images/download.jpeg'))
SpllingChecker.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from textblob import TextBlob
2
+
3
# Deliberately misspelled sample used to demonstrate TextBlob's spelling correction.
s = "i m lve in wth you"
print(f"original text: {s}")

b = TextBlob(s)
corrected = b.correct()

print("corrected text: " + str(corrected))
app.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import ObjCharRec
3
+ from deep_translator import GoogleTranslator
4
+
5
+ import demo_app
6
+ import translate_speak
7
+
8
# Languages supported by the translator: the plain list, and the
# ``as_dict=True`` variant of the same query.
langs_list = GoogleTranslator().get_supported_languages()
langs_dict = GoogleTranslator().get_supported_languages(as_dict=True)


def _ocr_text_only(img):
    """Run OCR on *img* and return only the recognized text.

    ``ObjCharRec.ocr_with_paddle`` returns a ``(text, audio_path)`` tuple on
    success, while the "Simple OCR" tab has a single text output. Wiring the
    function directly would show the whole tuple in the textbox, so the text
    is extracted here. A non-tuple result (an error string) is passed through
    unchanged.
    """
    result = ObjCharRec.ocr_with_paddle(img)
    return result[0] if isinstance(result, tuple) else result


with gr.Blocks() as main_interface:
    gr.Markdown("# OCR")
    with gr.Tabs():
        with gr.TabItem("Intro"):
            pass

        # Tab 1: plain OCR, text output only. Components get their own names so
        # they are not shadowed by the translator tab's components below.
        with gr.TabItem("Simple OCR"):
            gr.Markdown("Paddle OCR")
            with gr.Row():
                with gr.Column():
                    ocr_image_input = gr.Image(label="Upload Image")
                    with gr.Row():
                        ocr_clear_btn = gr.ClearButton()
                        ocr_submit_btn = gr.Button("Submit")
                ocr_output_text = gr.Text(label="Output")

            ocr_submit_btn.click(fn=_ocr_text_only, inputs=ocr_image_input, outputs=ocr_output_text)
            ocr_clear_btn.click(lambda: [None, None], outputs=[ocr_image_input, ocr_output_text])

        # Tab 2: OCR with speech, then translation of the recognized text.
        with gr.TabItem("translator"):
            with gr.Row():
                with gr.Column():
                    image_input = gr.Image(label="Upload Image")
                    with gr.Row():
                        clear_btn = gr.ClearButton()
                        submit_btn = gr.Button("Submit")
                with gr.Column():
                    with gr.Row():
                        output_text = gr.Text(label="Output")
                        audio_out = gr.Audio(label="Streamed Audio")
                    # Offer the language names explicitly rather than relying
                    # on iteration over the dict's keys.
                    lang_drop = gr.Dropdown(langs_list, label="language", interactive=True)
                    translate_btn = gr.Button("Translate")
                    with gr.Row():
                        translated_txt = gr.Text(label="translated text")
                        translated_out = gr.Audio(label="Streamed Audio")

            submit_btn.click(fn=ObjCharRec.ocr_with_paddle, inputs=image_input, outputs=[output_text, audio_out])
            translate_btn.click(fn=translate_speak.translate_txt, inputs=[lang_drop, output_text], outputs=[translated_txt, translated_out])
            # One None per component resets all five widgets of this tab.
            clear_btn.click(lambda: [None] * 5, outputs=[image_input, output_text, translated_txt, translated_out, audio_out])

if __name__ == "__main__":
    main_interface.launch()
demo_app.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import soundfile as sf
3
+ import numpy as np
4
+ import tempfile
5
+ import os
6
+
7
# Clip played when no explicit audio source is supplied.
direct_audio_file_path = "Audio/translated_audio.wav"  # Replace this with the actual file path


def audio_streaming(audio=None):
    """Copy an audio clip into a temporary ``.wav`` file and return its path.

    Args:
        audio: Source to read with ``soundfile``; when ``None`` the
            module-level ``direct_audio_file_path`` is used instead.

    Returns:
        Path of the temporary file. It is created with ``delete=False``, so it
        is not removed automatically.
    """
    source = direct_audio_file_path if audio is None else audio

    samples, rate = sf.read(source)
    # Normalise the sample dtype before handing the clip to Gradio.
    samples = np.array(samples, dtype=np.float32)

    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as handle:
        temp_audio_path = handle.name
        sf.write(temp_audio_path, samples, rate)

    return temp_audio_path
29
+
30
# Minimal Gradio UI: a single button that plays the predefined clip.
with gr.Blocks() as demo:
    gr.Markdown("### Audio Streaming App")

    play_button = gr.Button("Play Direct Audio")
    audio_output = gr.Audio(label="Streamed Audio")

    # No inputs are wired, so audio_streaming runs with its default argument
    # and the returned file path feeds the audio player.
    play_button.click(fn=audio_streaming, inputs=None, outputs=audio_output)

if __name__ == "__main__":
    demo.launch()
main.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ObjCharRec import ocr_with_paddle
2
+ from textblob import TextBlob
3
+
4
def demo(paths=None):
    """OCR a batch of images and return the spell-corrected text of each.

    Args:
        paths: Iterable of image paths to process. Defaults to the bundled
            sample images when omitted.

    Returns:
        A list with one spell-corrected string per input image, in order.
    """
    if paths is None:
        paths = ['Images/download.jpeg', 'Images/download.png', 'Images/hq720.jpg', 'Images/testocr.png']

    corrected = []
    for img in paths:
        result = ocr_with_paddle(img)
        # ocr_with_paddle returns (text, audio_path) on success; TextBlob needs
        # the text alone. A non-tuple result (error string) is used as-is.
        text = result[0] if isinstance(result, tuple) else result
        corrected.append(str(TextBlob(text).correct()))
    return corrected


if __name__ == "__main__":
    print(demo())
requirements.txt ADDED
Binary file (156 Bytes). View file
 
translate_speak.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from gtts import gTTS
3
+ from deep_translator import GoogleTranslator
4
+ import soundfile as sf
5
+ import tempfile
6
+ import numpy as np
7
+ import gtts
8
+
9
# Fixed locations where the synthesized speech is saved before being copied.
output_path = 'Audio/output.wav'
translate_path = 'Audio/translate.wav'


def audio_streaming(txt=None, lang='en', to=None):
    """Synthesize *txt* with gTTS and return the path of a temporary copy.

    Args:
        txt: Text to speak.
        lang: Language passed to ``gTTS``.
        to: ``1`` saves the speech to ``output_path``; any other value saves
            it to ``translate_path``.

    Returns:
        Path of a temporary ``.wav``-suffixed file holding the audio. It is
        created with ``delete=False``, so it is not removed automatically.
    """
    destination = output_path if to == 1 else translate_path
    gTTS(text=txt, lang=lang, slow=False).save(destination)

    # Read the saved speech back and normalise the sample dtype.
    samples, rate = sf.read(destination)
    samples = np.array(samples, dtype=np.float32)

    # Re-write into a temp file that Gradio can use for audio playback.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as handle:
        temp_audio_path = handle.name
        sf.write(temp_audio_path, samples, rate)

    return temp_audio_path
34
+
35
def translate_txt(lang, text):
    """Translate English *text* into *lang* and synthesize speech for the result.

    Args:
        lang: Target language as accepted by ``GoogleTranslator``.
        text: English text to translate.

    Returns:
        ``(translated_text, audio_path)``. Blank or missing input yields
        ``("", None)`` without calling the translator or the speech engine.
    """
    if not text or not text.strip():
        return "", None

    translator = GoogleTranslator(source="en", target=lang)
    translated_text = translator.translate(text)

    # Speak the translation in the target language instead of always using the
    # English voice. The supported-languages mapping is used to turn a language
    # name into its code (presumably name -> code; a value that is already a
    # code falls through unchanged). Fall back to 'en' when gTTS does not list
    # that code among its voices.
    lang_codes = translator.get_supported_languages(as_dict=True)
    code = lang_codes.get(lang, lang)
    speech_lang = code if code in gtts.lang.tts_langs() else 'en'

    audio_path = audio_streaming(translated_text, lang=speech_lang, to=2)

    return translated_text, audio_path
41
+
42
if __name__ == "__main__":
    # Report which translatable languages gTTS cannot speak. Both sides must be
    # language *codes*: the translator mapping is taken to be name -> code, so
    # its values are compared against the keys (codes) of gTTS's table.
    # Comparing the mapping's keys (names) with gTTS codes would make almost
    # every language look unsupported.
    translate = set(GoogleTranslator().get_supported_languages(as_dict=True).values())
    speak = set(gtts.lang.tts_langs())
    not_speak = translate - speak
    print(not_speak, len(not_speak))
49
+
50
+