# -*- coding: utf-8 -*-
"""Untitled26.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/16MPkNsEDa6YvZI14Zf6yJ-FlaFaFRrx2

Gradio demo: caption an uploaded image with the BLIP base captioning model,
translate the English caption with GoogleTranslator, and synthesize the
translated caption to an MP3 file with gTTS.
"""

import os
import tempfile
from io import BytesIO
from typing import Tuple

import gradio as gr
import requests
from deep_translator import GoogleTranslator
from googletrans import Translator
from gtts import gTTS
from PIL import Image
from transformers import BlipForConditionalGeneration, BlipProcessor

# Checkpoint shared by the processor and the captioning model.
MODEL_NAME = "Salesforce/blip-image-captioning-base"

# Loaded once at import time so every request reuses the same objects.
processor = BlipProcessor.from_pretrained(MODEL_NAME)
model = BlipForConditionalGeneration.from_pretrained(MODEL_NAME)


def caption_and_translate(
    image: Image.Image, target_language: str
) -> Tuple[str, str, str]:
    """Caption an image, translate the caption, and synthesize it to speech.

    Args:
        image: The picture to describe, as delivered by the ``type="pil"``
            Gradio image input.
        target_language: Language code passed both to ``GoogleTranslator``
            as the translation target and to ``gTTS`` as the speech language
            (the UI currently offers only ``"ar"``).

    Returns:
        A ``(caption, translated_caption, audio_file_path)`` tuple: the
        English caption decoded from the model output, its translation, and
        the path of the MP3 file containing the spoken translation.

    Raises:
        ValueError: If no image or no target language was supplied.
    """
    # Gradio passes None when the user submits without uploading an image or
    # without choosing a language; fail with a clear message instead of an
    # obscure error from inside the model or the translator.
    if image is None:
        raise ValueError("Please provide an image to caption.")
    if not target_language:
        raise ValueError("Please choose a target language.")

    inputs = processor(image, return_tensors="pt")
    out = model.generate(**inputs)
    caption = processor.decode(out[0], skip_special_tokens=True)

    translated_caption = GoogleTranslator(
        source="en", target=target_language
    ).translate(caption)

    # Write each result to its own temporary file. A single fixed filename
    # would be overwritten by the next request while the audio output may
    # still be reading it. The file is closed before gTTS writes to it and is
    # not deleted here, because the returned path is consumed by the Audio
    # output after this function returns.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as audio_file:
        audio_file_path = audio_file.name
    tts = gTTS(translated_caption, lang=target_language)
    tts.save(audio_file_path)

    return caption, translated_caption, audio_file_path


image_input = gr.components.Image(type="pil")
target_language_input = gr.components.Dropdown(
    choices=["ar"],
    value="ar",  # preselect the only option so a submit never sends None
    label="Target Language",
)

outputs = [
    gr.components.Textbox(label="Caption"),
    gr.components.Textbox(label="Translated Caption"),
    gr.components.Audio(type="filepath", label="Translated Caption Audio"),
]

iface = gr.Interface(
    fn=caption_and_translate,
    inputs=[image_input, target_language_input],
    outputs=outputs,
    title="Translation Arabic Image Captioning with Arabic Voice Speech",
    description=(
        "Our application operates by utilizing image recognition techniques "
        "to identify and comprehend the content of an image. Following this, "
        "a relevant caption is generated through advanced natural language "
        "processing algorithms. "
        "The generated caption is then translated into Arabic language using "
        "translation tools and techniques. Finally, the Arabic translated "
        "caption is converted into a voice speech output through "
        "text-to-speech technology."
    ),
    article=(
        " Developed by: Saad Alabdulsalam Abdullah Alfawaz Abdulaziz Matar "
        "contact info : Email Saad :saadbinabdullah121@gmail.com "
        "Email Abdullah:alfawaz.ab1@gmail.com "
        "Email Abdulaziz:qpazoz@gmail.com "
    ),
)

iface.launch()