# -*- coding: utf-8 -*-
"""Gradio demo: caption an image with BLIP, translate the caption, and speak it.

Automatically generated by Colaboratory from the notebook "Untitled26.ipynb".

Original file is located at
    https://colab.research.google.com/drive/16MPkNsEDa6YvZI14Zf6yJ-FlaFaFRrx2
"""

from gtts import gTTS
from googletrans import Translator
from deep_translator import GoogleTranslator
import requests
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
import os
from io import BytesIO
import gradio as gr
import tempfile
from gtts import gTTS

# Both the pre/post-processor and the captioning model are loaded once at
# import time from the same checkpoint, so every request reuses them.
_BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"
processor = BlipProcessor.from_pretrained(_BLIP_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(_BLIP_CHECKPOINT)
def caption_and_translate(image, target_language):
    """Caption an image, translate the caption, and synthesise it as speech.

    Args:
        image: The picture to describe, handed straight to the BLIP
            processor. ``None`` (nothing uploaded) is rejected.
        target_language: Language code used both as the translation target
            and as the gTTS voice language (for example ``"ar"``). An empty
            or missing value is rejected.

    Returns:
        A tuple ``(caption, translated_caption, audio_file_path)`` where
        ``caption`` is the English caption decoded from the model output,
        ``translated_caption`` is its translation, and ``audio_file_path``
        is the path of an MP3 file containing the spoken translation.

    Raises:
        gr.Error: If no image or no target language was supplied.
    """
    # Fail fast with a readable message instead of an opaque error raised
    # deep inside the model or the translator.
    if image is None:
        raise gr.Error("Please upload an image first.")
    if not target_language:
        raise gr.Error("Please select a target language.")

    inputs = processor(image, return_tensors="pt")
    out = model.generate(**inputs)
    caption = processor.decode(out[0], skip_special_tokens=True)

    translated_caption = GoogleTranslator(source='en', target=target_language).translate(caption)

    # Create an audio file for the translated caption.
    tts = gTTS(translated_caption, lang=target_language)

    # Every call gets its own file: a single fixed file name would be
    # overwritten by a concurrent or subsequent request while an earlier
    # response is still being served. The file is reserved and closed first,
    # then written by gTTS; it is deliberately not deleted here because the
    # caller reads it after this function returns.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as audio_file:
        audio_file_path = audio_file.name
    tts.save(audio_file_path)

    return caption, translated_caption, audio_file_path

# Input widgets. The image is delivered to the callback as a PIL image.
image_input = gr.components.Image(type="pil")
target_language_input = gr.components.Dropdown(
    choices=["ar"],
    # Preselect the only available language so the callback never receives
    # an empty selection when the user leaves the dropdown untouched.
    value="ar",
    label="Target Language"
)

# Output widgets, in the same order as the values returned by the callback:
# caption, translated caption, path of the generated audio file.
outputs = [
    gr.components.Textbox(label="Caption"),
    gr.components.Textbox(label="Translated Caption"),
    gr.components.Audio(type="filepath", label="Translated Caption Audio")
]

# Static page text, kept apart from the wiring below.
_APP_TITLE = "Translation Arabic Image Captioning with Arabic Voice Speech"
_APP_DESCRIPTION = "Our application operates by utilizing image recognition techniques to identify and comprehend the content of an image. Following this, a relevant caption is generated through advanced natural language processing algorithms. The generated caption is then translated into Arabic language using translation tools and techniques. Finally, the Arabic translated caption is converted into a voice speech output through text-to-speech technology."
_APP_ARTICLE = """
    
    Developed by:
    Saad Alabdulsalam
    Abdullah Alfawaz
    Abdulaziz Matar
    
    contact info :
    Email Saad :saadbinabdullah121@gmail.com
    Email Abdullah:alfawaz.ab1@gmail.com
    Email Abdulaziz:qpazoz@gmail.com
    """

# Bind the captioning callback to its input and output widgets.
iface = gr.Interface(
    fn=caption_and_translate,
    inputs=[image_input, target_language_input],
    outputs=outputs,
    title=_APP_TITLE,
    description=_APP_DESCRIPTION,
    article=_APP_ARTICLE,
)

# Start the web app as soon as the script runs.
iface.launch()