import os
from transformers import BlipProcessor, BlipForConditionalGeneration
import gradio as gr

# Hugging Face access token, read from the environment variable that carries
# the configured secret ("Image_classification"). os.getenv returns None when
# the variable is unset; that None is passed through to from_pretrained as-is.
HUGGINGFACE_TOKEN = os.getenv("Image_classification")

# Hub repository id shared by the processor and the model, kept in one place
# so the two loads cannot drift apart.
MODEL_ID = "quadranttechnologies/qhub-blip-image-captioning-finetuned"

# Load the processor and model once, at import time. `token` is the current
# name of the authentication argument in transformers; the older
# `use_auth_token` spelling is deprecated.
processor = BlipProcessor.from_pretrained(MODEL_ID, token=HUGGINGFACE_TOKEN)
model = BlipForConditionalGeneration.from_pretrained(
    MODEL_ID,
    token=HUGGINGFACE_TOKEN,
)

# Function to generate captions for uploaded images
def generate_caption(image) -> str:
    """Generate a text caption for an image using the module-level BLIP model.

    Args:
        image: The image to caption; it is handed directly to ``processor``.
            May be ``None``, which is treated as "no image supplied".

    Returns:
        The decoded caption on success. On failure, a message beginning with
        ``"Error generating caption:"`` is returned instead of raising, so the
        caller always receives a string it can display.
    """
    # Guard the empty-input case up front: calling the processor with nothing
    # would only surface an opaque library error to the user.
    if image is None:
        return "Error generating caption: no image was provided."

    try:
        # Convert the image into PyTorch tensors the model can consume.
        inputs = processor(image, return_tensors="pt")

        # Generate token ids, then decode the first (only) sequence to text.
        outputs = model.generate(**inputs)
        return processor.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        # Deliberate catch-all at the UI boundary: report the failure as text
        # rather than letting the exception propagate to the caller.
        return f"Error generating caption: {e}"

# Build the Gradio UI: one image upload wired to generate_caption, with the
# string it returns rendered as plain text.
_image_input = gr.Image(type="pil")  # uploads reach the callback as PIL images
interface = gr.Interface(
    title="Image Captioning Model",
    description="Upload an image to generate a caption using the fine-tuned BLIP model.",
    fn=generate_caption,
    inputs=_image_input,
    outputs="text",
)

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    # share=True asks Gradio to also create a public share link.
    interface.launch(share=True)