import streamlit as st
from transformers import AutoTokenizer, AutoModel, pipeline as transformers_pipeline, AutoModelForCausalLM
from diffusers import DiffusionPipeline
import requests
from PIL import Image
import io
import torch
import torch.nn.functional as F
import pandas as pd


# Function for mean pooling of token embeddings, weighted by the attention mask
def mean_pooling(model_output, attention_mask):
    token_embeddings = model_output[0]  # First element of model_output contains all token embeddings
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
    sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
    return sum_embeddings / sum_mask


# Load model and tokenizer from the Hugging Face Hub for sentence embeddings
tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')


def load_image(input_type, uploaded_file=None, image_url=""):
    """
    Loads an image from an uploaded file or URL.
    """
    if input_type == "Upload Image" and uploaded_file is not None:
        return Image.open(io.BytesIO(uploaded_file.getvalue()))
    elif input_type == "Image URL" and image_url:
        try:
            response = requests.get(image_url)
            return Image.open(io.BytesIO(response.content))
        except Exception as e:
            st.error(f"Error loading image from URL: {e}")
    return None


def image_to_caption(image, input_type, uploaded_file, image_url):
    """
    Generates a caption for the given image.
    """
    image_to_text_pipeline = transformers_pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
    if input_type == "Upload Image" and uploaded_file is not None:
        # The image-to-text pipeline expects a PIL image or a URL/path, not raw bytes,
        # so pass the already-loaded PIL image rather than uploaded_file.getvalue().
        return image_to_text_pipeline(image)[0]['generated_text']
    elif input_type == "Image URL" and image_url:
        return image_to_text_pipeline(image_url)[0]['generated_text']
    return ""


def select_closest_sentence(generated_text):
    """
    Selects the sentence closest in meaning to the generated_text.
    """
    # Load CSV data
    df = pd.read_csv('toys_and_games_reviews.csv', encoding='ISO-8859-1')
    sentences = df.iloc[:, -1].tolist()  # Assuming the last column contains sentences

    # Tokenize and compute embeddings for sentences from the CSV
    encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():
        model_output = model(**encoded_input)
    sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
    sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)

    # Tokenize and compute the embedding for the generated_text
    encoded_new_sentence = tokenizer([generated_text], padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():
        model_output_new_sentence = model(**encoded_new_sentence)
    new_sentence_embedding = mean_pooling(model_output_new_sentence, encoded_new_sentence['attention_mask'])
    new_sentence_embedding = F.normalize(new_sentence_embedding, p=2, dim=1)

    # Find the most similar sentence in the corpus
    most_similar_idx = F.cosine_similarity(new_sentence_embedding, sentence_embeddings).topk(1).indices.item()
    most_similar_sentence = sentences[most_similar_idx]
    return most_similar_sentence


def generate_text_from_caption(caption):
    """
    Generates text based on the provided caption.
""" text_generator = transformers_pipeline('text-generation', model='pranavpsv/genre-story-generator-v2') generated = text_generator(caption, max_length=100, num_return_sequences=1) return generated[0]['generated_text'] def main(): st.title('Image to Story to Image Converter') # User interface for input selection input_type = st.radio("Select input type:", ("Upload Image", "Image URL")) uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"]) if input_type == "Upload Image" else None image_url = st.text_input("Enter the image URL