Spaces:
Runtime error
Runtime error
from transformers import pipeline, AutoTokenizer, AutoModel | |
import torch | |
import torch.nn.functional as F | |
import streamlit as st | |
from PIL import Image | |
import requests | |
from io import BytesIO | |
import numpy as np | |
import pandas as pd | |
# Mean pooling: average the token embeddings, weighted by the attention mask
def mean_pooling(model_output, attention_mask):
    """Average token embeddings over the positions selected by the mask.

    Args:
        model_output: Indexable model output whose first element holds the
            per-token embeddings.
        attention_mask: Mask tensor; it is expanded across the embedding
            dimension and used as the averaging weight.

    Returns:
        The mask-weighted sum over dimension 1 divided by the mask total.
        The divisor is clamped to at least 1e-9 so an all-zero mask does
        not divide by zero.
    """
    token_embeddings = model_output[0]
    mask = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    weighted_sum = (token_embeddings * mask).sum(1)
    mask_total = mask.sum(1).clamp(min=1e-9)
    return weighted_sum / mask_total
# Model identifiers, named once so the tokenizer and encoder always match.
EMBEDDING_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
CAPTION_MODEL_NAME = "nlpconnect/vit-gpt2-image-captioning"


@st.cache_resource
def _load_models():
    """Load the tokenizer, sentence encoder and captioning pipeline.

    Streamlit re-executes this script on every widget interaction. Without
    caching, all three models would be re-instantiated on each rerun, so
    the loaded objects are kept in Streamlit's resource cache instead.

    Returns:
        A ``(tokenizer, model, image_to_text)`` tuple.
    """
    return (
        AutoTokenizer.from_pretrained(EMBEDDING_MODEL_NAME),
        AutoModel.from_pretrained(EMBEDDING_MODEL_NAME),
        pipeline("image-to-text", model=CAPTION_MODEL_NAME),
    )


# Initialize the tokenizer, the embedding model and the image-to-text pipeline
tokenizer, model, image_to_text = _load_models()

st.title('Image Captioning and Review Visualization Application')
def get_embeddings(sentences, batch_size=64):
    """Encode sentences into L2-normalised sentence embeddings.

    Sentences are encoded in batches of ``batch_size`` so that memory use
    is bounded by the batch rather than by the whole input (padding is
    applied per batch, to that batch's longest sentence).

    Args:
        sentences: A single string or a sequence of strings.
        batch_size: Number of sentences passed to the model at once.

    Returns:
        A tensor with one normalised embedding row per input sentence.
        An empty input yields a tensor with zero rows.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # A bare string is treated as one sentence, matching what the tokenizer
    # does, instead of being sliced character by character below.
    if isinstance(sentences, str):
        sentences = [sentences]
    sentences = list(sentences)

    if not sentences:
        # The tokenizer cannot handle an empty batch; return an empty result.
        return torch.empty((0, model.config.hidden_size))

    normalized_batches = []
    for start in range(0, len(sentences), batch_size):
        batch = sentences[start:start + batch_size]
        # Tokenize sentences
        encoded_input = tokenizer(batch, padding=True, truncation=True, return_tensors='pt')
        # Compute token embeddings (inference only, so no gradients)
        with torch.no_grad():
            model_output = model(**encoded_input)
        # Perform pooling
        pooled = mean_pooling(model_output, encoded_input['attention_mask'])
        # Normalize embeddings
        normalized_batches.append(F.normalize(pooled, p=2, dim=1))

    return torch.cat(normalized_batches, dim=0)
# Load the dataset
@st.cache_data
def load_dataset():
    """Read the review CSV and embed every review text.

    The result is cached by Streamlit so the CSV is not re-read and
    re-embedded each time the script reruns on a widget interaction.
    Reviews are embedded in fixed-size chunks so the encoder never
    receives the entire dataset as one padded batch.

    Returns:
        A ``(review_texts, review_embeddings)`` tuple: the list of
        non-null ``reviewText`` values and a NumPy array with one
        embedding row per review, in the same order.

    Raises:
        ValueError: If the CSV has no non-null ``reviewText`` values.
    """
    chunk_size = 256

    df = pd.read_csv('toys_and_games_reviews.csv')
    review_texts = df['reviewText'].dropna().tolist()
    if not review_texts:
        raise ValueError("toys_and_games_reviews.csv contains no review texts")

    embedded_chunks = [
        get_embeddings(review_texts[start:start + chunk_size]).numpy()
        for start in range(0, len(review_texts), chunk_size)
    ]
    review_embeddings = np.concatenate(embedded_chunks, axis=0)
    return review_texts, review_embeddings


review_texts, review_embeddings = load_dataset()
# Find top N similar reviews
def find_top_n_similar_reviews(query_embedding, review_embeddings, review_texts, top_n=3):
    """Return the review texts whose embeddings best match the query.

    Similarity is the dot product between the query embedding and each
    review embedding, so it equals cosine similarity only when both sides
    are already L2-normalised.

    Args:
        query_embedding: Query embedding as a tensor or array-like, either
            a single row (2-D) or a flat vector (1-D).
        review_embeddings: 2-D tensor or array-like with one row per review.
        review_texts: Review texts, index-aligned with ``review_embeddings``.
        top_n: Maximum number of reviews to return.

    Returns:
        Up to ``top_n`` review texts, most similar first. Fewer are
        returned when there are fewer reviews; an empty list is returned
        when there are no reviews or ``top_n`` is not positive.
    """
    if top_n <= 0 or len(review_texts) == 0:
        return []

    query = torch.as_tensor(query_embedding)
    if not query.is_floating_point():
        query = query.float()
    if query.dim() == 1:
        # Accept a flat vector by treating it as a single-row matrix.
        query = query.unsqueeze(0)
    # Match the query dtype so torch.mm does not fail on mixed precision.
    reviews = torch.as_tensor(review_embeddings, dtype=query.dtype)

    similarities = torch.mm(query, reviews.T).squeeze(0)
    # torch.topk raises if k exceeds the number of candidates, so clamp it.
    k = min(top_n, similarities.shape[-1])
    top_n_indices = torch.topk(similarities, k).indices.tolist()
    return [review_texts[i] for i in top_n_indices]
# --- Input selection ---------------------------------------------------------
# The user supplies an uploaded image, an image URL, or plain text. Images are
# decoded into a PIL image here so the captioning step has one input type.
input_type = st.radio("Select input type:", ("Upload Image", "Image URL", "Text"))
image = None
text_input = ""

# Image upload handling
if input_type == "Upload Image":
    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
    if uploaded_file is not None:
        image = Image.open(uploaded_file)
        st.image(image, caption='Uploaded Image', use_column_width=True)
elif input_type == "Image URL":
    image_url = st.text_input("Enter the image URL here:", "")
    if image_url:
        # Broad catch is deliberate: this is the UI boundary, and any
        # download or decode failure should be shown to the user, not crash.
        try:
            # A timeout stops an unresponsive host from hanging the app;
            # raise_for_status stops an HTTP error page being decoded as an image.
            response = requests.get(image_url, timeout=10)
            response.raise_for_status()
            image = Image.open(BytesIO(response.content))
            st.image(image, caption='Image from URL', use_column_width=True)
        except Exception as e:
            st.error(f"Error loading image from URL: {e}")
            image = None
elif input_type == "Text":
    text_input = st.text_area("Enter text here:", "")

# --- Caption generation and review lookup -------------------------------------
if st.button('Generate Caption'):
    result_text = ""
    if input_type in ("Upload Image", "Image URL") and image is not None:
        with st.spinner("Generating caption..."):
            # Pass the decoded PIL image: the pipeline does not accept
            # Streamlit's uploaded-file object, and this avoids fetching
            # the URL a second time.
            result = image_to_text(image)
        if result:
            result_text = result[0]['generated_text']
        else:
            # Report the failure instead of searching reviews with the
            # error message as if it were a caption.
            st.error("Failed to generate caption.")
    elif input_type == "Text" and text_input:
        result_text = text_input
    else:
        st.warning("Please upload an image, enter an image URL or input text")

    if result_text:
        st.success(f'Generated Caption: {result_text}')
        query_embedding = get_embeddings([result_text])
        similar_reviews = find_top_n_similar_reviews(
            query_embedding.float(),
            torch.as_tensor(review_embeddings).float(),
            review_texts,
        )
        st.write("Similar Reviews Based on the Caption:")
        for review in similar_reviews:
            st.write(review)