Jim_Aiden / app_failedTesting_20240316.py
AidenYan's picture
Rename app.py to app_failedTesting_20240316.py
a69636e verified
raw
history blame
4.02 kB
from transformers import pipeline, AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F
import streamlit as st
from PIL import Image
import requests
from io import BytesIO
import numpy as np
import pandas as pd
def mean_pooling(model_output, attention_mask):
    """Average token embeddings, counting only positions kept by the mask.

    Args:
        model_output: Indexable model result whose first element holds the
            per-token embeddings (presumably ``(batch, seq_len, hidden)``).
        attention_mask: Mask with one entry per token; it is broadcast over
            the embedding dimension before weighting.

    Returns:
        The mask-weighted mean of the token embeddings along dimension 1.
    """
    per_token = model_output[0]
    # Stretch the mask over the hidden dimension so it can weight every
    # embedding component.
    weights = attention_mask.unsqueeze(-1).expand_as(per_token).float()
    weighted_total = (per_token * weights).sum(dim=1)
    # Clamp the denominator so a fully masked row does not divide by zero.
    token_counts = weights.sum(dim=1).clamp(min=1e-9)
    return weighted_total / token_counts
# Hugging Face checkpoints used by the app.
EMBEDDING_CHECKPOINT = 'sentence-transformers/all-MiniLM-L6-v2'
CAPTION_CHECKPOINT = "nlpconnect/vit-gpt2-image-captioning"

# Sentence-embedding tokenizer and encoder (both from the same checkpoint).
tokenizer = AutoTokenizer.from_pretrained(EMBEDDING_CHECKPOINT)
model = AutoModel.from_pretrained(EMBEDDING_CHECKPOINT)

# Captioning pipeline that turns an image into a text description.
image_to_text = pipeline("image-to-text", model=CAPTION_CHECKPOINT)

# Page heading.
st.title('Image Captioning and Review Visualization Application')
def get_embeddings(sentences, batch_size=64):
    """Encode sentences into L2-normalised sentence embeddings.

    The input is processed in batches of ``batch_size`` so that memory use
    is bounded by the batch rather than by the whole input; encoding an
    entire review corpus in a single forward pass can exhaust memory.

    Args:
        sentences: A single string or an iterable of strings.
        batch_size: Maximum number of sentences per forward pass.

    Returns:
        A float tensor with one unit-length row per input sentence. An empty
        input yields a tensor with zero rows.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # A bare string must stay one sentence; slicing it below would otherwise
    # cut it into character chunks.
    if isinstance(sentences, str):
        sentences = [sentences]
    else:
        sentences = list(sentences)

    if not sentences:
        # Nothing to encode: return an empty matrix with the encoder's width.
        return torch.empty((0, model.config.hidden_size))

    normalized_batches = []
    with torch.no_grad():
        for start in range(0, len(sentences), batch_size):
            batch = sentences[start:start + batch_size]
            # Tokenize sentences (padding is per batch; the attention mask
            # keeps pad tokens out of the pooled average).
            encoded_input = tokenizer(batch, padding=True, truncation=True, return_tensors='pt')
            # Compute token embeddings
            model_output = model(**encoded_input)
            # Perform pooling
            pooled = mean_pooling(model_output, encoded_input['attention_mask'])
            # Normalize embeddings
            normalized_batches.append(F.normalize(pooled, p=2, dim=1))
    return torch.cat(normalized_batches, dim=0)
@st.cache(allow_output_mutation=True)
def load_dataset():
    """Read the review CSV and embed every non-missing review text.

    Returns:
        A ``(texts, embeddings)`` tuple: the list of review strings taken
        from the ``reviewText`` column and a NumPy array holding one
        embedding row per review, in the same order.
    """
    review_column = pd.read_csv('toys_and_games_reviews.csv')['reviewText']
    # Rows without review text are dropped before embedding.
    texts = review_column.dropna().tolist()
    embeddings = get_embeddings(texts).numpy()
    return texts, embeddings
# Module-level corpus used by the similarity search below: the review strings
# and their embedding matrix (a NumPy array, one row per review).
review_texts, review_embeddings = load_dataset()
def find_top_n_similar_reviews(query_embedding, review_embeddings, review_texts, top_n=3):
    """Return the review texts whose embeddings best match the query.

    Similarity is the dot product between the query and each review
    embedding, which equals cosine similarity when the embeddings are
    L2-normalised.

    Args:
        query_embedding: Query vector as a ``(1, dim)`` or ``(dim,)`` tensor
            or array.
        review_embeddings: ``(num_reviews, dim)`` tensor or array, row-aligned
            with ``review_texts``.
        review_texts: Sequence of review strings.
        top_n: Maximum number of reviews to return.

    Returns:
        Up to ``top_n`` review texts, most similar first. Fewer are returned
        when the corpus is smaller than ``top_n``; an empty corpus or a
        non-positive ``top_n`` yields an empty list.
    """
    # Accept NumPy arrays as well as tensors and force a common dtype so the
    # matrix product cannot fail on a float32/float64 mismatch.
    query = torch.as_tensor(query_embedding, dtype=torch.float32)
    reviews = torch.as_tensor(review_embeddings, dtype=torch.float32)
    if query.dim() == 1:
        query = query.unsqueeze(0)

    # torch.topk raises when k exceeds the number of candidates, so clamp it.
    k = max(0, min(top_n, reviews.shape[0]))
    if k == 0:
        return []

    similarities = torch.mm(query, reviews.T).squeeze(0)
    top_n_indices = torch.topk(similarities, k).indices.tolist()
    return [review_texts[i] for i in top_n_indices]
# --- Input selection ---------------------------------------------------------
input_type = st.radio("Select input type:", ("Upload Image", "Image URL", "Text"))
image = None
text_input = ""

# Image upload handling
if input_type == "Upload Image":
    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
    if uploaded_file is not None:
        image = Image.open(uploaded_file)
        st.image(image, caption='Uploaded Image', use_column_width=True)
elif input_type == "Image URL":
    image_url = st.text_input("Enter the image URL here:", "")
    if image_url:
        try:
            # Bound the wait and surface HTTP errors (404, 500, ...) instead
            # of trying to decode an error page as an image.
            response = requests.get(image_url, timeout=10)
            response.raise_for_status()
            image = Image.open(BytesIO(response.content))
            st.image(image, caption='Image from URL', use_column_width=True)
        except Exception as e:
            # Top-level UI boundary: report any download/decoding problem to
            # the user rather than crashing the app.
            st.error(f"Error loading image from URL: {e}")
            image = None
elif input_type == "Text":
    text_input = st.text_area("Enter text here:", "")

# --- Caption generation and review lookup ------------------------------------
if st.button('Generate Caption'):
    result_text = ""
    if input_type in ("Upload Image", "Image URL") and image is not None:
        with st.spinner("Generating caption..."):
            # Pass the already-decoded PIL image: the Streamlit upload object
            # is not an input the pipeline understands, and passing the URL
            # would download the image a second time.
            result = image_to_text(image)
        if result:
            result_text = result[0]['generated_text']
        else:
            # Report the failure instead of treating the error message as a
            # caption and searching reviews with it.
            st.error("Failed to generate caption.")
    elif input_type == "Text" and text_input.strip():
        result_text = text_input
    else:
        st.warning("Please upload an image, enter an image URL or input text")

    if result_text:
        st.success(f'Generated Caption: {result_text}')
        # get_embeddings already returns a tensor; only the cached review
        # matrix (a NumPy array) needs converting.
        query_embedding = get_embeddings([result_text]).float()
        corpus_embeddings = torch.as_tensor(review_embeddings, dtype=torch.float32)
        similar_reviews = find_top_n_similar_reviews(query_embedding, corpus_embeddings, review_texts)
        st.write("Similar Reviews Based on the Caption:")
        for review in similar_reviews:
            st.write(review)