"""Streamlit app: show the Kanye West lyrics CLIP scores as closest to an image.

The user supplies an image URL; the image is downloaded, scored against every
lyrics entry from ``kanye_lyrics.txt`` with CLIP, and the best-scoring entry
is written to the page.
"""

import io

import requests
import streamlit as st
import torch
from PIL import Image
from transformers import CLIPModel, CLIPProcessor

MODEL_NAME = "openai/clip-vit-base-patch32"
LYRICS_PATH = "kanye_lyrics.txt"
LYRICS_SEPARATOR = "\n\n\n\n"
DEFAULT_IMAGE_URL = "https://i.redd.it/0vsrxk3gcfr91.jpg"
REQUEST_TIMEOUT_SECONDS = 10

# Streamlit re-executes this script on every widget interaction, so the model
# must be cached or it is reloaded each time. Fall back through the older
# decorator name (and finally a no-op) so the script still runs on Streamlit
# releases that predate ``st.cache_resource``.
_cache_resource = (
    getattr(st, "cache_resource", None)
    or getattr(st, "experimental_singleton", None)
    or (lambda func: func)
)


@_cache_resource
def load_clip():
    """Return the ``(model, processor)`` pair for ``MODEL_NAME``."""
    clip_model = CLIPModel.from_pretrained(MODEL_NAME)
    clip_processor = CLIPProcessor.from_pretrained(MODEL_NAME)
    return clip_model, clip_processor


def load_lyrics(path: str) -> list:
    """Read *path* and return its non-blank entries as a list of strings.

    Entries are separated by ``LYRICS_SEPARATOR``. Blank chunks (for example
    one produced by a trailing separator) are dropped so that an empty string
    is never scored as a candidate song.
    """
    # Explicit encoding: the platform default is not UTF-8 everywhere.
    with open(path, "r", encoding="utf-8") as f:
        chunks = f.read().split(LYRICS_SEPARATOR)
    return [chunk for chunk in chunks if chunk.strip()]


def fetch_image(url: str) -> Image.Image:
    """Download *url* and return it as an RGB ``PIL.Image.Image``.

    Raises:
        requests.RequestException: the request failed, timed out, or the
            server answered with an HTTP error status.
        OSError: the response body could not be decoded as an image.
    """
    # NOTE(review): the URL is user-supplied and fetched server-side; consider
    # restricting allowed hosts if this app is exposed publicly.
    response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    # Without this, an HTTP error page would be handed to PIL as "image" data.
    response.raise_for_status()
    # ``response.content`` is the fully downloaded, content-decoded body;
    # ``convert`` forces the decode here so a corrupt file fails inside this
    # function instead of later during display or preprocessing.
    return Image.open(io.BytesIO(response.content)).convert("RGB")


st.title("Most Similar Kanye West Song")

# read lyrics from all kanye west songs
all_lyrics = load_lyrics(LYRICS_PATH)
if not all_lyrics:
    st.error(f"No lyrics were found in {LYRICS_PATH}.")
    st.stop()

# load model and processor
model, processor = load_clip()

# get user input
url = st.text_input("Enter an image url: ", DEFAULT_IMAGE_URL).strip()
if not url:
    st.info("Enter an image URL to find the most similar song.")
    st.stop()

# get image from url
try:
    image = fetch_image(url)
except (requests.RequestException, OSError) as exc:
    st.error(f"Could not load an image from that URL: {exc}")
    st.stop()

# Show the exact image that is scored below.
st.image(image, width=700)

# get the image-text similarity score
inputs = processor(
    text=all_lyrics,
    images=image,
    return_tensors="pt",
    padding=True,
    truncation=True,
)
# Inference only: skip autograd bookkeeping.
with torch.no_grad():
    outputs = model(**inputs)
logits_per_image = outputs.logits_per_image  # this is the image-text similarity score

# get the most similar kanye song
# Softmax is monotonic, so the arg-max of the raw logits selects the same entry
# the arg-max of the probabilities would.
most_similar_song = all_lyrics[logits_per_image.argmax(dim=1).item()]
st.write(most_similar_song)