import gradio as gr
import torch
import os
from PIL import Image
import pandas as pd
from transformers import CLIPProcessor, CLIPModel

checkpoint = "vincentclaes/emoji-predictor"

# x_, _, files = next(os.walk("./emojis"))
# no_of_emojis = range(len(files))
# emojis_as_images = [Image.open(f"emojis/{i}.png") for i in no_of_emojis]

# Candidate tags: one adjective per line in adjectives.txt.
adjectives = pd.read_table("./adjectives.txt", header=None)[0].to_list()

# Number of tag suggestions to return.
K = 4

processor = CLIPProcessor.from_pretrained(checkpoint)
model = CLIPModel.from_pretrained(checkpoint)


def concat_images(*images):
    """Generate a 2x2 composite of all supplied images.

    https://stackoverflow.com/a/71315656/1771155
    """
    # Get the widest width.
    width = max(image.width for image in images)
    # Get the tallest height.
    height = max(image.height for image in images)
    # The composite is a 2x2 grid, so twice the max width and height.
    composite = Image.new("RGB", (2 * width, 2 * height))
    assert K == 4, "We expect 4 suggestions, other numbers won't work."
    for i, image in enumerate(images):
        if i == 0:
            composite.paste(image, (0, 0))
        elif i == 1:
            composite.paste(image, (width, 0))
        elif i == 2:
            composite.paste(image, (0, height))
        elif i == 3:
            composite.paste(image, (width, height))
    return composite


def get_tag(emoji, tags, model=model, processor=processor, K=4):
    # Use the user-provided tags if any, otherwise fall back to the adjective list.
    if tags:
        tags = [tag.strip() for tag in tags.strip().split(",")]
    else:
        tags = adjectives
    inputs = processor(
        text=tags, images=emoji, return_tensors="pt", padding=True, truncation=True
    )
    with torch.no_grad():
        outputs = model(**inputs)
    # Take the softmax over the tags to get the label probabilities.
    probs = outputs.logits_per_text.softmax(dim=0)
    probs_formatted = torch.tensor([prob[0] for prob in probs])
    # Never ask for more suggestions than there are tags.
    values, indices = probs_formatted.topk(min(K, len(tags)))
    return "Tag (confidence): " + ", ".join(
        f"{tags[i]} ({round(v.item(), 2)})" for v, i in zip(values, indices)
    )


title = "Tagging an Emoji"
description = """You provide an Emoji and our few-shot fine-tuned CLIP model will suggest some appropriate tags.\n
By default we use the [228 most common adjectives in English](https://grammar.yourdictionary.com/parts-of-speech/adjectives/list-of-adjective-words.html).\n
You can also specify your own custom tags, for example: love,hate,fun,bitterness.
"""

examples = [[f"emojis/{i}.png"] for i in range(32)]

# Unused text input; not wired into the interface below.
# text = gr.inputs.Textbox(
#     placeholder="Enter a text and we will try to predict an emoji..."
# )

gr.Interface(
    fn=get_tag,
    inputs=[
        gr.components.Image(type="pil", label="emoji"),
        gr.components.Textbox(
            label="tags",
            placeholder="Provide a comma-separated list of tags: tag1,tag2,tag3,...",
        ),
    ],
    outputs=gr.components.Textbox(),
    examples=examples,
    examples_per_page=32,
    title=title,
    description=description,
).launch()