Spaces:
Runtime error
Runtime error
File size: 2,903 Bytes
6a8ca1f 04fc1f1 6a8ca1f e9cc0b5 6a8ca1f de50a7e 04fc1f1 ee5e19e 134e8f7 db2ea29 ee5e19e db2ea29 3f71d24 db2ea29 471f9af 3f71d24 f2ab852 3f71d24 f2ab852 3f71d24 134e8f7 6a8ca1f ee5e19e 471f9af fefde70 6a8ca1f 69cfbe8 e9ecb71 69cfbe8 6a8ca1f f2ab852 471f9af 3f71d24 6a8ca1f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
import spaces
import torch
import re
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from PIL import Image
# Choose the execution target once at import time: half precision when a CUDA
# device is reported, full precision on CPU otherwise.
device, dtype = (
    ("cuda", torch.float16) if torch.cuda.is_available() else ("cpu", torch.float32)
)

# The checkpoint is addressed by repository id plus an explicit revision, and
# the same pair is used for both the tokenizer and the model.
model_id = "vikhyatk/moondream2"
revision = "2024-04-02"

tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)

# trust_remote_code=True is passed because the model is loaded through the
# generic AutoModelForCausalLM entry point from a hub repository.
moondream = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    revision=revision,
    torch_dtype=dtype,
)
moondream = moondream.to(device=device)

# Inference only: switch the module to evaluation mode.
moondream.eval()
@spaces.GPU
def answer_questions(image_tuples, prompt_text):
    """Ask every prompt about every uploaded image and format the answers.

    Args:
        image_tuples: Value of the gallery input. Each item is indexed with
            ``[0]`` to obtain the image (presumably ``(PIL image, caption)``
            pairs -- confirm against the Gradio version in use). ``None``, an
            empty list, and items without an image are tolerated.
        prompt_text: Prompts separated by commas. Surrounding whitespace is
            stripped and blank entries (e.g. from a trailing comma) are
            dropped.

    Returns:
        A ``(markdown, table)`` tuple. ``markdown`` lists, per prompt, the
        answer for each image. ``table`` is ``{'headers': prompts, 'data':
        rows}`` with one row per image and one cell per prompt, so every row
        has exactly as many cells as there are headers.
    """
    prompts = [p for p in (part.strip() for part in (prompt_text or "").split(",")) if p]

    # Keep each image together with the label of its gallery position so that
    # skipping an empty slot can never shift answers onto the wrong image.
    # Conversion to RGB is done once here instead of once per prompt.
    labelled_images = [
        (f"image{position}", item[0].convert("RGB"))
        for position, item in enumerate(image_tuples or [], start=1)
        if item is not None and item[0] is not None
    ]
    print(f"\nprompts: {prompts}\n\n")

    if not prompts or not labelled_images:
        # Nothing to ask or nothing to ask it about: skip the model call.
        return (
            "Please upload at least one image and enter at least one prompt.",
            {'headers': prompts, 'data': []},
        )

    image_names = [name for name, _ in labelled_images]
    rgb_images = [image for _, image in labelled_images]

    # One batched call per prompt, with that prompt repeated for every image.
    # The result is assumed to be one answer per image, in input order
    # (that is how the answers are consumed below).
    answers = [
        moondream.batch_answer(
            images=rgb_images,
            prompts=[prompt] * len(rgb_images),
            tokenizer=tokenizer,
        )
        for prompt in prompts
    ]

    sections = []
    for prompt, prompt_answers in zip(prompts, answers):
        sections.append(f"###Q: {prompt}\n")
        sections.extend(
            f"**{image_name} A:**\n{answer_text}\n\n"
            for image_name, answer_text in zip(image_names, prompt_answers)
        )
    q_and_a = "".join(sections)

    # `answers` is indexed [prompt][image]; the table uses prompts as column
    # headers, so transpose it to [image][prompt].
    rows = [list(per_image) for per_image in zip(*answers)]
    result = {'headers': prompts, 'data': rows}

    print(f"result\n{result}\n\nQ_and_A\n{q_and_a}\n\n")
    return q_and_a, result
# Gradio UI: a gallery of images and a comma-separated prompt box feed
# answer_questions; its two return values fill the Markdown and Dataframe
# outputs.
with gr.Blocks() as demo:
    gr.Markdown("# moondream2 unofficial batch processing demo")
    gr.Markdown("1. Select images\n2. Enter one or more prompts separated by commas. Ex: Describe this image, What is in this image?\n\n")
    gr.Markdown("**Currently each image will be sent as a batch with the prompts thus asking each prompt on each image**")
    gr.Markdown("*Running on free CPU space tier currently so results may take a bit to process compared to duplicating space and using GPU space hardware*")
    gr.Markdown("## 🌔 moondream2\nA tiny vision language model. [GitHub](https://github.com/vikhyatk/moondream)")
    with gr.Row():
        img = gr.Gallery(label="Upload Images", type="pil")
    with gr.Row():
        # The placeholder describes what the handler does: every prompt is
        # asked about every image, not one prompt per image.
        prompt = gr.Textbox(
            label="Input Prompts",
            placeholder="Enter prompts separated by commas; every prompt is asked about every image. Ex: Describe this image, What is in this image?",
            lines=8,
        )
    with gr.Row():
        submit = gr.Button("Submit")
    output = gr.Markdown(label="Questions and Answers")
    output2 = gr.Dataframe(label="Structured Dataframe", type="array", wrap=True)
    submit.click(answer_questions, [img, prompt], [output, output2])

demo.queue().launch()
|