# app.py (1.72 kB) -- uploaded by akshit-g in commit af62a64 ("add : app.py").
from datasets import load_dataset
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer
from ..hf import detect_device
# Hugging Face Hub repository id of the model being evaluated.
MODEL_ID = "vikhyatk/moondream2"

# Device and dtype are chosen by the project's detect_device() helper.
DEVICE, DTYPE = detect_device()

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

# Options forwarded to from_pretrained; the empty-string key in device_map
# places the whole model on DEVICE.
# NOTE(review): flash_attention_2 normally needs a CUDA device and the
# flash-attn package -- confirm detect_device() cannot return CPU/MPS here.
_MODEL_LOAD_KWARGS = {
    "trust_remote_code": True,
    "attn_implementation": "flash_attention_2",
    "torch_dtype": DTYPE,
    "device_map": {"": DEVICE},
}
moondream = AutoModelForCausalLM.from_pretrained(MODEL_ID, **_MODEL_LOAD_KWARGS)

# Switch to inference mode before running the benchmark.
moondream.eval()
# Test split of the POPE dataset, fetched from the Hugging Face Hub.
pope_dataset = load_dataset("vikhyatk/POPE", split="test")

# Running tally per question category, stored as (correct, total).
stats = {category: (0, 0) for category in ("random", "popular", "adversarial")}

for row in tqdm(pope_dataset):
    # Encode the image once and reuse the encoding for all of its questions.
    enc_image = moondream.encode_image(row["image"])
    for split in ("adversarial", "popular", "random"):
        for qa in row[split]:
            expected = qa["answer"]
            prompt = f"{qa['question']}\nAnswer yes or no."
            predicted = moondream.answer_question(enc_image, prompt, tokenizer)
            # A hit is a case-insensitive exact match with the reference answer.
            hit = int(predicted.lower() == expected.lower())
            correct, total = stats[split]
            stats[split] = (correct + hit, total + 1)
# Report accuracy per category as "<Label>: <correct> / <total> : <percent>".
# The three original print calls were identical apart from the category, so
# they are folded into one loop over (label, stats key) pairs.
for label, split in (
    ("Random", "random"),
    ("Popular", "popular"),
    ("Adversarial", "adversarial"),
):
    correct, total = stats[split]
    # Guard against a category with no scored questions: dividing by a zero
    # total would raise ZeroDivisionError and hide the remaining results.
    accuracy = correct * 100.0 / total if total else float("nan")
    print(f"{label}:", correct, "/", total, ":", accuracy)