File size: 1,244 Bytes
5756dad
6dafc63
5756dad
6dafc63
77b3326
 
6dafc63
 
 
 
77b3326
2f92f19
 
 
 
 
 
77b3326
 
 
 
 
 
 
 
 
 
5a0207c
77b3326
adf8326
6dafc63
 
2f92f19
 
5756dad
 
77b3326
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# Moondream does not support the HuggingFace pipeline system, so the tokenizer
# and model are loaded manually.
moondream_id = "vikhyatk/moondream2"
# Dated release tag of the model repository to load.
moondream_revision = "2024-04-02"
# Use `revision` (not `code_revision`): `code_revision` only selects the
# revision of the remote modelling code, leaving the weights and tokenizer
# files on the repository's default branch. `revision` pins those files, and
# the remote code follows it when the code lives in the same repository.
moondream_tokenizer = AutoTokenizer.from_pretrained(moondream_id, revision=moondream_revision)
moondream_model = AutoModelForCausalLM.from_pretrained(
    moondream_id, trust_remote_code=True, revision=moondream_revision
)


def answer_question(_img, _prompt: str) -> str:
    """Answer a text prompt about an image using the Moondream model.

    Args:
        _img: The image supplied by the Gradio image component, or ``None``
            when the user has not provided one.
        _prompt: The question or instruction to ask about the image.

    Returns:
        The model's answer, or a short hint asking for an image when none
        was provided.
    """
    # The image component yields None until something is uploaded; encoding
    # None would fail inside the model, so bail out with a readable message.
    if _img is None:
        return "Please upload an image first."

    image_embeds = moondream_model.encode_image(_img)
    return moondream_model.answer_question(image_embeds, _prompt, moondream_tokenizer)


with gr.Blocks() as app:
    # Page header.
    gr.Markdown(
        """
        # Food Identifier
        
        Final project for IAT 481 at Simon Fraser University, Spring 2024.
        """
    )

    # First row: the prompt box and the button that sends it.
    with gr.Row():
        prompt = gr.Textbox(label="Input", value="Describe this image.")
        submit = gr.Button("Submit")

    # Second row: the image to ask about, next to the model's answer.
    with gr.Row():
        img = gr.Image(label="Image", type="pil")
        output = gr.TextArea(label="Output")

    # Clicking the button and pressing Enter in the prompt box both run the
    # same query and write the answer into the output area.
    submit.click(fn=answer_question, inputs=[img, prompt], outputs=output)
    prompt.submit(fn=answer_question, inputs=[img, prompt], outputs=output)

# Start the Gradio server only when this file is run as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    app.launch()