akshit-g committed on
Commit
6c84df1
·
1 Parent(s): bc54caf

add : app.py

Browse files
Files changed (2) hide show
  1. app.py +101 -0
  2. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import time
3
+ from threading import Thread
4
+
5
+ import gradio as gr
6
+ import torch
7
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
8
+
9
+ from moondream.hf import LATEST_REVISION, detect_device
10
+
11
# Command-line interface: a single `--cpu` switch that forces CPU execution.
parser = argparse.ArgumentParser()
parser.add_argument("--cpu", action="store_true")
args = parser.parse_args()

# Choose the device/dtype pair the model will be moved to.
if not args.cpu:
    device, dtype = detect_device()
    if device != torch.device("cpu"):
        # Only announce the device when it is something other than the CPU,
        # and point at the flag that overrides the selection.
        print("Using device:", device)
        print("If you run into issues, pass the `--cpu` flag to this script.")
        print()
else:
    device, dtype = torch.device("cpu"), torch.float32

# Load the tokenizer and model at the same pinned revision, move the model to
# the selected device/dtype, and switch it to evaluation mode.
model_id = "vikhyatk/moondream2"
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=LATEST_REVISION)
moondream = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    revision=LATEST_REVISION,
).to(device=device, dtype=dtype)
moondream.eval()
31
+
32
+
33
def answer_question(img, prompt):
    """Stream the model's answer to ``prompt`` about ``img``.

    The image is encoded with ``moondream.encode_image``; then
    ``moondream.answer_question`` is started on a background thread with a
    ``TextIteratorStreamer`` attached. Every piece of text the streamer
    produces is appended to what has been received so far, and the
    accumulated string is yielded.

    Args:
        img: Image handed directly to ``moondream.encode_image``.
        prompt: Text forwarded to the model as the ``question`` argument.

    Yields:
        str: The text received so far, growing with each streamed piece.
    """
    embeds = moondream.encode_image(img)
    token_stream = TextIteratorStreamer(tokenizer, skip_special_tokens=True)

    generation_kwargs = dict(
        image_embeds=embeds,
        question=prompt,
        tokenizer=tokenizer,
        streamer=token_stream,
    )
    # Run generation off-thread so this generator can consume the streamer
    # while text is still being produced.
    worker = Thread(target=moondream.answer_question, kwargs=generation_kwargs)
    worker.start()

    answer_so_far = ""
    for piece in token_stream:
        answer_so_far = answer_so_far + piece
        yield answer_so_far
51
+
52
+
53
with gr.Blocks() as demo:
    gr.Markdown("# See For Me")

    # Inline stylesheet for the answer area (elements tagged `md_output`).
    gr.HTML(
        """
        <style type="text/css">
            .md_output p {
                padding-top: 1rem;
                font-size: 1.2rem !important;
            }
        </style>
        """
    )

    with gr.Row():
        prompt = gr.Textbox(
            label="Prompt",
            value="What's going on? Respond with a single sentence.",
            interactive=True,
        )
    with gr.Row():
        img = gr.Image(type="pil", label="Upload an Image", streaming=True)
        output = gr.Markdown(elem_classes=["md_output"])

    # Most recent inputs, written by the change handlers below and read by
    # `live_video`.
    # NOTE(review): these are module-level globals, so presumably every
    # connected browser session shares them — confirm this is intended.
    latest_img = None
    latest_prompt = prompt.value

    @img.change(inputs=[img])
    def img_change(img):
        """Remember the newest image delivered by the image component."""
        global latest_img
        latest_img = img

    @prompt.change(inputs=[prompt])
    def prompt_change(prompt):
        """Remember the newest prompt text typed by the user."""
        global latest_prompt
        latest_prompt = prompt

    @demo.load(outputs=[output])
    def live_video():
        """Endlessly answer the latest prompt about the latest image.

        While no image has been received the loop sleeps briefly and retries.
        Otherwise it streams `answer_question` for the current image/prompt
        pair, yielding each non-empty partial answer to the output component,
        and then starts over.
        """
        while True:
            if latest_img is None:
                # Nothing to describe yet; back off before checking again.
                time.sleep(0.1)
                continue
            for partial in answer_question(latest_img, latest_prompt):
                if partial:
                    yield partial


demo.queue().launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ accelerate==0.32.1
2
+ huggingface-hub==0.24.0
3
+ Pillow==10.4.0
4
+ torch==2.3.1
5
+ torchvision==0.18.1
6
+ transformers==4.44.0
7
+ einops==0.8.0
8
+ gradio==4.38.1