Dacho688 committed on
Commit
3ce0980
·
unverified ·
1 Parent(s): 63908c5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -0
app.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import gradio as gr
from huggingface_hub import login
from transformers import load_tool  # NOTE(review): unused here; kept to avoid breaking any external expectation
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
import torch
from PIL import Image
import spaces

# Authentication is optional for this public model; uncomment when gated
# models or private resources are needed.
# login(os.getenv("HUGGINGFACEHUB_API_TOKEN"))

# Load the LLaVA-NeXT (Mistral-7B) processor and model once at import time so
# every chat request reuses the same weights instead of reloading per call.
processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf")
model = LlavaNextForConditionalGeneration.from_pretrained(
    "llava-hf/llava-v1.6-mistral-7b-hf",
    torch_dtype=torch.float16,   # half precision halves memory vs float32
    low_cpu_mem_usage=True,      # stream weights instead of a full CPU copy
)
model.to("cuda")
15
@spaces.GPU(duration=40)
def DocChat(question, history):
    """Answer a question about an uploaded image with LLaVA-NeXT.

    Args:
        question: Gradio multimodal message dict with "text" and "files" keys.
        history: Prior chat turns; past image uploads appear as tuple entries
            in a turn's first element.

    Yields:
        str: The model's answer with the echoed prompt prefix stripped.

    Raises:
        gr.Error: If no image is available in this turn or in the history.
    """
    print(question)

    # Initialize so the None-check below is safe even when neither this turn
    # nor the history contains an image (the original could hit
    # UnboundLocalError here).
    image = None
    if question["files"]:
        # Prefer the most recent upload from the current turn.
        image = question["files"][-1]["path"]
    else:
        # If there's no image uploaded for this turn, look for images in the
        # past turns, kept inside tuples; the loop leaves the last one bound.
        for hist in history:
            if isinstance(hist[0], tuple):
                image = hist[0][0]

    if image is None:
        # The original constructed gr.Error without raising it, so execution
        # fell through to Image.open(None); raise so Gradio shows the message.
        raise gr.Error("You need to upload an image for LLaVA to work.")

    prompt = f"[INST] <image>\n{question['text']} [/INST]"
    image = Image.open(image).convert("RGB")
    inputs = processor(prompt, image, return_tensors="pt").to("cuda")

    output = model.generate(**inputs, max_new_tokens=500)
    outputmsg = processor.decode(output[0], skip_special_tokens=True)

    # Strip the echoed prompt from the decoded text. The -5 offset compensates
    # for the "<image>" placeholder not appearing verbatim in the output —
    # NOTE(review): this offset is brittle; confirm against processor output.
    generated_text_without_prompt = outputmsg[len(prompt) - 5:]
    yield generated_text_without_prompt
40
# Wire the multimodal chat UI: every user turn is routed through DocChat.
chat_config = dict(
    fn=DocChat,
    title="Image Chatbot",
    description="Chat with your images/documents with LLaVA NeXT.",
    stop_btn="Stop Generation",
    multimodal=True,
)
demo = gr.ChatInterface(**chat_config)

if __name__ == "__main__":
    # debug=True surfaces server-side tracebacks in the console/UI.
    demo.launch(debug=True)