maxiw committed on
Commit
4f199bf
1 Parent(s): a0bd6fb

added app implementation and reqs

Files changed (2)
  1. app.py +96 -4
  2. requirements.txt +5 -0
app.py CHANGED
@@ -1,7 +1,99 @@
  import gradio as gr
-
- def greet(name):
-     return "Hello " + name + "!!"
-
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
- demo.launch()
+ from transformers import AutoModelForVision2Seq, AutoTokenizer, AutoImageProcessor, StoppingCriteria
+ import spaces
+ import torch
+ from PIL import Image
+
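+ # Load the model, image processor, and tokenizer once at startup, keyed by
+ # model id so additional checkpoints can be added to the dropdown later.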
+ models = {
+     "Salesforce/xgen-mm-phi3-mini-instruct-r-v1": AutoModelForVision2Seq.from_pretrained("Salesforce/xgen-mm-phi3-mini-instruct-r-v1", trust_remote_code=True).to("cuda").eval(),
+ }
+
+ processors = {
+     "Salesforce/xgen-mm-phi3-mini-instruct-r-v1": AutoImageProcessor.from_pretrained("Salesforce/xgen-mm-phi3-mini-instruct-r-v1", trust_remote_code=True),
+ }
+
+ tokenizers = {
+     "Salesforce/xgen-mm-phi3-mini-instruct-r-v1": AutoTokenizer.from_pretrained("Salesforce/xgen-mm-phi3-mini-instruct-r-v1", trust_remote_code=True, use_fast=False, legacy=False),
+ }
+
+
+ DESCRIPTION = "# [XGen-MM Demo](https://huggingface.co/Salesforce/xgen-mm-phi3-mini-instruct-r-v1)"
+
+
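+ # Wrap the user's question in the Phi-3-style chat template that XGen-MM expects.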
+ def apply_prompt_template(prompt):
+     s = (
+         '<|system|>\nA chat between a curious user and an artificial intelligence assistant. '
+         "The assistant gives helpful, detailed, and polite answers to the user's questions.<|end|>\n"
+         f'<|user|>\n<image>\n{prompt}<|end|>\n<|assistant|>\n'
+     )
+     return s
+
+
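+ # Stop generation once the model emits the <|end|> token (id 32007 in Phi-3's vocabulary).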
+ class EosListStoppingCriteria(StoppingCriteria):
+     def __init__(self, eos_sequence=[32007]):
+         self.eos_sequence = eos_sequence
+
+     def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
+         last_ids = input_ids[:, -len(self.eos_sequence):].tolist()
+         return self.eos_sequence in last_ids
+
+
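+ # @spaces.GPU asks the Spaces runtime for a GPU for the duration of each call.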
+ @spaces.GPU
+ def run_example(image, text_input=None, model_id="Salesforce/xgen-mm-phi3-mini-instruct-r-v1"):
+     model = models[model_id]
+     processor = processors[model_id]
+     tokenizer = tokenizers[model_id]
+     tokenizer = model.update_special_tokens(tokenizer)
+
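+     # Gradio passes the image as a numpy array; convert it to an RGB PIL image.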
+     image = Image.fromarray(image).convert("RGB")
+     prompt = apply_prompt_template(text_input)
+     language_inputs = tokenizer([prompt], return_tensors="pt")
+
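+     # 'anyres' lets the processor tile the image into patches instead of squashing it to one fixed size.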
+     inputs = processor([image], return_tensors="pt", image_aspect_ratio='anyres')
+     inputs.update(language_inputs)
+     inputs = {name: tensor.cuda() for name, tensor in inputs.items()}
+
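+     # Deterministic greedy decoding; EosListStoppingCriteria halts generation at <|end|>.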
+     generated_text = model.generate(**inputs, image_size=[image.size],
+                                     pad_token_id=tokenizer.pad_token_id,
+                                     do_sample=False, max_new_tokens=768, top_p=None, num_beams=1,
+                                     stopping_criteria=[EosListStoppingCriteria()],
+                                     )
+
+     prediction = tokenizer.decode(generated_text[0], skip_special_tokens=True).split("<|end|>")[0]
+     return prediction
+
+
+ css = """
+ #output {
+     height: 500px;
+     overflow: auto;
+     border: 1px solid #ccc;
+ }
+ """
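+
+ # Minimal Blocks UI: image and question on the left, model output on the right.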
+ with gr.Blocks(css=css) as demo:
+     gr.Markdown(DESCRIPTION)
+     with gr.Tab(label="XGen-MM Input"):
+         with gr.Row():
+             with gr.Column():
+                 input_img = gr.Image(label="Input Picture")
+                 model_selector = gr.Dropdown(choices=list(models.keys()), label="Model", value="Salesforce/xgen-mm-phi3-mini-instruct-r-v1")
+                 text_input = gr.Textbox(label="Question")
+                 submit_btn = gr.Button(value="Submit")
+             with gr.Column():
+                 output_text = gr.Textbox(label="Output Text", elem_id="output")
+
+         gr.Examples(
+             examples=[
+                 ["image1.jpg", "What is the version of the settings?"],
+                 ["image1.jpg", "What is the state of use lower resolution images?"],
+                 ["image2.jpg", "How much is the discount for the product?"]
+             ],
+             inputs=[input_img, text_input],
+             outputs=[output_text],
+             fn=run_example,
+             cache_examples=True,
+             label="Try examples"
+         )
+
+     submit_btn.click(run_example, [input_img, text_input, model_selector], [output_text])
+
+ demo.launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ spaces
+ transformers
+ open_clip_torch
+ einops
+ einops_exts
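+ # gradio, torch, and Pillow are assumed to ship with the Spaces base image, so only the extras are listed.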