Spaces:

maxiw
/

XGen-MM

Running on Zero

App Files Files Community

maxiw committed on Aug 19

Commit

4f199bf

•

1 Parent(s): a0bd6fb

added app implementation and reqs

Browse files

Files changed (2) hide show

app.py +96 -4
requirements.txt +5 -0

app.py CHANGED Viewed

@@ -1,7 +1,99 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()

 import gradio as gr
+from transformers import AutoModelForVision2Seq, AutoTokenizer, AutoImageProcessor, StoppingCriteria
+import spaces
+import torch
+from PIL import Image
+# Hub checkpoint served by this demo; each registry below is keyed by this id.
+_MODEL_ID = "Salesforce/xgen-mm-phi3-mini-instruct-r-v1"
+# Model registry: weights are loaded with the checkpoint's remote code,
+# moved to CUDA and switched to eval mode at import time.
+models = {
+    _MODEL_ID: AutoModelForVision2Seq.from_pretrained(
+        _MODEL_ID,
+        trust_remote_code=True,
+    ).to("cuda").eval(),
+}
+# Image-processor registry, keyed the same way as `models`.
+processors = {
+    _MODEL_ID: AutoImageProcessor.from_pretrained(
+        _MODEL_ID,
+        trust_remote_code=True,
+    ),
+}
+# Tokenizer registry (non-fast tokenizer, legacy=False), keyed the same way as `models`.
+tokenizers = {
+    _MODEL_ID: AutoTokenizer.from_pretrained(
+        _MODEL_ID,
+        trust_remote_code=True,
+        use_fast=False,
+        legacy=False,
+    ),
+}
+# Markdown heading rendered at the top of the page; links to the model card.
+DESCRIPTION = "# [XGen-MM Demo](https://huggingface.co/Salesforce/xgen-mm-phi3-mini-instruct-r-v1)"
+def apply_prompt_template(prompt):
+    """Wrap a user question in the chat markup used to prompt the model.
+
+    The result is a fixed system turn, followed by a user turn that holds an
+    ``<image>`` placeholder and ``prompt``, followed by an opened assistant
+    turn for the model to complete.
+
+    Args:
+        prompt: The question text; it is interpolated as-is.
+
+    Returns:
+        The full prompt string.
+    """
+    system_turn = (
+        '<|system|>\nA chat between a curious user and an artificial intelligence assistant. '
+        "The assistant gives helpful, detailed, and polite answers to the user's questions.<|end|>\n"
+    )
+    user_turn = f'<|user|>\n<image>\n{prompt}<|end|>\n'
+    assistant_open = '<|assistant|>\n'
+    return system_turn + user_turn + assistant_open
+class EosListStoppingCriteria(StoppingCriteria):
+    """Stop generation once any row of ``input_ids`` ends with ``eos_sequence``.
+
+    Args:
+        eos_sequence: Token ids that mark the end of a generation. Defaults to
+            ``[32007]`` (presumably the id of the ``<|end|>`` token for this
+            tokenizer; confirm against the checkpoint's vocabulary).
+    """
+    def __init__(self, eos_sequence=None):
+        # A list literal as the default would be shared by every instance, so
+        # build a fresh list here. Copying into a list also lets tuples match,
+        # because __call__ compares against rows produced by ``.tolist()``.
+        self.eos_sequence = [32007] if eos_sequence is None else list(eos_sequence)
+    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
+        """Return True when the tail of any sequence in the batch equals ``eos_sequence``."""
+        # Only the last len(eos_sequence) ids of each row can match.
+        tails = input_ids[:, -len(self.eos_sequence):].tolist()
+        return self.eos_sequence in tails
+@spaces.GPU
+def run_example(image, text_input=None, model_id="Salesforce/xgen-mm-phi3-mini-instruct-r-v1"):
+    """Answer a question about an image with the selected model.
+
+    Args:
+        image: Pixel array accepted by ``PIL.Image.fromarray``, as passed in by
+            the ``gr.Image`` input component.
+        text_input: The question to ask. ``None`` is treated as an empty
+            question instead of being rendered as the literal text "None".
+        model_id: Key into the module-level ``models``, ``processors`` and
+            ``tokenizers`` registries.
+
+    Returns:
+        The decoded generation, cut at the first ``<|end|>`` marker.
+
+    Raises:
+        gr.Error: If no image was provided.
+        KeyError: If ``model_id`` is not a registered model.
+    """
+    if image is None:
+        # Without this guard, Image.fromarray(None) fails with an opaque error.
+        raise gr.Error("Please provide an input image.")
+    if text_input is None:
+        # The prompt template interpolates its argument with an f-string, so
+        # None would otherwise become the question "None".
+        text_input = ""
+    model = models[model_id]
+    processor = processors[model_id]
+    tokenizer = tokenizers[model_id]
+    tokenizer = model.update_special_tokens(tokenizer)
+    pil_image = Image.fromarray(image).convert("RGB")
+    prompt = apply_prompt_template(text_input)
+    language_inputs = tokenizer([prompt], return_tensors="pt")
+    inputs = processor([pil_image], return_tensors="pt", image_aspect_ratio='anyres')
+    inputs.update(language_inputs)
+    # Move every input tensor to the GPU, where the model was placed at load time.
+    inputs = {name: tensor.cuda() for name, tensor in inputs.items()}
+    output_ids = model.generate(
+        **inputs,
+        image_size=[pil_image.size],
+        pad_token_id=tokenizer.pad_token_id,
+        # Greedy decoding: sampling is off and a single beam is used.
+        do_sample=False, max_new_tokens=768, top_p=None, num_beams=1,
+        stopping_criteria=[EosListStoppingCriteria()],
+    )
+    decoded = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+    # Drop anything after the end-of-turn marker if it survived decoding.
+    return decoded.split("<|end|>")[0]
+# Stylesheet handed to gr.Blocks.
+# NOTE(review): no component in this block sets elem_id="output", so the rule
+# below does not appear to target anything here. Confirm whether the output
+# textbox was meant to use it.
+css = """
+  #output {
+    height: 500px;
+    overflow: auto;
+    border: 1px solid #ccc;
+  }
+"""
+# UI layout: image, model selector and question on the left; answer on the right.
+with gr.Blocks(css=css) as demo:
+    gr.Markdown(DESCRIPTION)
+    with gr.Tab(label="XGen-MM Input"):
+        with gr.Row():
+            with gr.Column():
+                input_img = gr.Image(label="Input Picture")
+                model_selector = gr.Dropdown(choices=list(models.keys()), label="Model", value="Salesforce/xgen-mm-phi3-mini-instruct-r-v1")
+                text_input = gr.Textbox(label="Question")
+                submit_btn = gr.Button(value="Submit")
+            with gr.Column():
+                output_text = gr.Textbox(label="Output Text")
+        gr.Examples(
+            # Each row must supply one value per component in `inputs`:
+            # (image, question). The rows previously carried an extra
+            # "ScreenQA" column in second position, so that label was used as
+            # the question and the real question was dropped.
+            examples=[
+                ["image1.jpg", "What is the version of the settings?"],
+                ["image1.jpg", "What is the state of use lower resolution images?"],
+                ["image2.jpg", "How much is the discount for the product?"],
+            ],
+            inputs=[input_img, text_input],
+            outputs=[output_text],
+            fn=run_example,
+            cache_examples=True,
+            label="Try examples"
+        )
+        # Manual submissions also pass the selected model id.
+        submit_btn.click(run_example, [input_img, text_input, model_selector], [output_text])
+demo.launch(debug=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+spaces
+transformers
+open_clip_torch
+einops
+einops_exts