maxiw committed on
Commit
e25f9d4
·
1 Parent(s): f6c705c

added v1.5 model

Browse files
Files changed (1) hide show
  1. app.py +44 -18
app.py CHANGED
@@ -6,18 +6,21 @@ from PIL import Image
6
 
7
  models = {
8
  "Salesforce/xgen-mm-phi3-mini-instruct-r-v1": AutoModelForVision2Seq.from_pretrained("Salesforce/xgen-mm-phi3-mini-instruct-r-v1", trust_remote_code=True).to("cuda").eval(),
 
9
  }
10
 
11
  processors = {
12
  "Salesforce/xgen-mm-phi3-mini-instruct-r-v1": AutoImageProcessor.from_pretrained("Salesforce/xgen-mm-phi3-mini-instruct-r-v1", trust_remote_code=True),
 
13
  }
14
 
15
  tokenizers = {
16
- "Salesforce/xgen-mm-phi3-mini-instruct-r-v1": AutoTokenizer.from_pretrained("Salesforce/xgen-mm-phi3-mini-instruct-r-v1", trust_remote_code=True, use_fast=False, legacy=False)
 
17
  }
18
 
19
 
20
- DESCRIPTION = "# [XGen-MM Demo](https://huggingface.co/Salesforce/xgen-mm-phi3-mini-instruct-r-v1)"
21
 
22
 
23
  def apply_prompt_template(prompt):
@@ -39,25 +42,48 @@ class EosListStoppingCriteria(StoppingCriteria):
39
 
40
 
41
  @spaces.GPU
42
- def run_example(image, text_input=None, model_id="Salesforce/xgen-mm-phi3-mini-instruct-r-v1"):
43
  model = models[model_id]
44
  processor = processors[model_id]
45
  tokenizer = tokenizers[model_id]
46
  tokenizer = model.update_special_tokens(tokenizer)
47
 
48
- image = Image.fromarray(image).convert("RGB")
49
- prompt = apply_prompt_template(text_input)
50
- language_inputs = tokenizer([prompt], return_tensors="pt")
51
-
52
- inputs = processor([image], return_tensors="pt", image_aspect_ratio='anyres')
53
- inputs.update(language_inputs)
54
- inputs = {name: tensor.cuda() for name, tensor in inputs.items()}
55
-
56
- generated_text = model.generate(**inputs, image_size=[image.size],
57
- pad_token_id=tokenizer.pad_token_id,
58
- do_sample=False, max_new_tokens=768, top_p=None, num_beams=1,
59
- stopping_criteria = [EosListStoppingCriteria()],
60
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
  prediction = tokenizer.decode(generated_text[0], skip_special_tokens=True).split("<|end|>")[0]
63
  return prediction
@@ -71,11 +97,11 @@ css = """
71
 
72
  with gr.Blocks(css=css) as demo:
73
  gr.Markdown(DESCRIPTION)
74
- with gr.Tab(label="XGen-MM Input"):
75
  with gr.Row():
76
  with gr.Column():
77
  input_img = gr.Image(label="Input Picture")
78
- model_selector = gr.Dropdown(choices=list(models.keys()), label="Model", value="Salesforce/xgen-mm-phi3-mini-instruct-r-v1")
79
  text_input = gr.Textbox(label="Question")
80
  submit_btn = gr.Button(value="Submit")
81
  with gr.Column():
 
6
 
7
  models = {
8
  "Salesforce/xgen-mm-phi3-mini-instruct-r-v1": AutoModelForVision2Seq.from_pretrained("Salesforce/xgen-mm-phi3-mini-instruct-r-v1", trust_remote_code=True).to("cuda").eval(),
9
+ "Salesforce/xgen-mm-phi3-mini-instruct-interleave-r-v1.5": AutoModelForVision2Seq.from_pretrained("Salesforce/xgen-mm-phi3-mini-instruct-interleave-r-v1.5", trust_remote_code=True).to("cuda").eval()
10
  }
11
 
12
  processors = {
13
  "Salesforce/xgen-mm-phi3-mini-instruct-r-v1": AutoImageProcessor.from_pretrained("Salesforce/xgen-mm-phi3-mini-instruct-r-v1", trust_remote_code=True),
14
+ "Salesforce/xgen-mm-phi3-mini-instruct-interleave-r-v1.5": AutoImageProcessor.from_pretrained("Salesforce/xgen-mm-phi3-mini-instruct-interleave-r-v1.5", trust_remote_code=True)
15
  }
16
 
17
  tokenizers = {
18
+ "Salesforce/xgen-mm-phi3-mini-instruct-r-v1": AutoTokenizer.from_pretrained("Salesforce/xgen-mm-phi3-mini-instruct-r-v1", trust_remote_code=True, use_fast=False, legacy=False),
19
+ "Salesforce/xgen-mm-phi3-mini-instruct-interleave-r-v1.5": AutoTokenizer.from_pretrained("Salesforce/xgen-mm-phi3-mini-instruct-interleave-r-v1.5", trust_remote_code=True, use_fast=False, legacy=False)
20
  }
21
 
22
 
23
+ DESCRIPTION = "# [xGen-MM Demo](https://huggingface.co/collections/Salesforce/xgen-mm-1-models-662971d6cecbf3a7f80ecc2e)"
24
 
25
 
26
  def apply_prompt_template(prompt):
 
42
 
43
 
44
  @spaces.GPU
45
+ def run_example(image, text_input=None, model_id="Salesforce/xgen-mm-phi3-mini-instruct-interleave-r-v1.5"):
46
  model = models[model_id]
47
  processor = processors[model_id]
48
  tokenizer = tokenizers[model_id]
49
  tokenizer = model.update_special_tokens(tokenizer)
50
 
51
+ if model_id == "Salesforce/xgen-mm-phi3-mini-instruct-r-v1":
52
+ image = Image.fromarray(image).convert("RGB")
53
+ prompt = apply_prompt_template(text_input)
54
+ language_inputs = tokenizer([prompt], return_tensors="pt")
55
+
56
+ inputs = processor([image], return_tensors="pt", image_aspect_ratio='anyres')
57
+ inputs.update(language_inputs)
58
+ inputs = {name: tensor.cuda() for name, tensor in inputs.items()}
59
+
60
+ generated_text = model.generate(**inputs, image_size=[image.size],
61
+ pad_token_id=tokenizer.pad_token_id,
62
+ do_sample=False, max_new_tokens=768, top_p=None, num_beams=1,
63
+ stopping_criteria = [EosListStoppingCriteria()],
64
+ )
65
+ else:
66
+ image_list = []
67
+ image_sizes = []
68
+
69
+ img = Image.fromarray(image).convert("RGB")
70
+ image_list.append(processor([img], image_aspect_ratio='anyres')["pixel_values"].cuda())
71
+ image_sizes.append(img.size)
72
+
73
+ inputs = {
74
+ "pixel_values": [image_list]
75
+ }
76
+ prompt = apply_prompt_template(text_input)
77
+ language_inputs = tokenizer([prompt], return_tensors="pt")
78
+ inputs.update(language_inputs)
79
+
80
+ for name, value in inputs.items():
81
+ if isinstance(value, torch.Tensor):
82
+ inputs[name] = value.cuda()
83
+ generated_text = model.generate(**inputs, image_size=[image_sizes],
84
+ pad_token_id=tokenizer.pad_token_id,
85
+ do_sample=False, max_new_tokens=1024, top_p=None, num_beams=1,
86
+ )
87
 
88
  prediction = tokenizer.decode(generated_text[0], skip_special_tokens=True).split("<|end|>")[0]
89
  return prediction
 
97
 
98
  with gr.Blocks(css=css) as demo:
99
  gr.Markdown(DESCRIPTION)
100
+ with gr.Tab(label="xGen-MM Input"):
101
  with gr.Row():
102
  with gr.Column():
103
  input_img = gr.Image(label="Input Picture")
104
+ model_selector = gr.Dropdown(choices=list(models.keys()), label="Model", value="Salesforce/xgen-mm-phi3-mini-instruct-interleave-r-v1.5")
105
  text_input = gr.Textbox(label="Question")
106
  submit_btn = gr.Button(value="Submit")
107
  with gr.Column():