abalakrishnaTRI committed on
Commit
6ba6dce
1 Parent(s): bb834c6

support fused backbones and update MODEL_ID_TO_NAME

Browse files
Files changed (2) hide show
  1. interactive_demo.py +7 -1
  2. serve/__init__.py +13 -13
interactive_demo.py CHANGED
@@ -152,7 +152,13 @@ class ModelWorker:
152
  # Assume `image_transform` is a HF ImageProcessor...
153
  pixel_values = self.image_processor(images[0].convert("RGB"), return_tensors="pt")["pixel_values"][0]
154
 
155
- generated_text = self.vlm.generate_answer(torch.unsqueeze(pixel_values.cuda(), 0), question_prompt)[0]
 
 
 
 
 
 
156
  generated_text = generated_text.split("USER")[0].split("ASSISTANT")[0]
157
  yield json.dumps({"text": ori_prompt + generated_text, "error_code": 0}).encode() + b"\0"
158
 
 
152
  # Assume `image_transform` is a HF ImageProcessor...
153
  pixel_values = self.image_processor(images[0].convert("RGB"), return_tensors="pt")["pixel_values"][0]
154
 
155
+ if type(pixel_values) is dict:
156
+ for k in pixel_values.keys():
157
+ pixel_values[k] = torch.unsqueeze(pixel_values[k].cuda(), 0)
158
+ else:
159
+ pixel_values = torch.unsqueeze(pixel_values.cuda(), 0)
160
+
161
+ generated_text = self.vlm.generate_answer(pixel_values, question_prompt)[0]
162
  generated_text = generated_text.split("USER")[0].split("ASSISTANT")[0]
163
  yield json.dumps({"text": ori_prompt + generated_text, "error_code": 0}).encode() + b"\0"
164
 
serve/__init__.py CHANGED
@@ -5,31 +5,31 @@ from collections import OrderedDict
5
  MODEL_ID_TO_NAME = OrderedDict(
6
  [
7
  (
8
- "llava-lvis4v-lrv+lvis4v-lrv-resize-naive-clip-vit-l-14-336px-no-align-2-epochs-llama2pure+13b+stage-finetune+x7",
9
- "Prism-CLIP 13B",
10
  ),
11
  (
12
- "llava-lvis4v-lrv+lvis4v-lrv-resize-naive-clip-vit-l-14-336px-no-align-2-epochs-llama2pure+7b+stage-finetune+x7",
13
- "Prism-CLIP 7B",
14
  ),
15
  (
16
- "resize-naive-clip-vit-l-14-336px-no-align-llama2pure+13b+stage-finetune+x7",
17
- "Prism-CLIP 13B (Controlled)",
18
  ),
19
  (
20
- "resize-naive-clip-vit-l-14-336px-no-align-llama2pure+7b+stage-finetune+x7",
21
- "Prism-CLIP 7B (Controlled)",
22
  ),
23
  (
24
- "resize-naive-clip-vit-l-14-336px-no-align+13b+stage-finetune+x7",
25
- "Prism-CLIP 13B (Controlled) - Chat",
26
  ),
27
  (
28
- "resize-naive-clip-vit-l-14-336px-no-align+7b+stage-finetune+x7",
29
- "Prism-CLIP 7B (Controlled) - Chat",
30
  ),
31
- ("llava-v1.5-7b", "LLaVA 1.5: 7B"),
32
  ("llava-v1.5-13b", "LLaVA 1.5: 13B"),
 
33
  ]
34
  )
35
 
 
5
  MODEL_ID_TO_NAME = OrderedDict(
6
  [
7
  (
8
+ "llava-lvis4v-lrv+redux-lvis4v-lrv-resize-naive-dinosiglip-vit-so-14-384px-no-align+13b+stage-finetune+x7",
9
+ "PrismaticVLM 13B - Chat",
10
  ),
11
  (
12
+ "llava-lvis4v-lrv+redux-lvis4v-lrv-resize-naive-dinosiglip-vit-so-14-384px-no-align+7b+stage-finetune+x7",
13
+ "PrismaticVLM 7B - Chat",
14
  ),
15
  (
16
+ "llava-lvis4v-lrv+redux-lvis4v-lrv-resize-naive-dinosiglip-vit-so-14-384px-no-align-llama2pure+13b+stage-finetune+x7",
17
+ "PrismaticVLM 13B",
18
  ),
19
  (
20
+ "llava-lvis4v-lrv+redux-lvis4v-lrv-resize-naive-dinosiglip-vit-so-14-384px-no-align-llama2pure+7b+stage-finetune+x7",
21
+ "PrismaticVLM 7B",
22
  ),
23
  (
24
+ "redux-resize-naive-dinosiglip-vit-so-14-384px-no-align-llama2pure+13b+stage-finetune+x7",
25
+ "PrismaticVLM 13B (Controlled)",
26
  ),
27
  (
28
+ "redux-resize-naive-dinosiglip-vit-so-14-384px-no-align-llama2pure+7b+stage-finetune+x7",
29
+ "PrismaticVLM 7B (Controlled)",
30
  ),
 
31
  ("llava-v1.5-13b", "LLaVA 1.5: 13B"),
32
+ ("llava-v1.5-7b", "LLaVA 1.5: 7B"),
33
  ]
34
  )
35