vilarin committed on
Commit 300e23e
1 Parent(s): e9f4550

Update app.py

Files changed (1)
  1. app.py +26 -23
app.py CHANGED
@@ -2,13 +2,12 @@ import torch
 from PIL import Image
 import gradio as gr
 import spaces
-from transformers import AutoModel, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer
 import os
 
 
 
-os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
-MODEL_LIST = ["openbmb/MiniCPM-Llama3-V-2_5","openbmb/MiniCPM-Llama3-V-2_5-int4"]
+MODEL_LIST = ["THUDM/glm-4v-9b"]
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 MODEL_ID = os.environ.get("MODEL_ID")
 MODEL_NAME = MODEL_ID.split("/")[-1]
@@ -28,7 +27,8 @@ CSS = """
 
 model = AutoModel.from_pretrained(
     MODEL_ID,
-    torch_dtype=torch.float16,
+    torch_dtype=torch.bfloat16,
+    low_cpu_mem_usage=True,
     trust_remote_code=True
 ).to(0)
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
@@ -36,45 +36,48 @@ model.eval()
 
 
 @spaces.GPU()
-def stream_chat(message, history: list, temperature: float, max_new_tokens: int):
+def stream_chat(message, history: list, temperature: float, max_length: int):
     print(f'message is - {message}')
     print(f'history is - {history}')
     conversation = []
     if message["files"]:
         image = Image.open(message["files"][-1]).convert('RGB')
-        conversation.append({"role": "user", "content": message['text']})
+        conversation.append({"role": "user", "image": image, "content": message['text']})
     else:
         if len(history) == 0:
             raise gr.Error("Please upload an image first.")
             image = None
         else:
-            image = Image.open(history[0][0][0])
+            image = Image.open(history[0][1])
         for prompt, answer in history:
             if answer is None:
                 conversation.extend([{"role": "user", "content": prompt},{"role": "assistant", "content": ""}])
             else:
                 conversation.extend([{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}])
-        conversation.append({"role": "user", "content": message['text']})
+        conversation.append({"role": "user", "image": image, "content": message['text']})
     print(f"Conversation is -\n{conversation}")
 
+    input_ids = tokenizer.apply_chat_template(conversation, tokenize=True, add_generation_prompt=True, return_tensors="pt", return_dict=True).to(model.device)
+    streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
+
     generate_kwargs = dict(
-        image=image,
-        msgs=conversation,
-        max_new_tokens=max_new_tokens,
+        max_length=max_length,
+        streamer=streamer,
+        do_sample=True,
+        top_k=1,
         temperature=temperature,
-        sampling=True,
-        tokenizer=tokenizer,
-        stream=True
+        repetition_penalty=1.2,
    )
-    if temperature == 0:
-        generate_kwargs["sampling"] = False
-
-    response = model.chat(**generate_kwargs)
+    gen_kwargs = {**input_ids, **generate_kwargs}
 
-    buffer = ""
-    for new_text in response:
-        buffer += new_text
-        yield buffer
+    with torch.no_grad():
+        thread = Thread(target=model.generate, kwargs=gen_kwargs)
+        thread.start()
+        buffer = ""
+        for new_text in streamer:
+            buffer += new_text
+            yield buffer
+
 
 
 
@@ -117,7 +120,7 @@ with gr.Blocks(css=CSS) as demo:
             maximum=4096,
             step=1,
             value=1024,
-            label="Max new tokens",
+            label="Max Length",
             render=False,
         ),
     ],
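
A note on the added code: it relies on TextIteratorStreamer and Thread, which none of the visible hunks import, and the file still calls AutoModel.from_pretrained even though the import line after this commit only brings in AutoModelForCausalLM. Below is a minimal, self-contained sketch of the new streaming path with those imports spelled out; the model loading, chat-template call, and generation kwargs follow the + lines above, while the simplified single-turn stream_chat signature is purely illustrative and is not the Gradio-facing function in app.py.

import os
from threading import Thread

import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

MODEL_ID = os.environ.get("MODEL_ID", "THUDM/glm-4v-9b")

# Loaded once at startup. The diff keeps AutoModel.from_pretrained; this sketch
# uses AutoModelForCausalLM to match the class actually imported after the commit.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
).to(0)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model.eval()


def stream_chat(image_path: str, prompt: str, temperature: float = 0.8, max_length: int = 1024):
    # Single-turn conversation; glm-4v-9b's chat template accepts an "image"
    # entry alongside the text content, as in the diff above.
    image = Image.open(image_path).convert("RGB")
    conversation = [{"role": "user", "image": image, "content": prompt}]

    inputs = tokenizer.apply_chat_template(
        conversation,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)

    # The streamer yields decoded text incrementally as generate() produces tokens.
    streamer = TextIteratorStreamer(
        tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True
    )
    generate_kwargs = dict(
        max_length=max_length,
        streamer=streamer,
        do_sample=True,
        top_k=1,
        temperature=temperature,
        repetition_penalty=1.2,
    )
    gen_kwargs = {**inputs, **generate_kwargs}

    # generate() blocks until it finishes, so it runs in a background thread
    # while this generator yields the growing answer to the UI.
    with torch.no_grad():
        thread = Thread(target=model.generate, kwargs=gen_kwargs)
        thread.start()
        buffer = ""
        for new_text in streamer:
            buffer += new_text
            yield buffer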