vilarin committed
Commit ab89095
1 Parent(s): cc9dc77

Update app.py

Files changed (1)
  1. app.py +52 -28
app.py CHANGED
@@ -1,11 +1,11 @@
-from threading import Thread
-import torch
 from PIL import Image
 import gradio as gr
 import spaces
-from transformers import AutoModel, AutoTokenizer, TextIteratorStreamer
 import os
-import time
+from huggingface_hub import hf_hub_download
+import base64
+from llama_cpp import Llama
+from llama_cpp.llama_chat_format import MoondreamChatHandler
 
 
 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
@@ -27,24 +27,34 @@ CSS = """
 }
 """
 
-model = AutoModel.from_pretrained(
-    MODEL_ID,
-    torch_dtype=torch.float16,
-    trust_remote_code=True
-).to(0)
-tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
-model.eval()
-
+chat_handler = MoondreamChatHandler.from_pretrained(
+    repo_id="openbmb/MiniCPM-Llama3-V-2_5-gguf",
+    filename="*mmproj*",
+)
+
+llm = Llama.from_pretrained(
+    repo_id="openbmb/MiniCPM-Llama3-V-2_5-gguf",
+    filename="ggml-model-Q5_K_M.gguf",
+    chat_handler=chat_handler,
+    n_ctx=2048,  # n_ctx should be increased to accommodate the image embedding
+)
 
 
 @spaces.GPU(queue=False)
 def stream_chat(message, history: list, temperature: float, max_new_tokens: int):
     print(f'message is - {message}')
     print(f'history is - {history}')
-    conversation = []
+    messages = []
+
     if message["files"]:
         image = Image.open(message["files"][-1]).convert('RGB')
-        conversation.append({"role": "user", "content": message['text']})
+        messages.append({
+            "role": "user",
+            "content": [
+                {"type": "text", "text": message['text']},
+                {"type": "image_url", "image_url": {"url": image}}
+            ]
+        })
     else:
         if len(history) == 0:
             raise gr.Error("Please upload an image first.")
@@ -53,25 +63,39 @@ def stream_chat(message, history: list, temperature: float, max_new_tokens: int)
             image = Image.open(history[0][0][0])
         for prompt, answer in history:
             if answer is None:
-                conversation.extend([{"role": "user", "content": prompt}, {"role": "assistant", "content": ""}])
+                messages.extend([{
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": prompt},
+                        {"type": "image_url", "image_url": {"url": image}}
+                    ]
+                }, {
+                    "role": "assistant",
+                    "content": ""
+                }])
             else:
-                conversation.extend([{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}])
-        conversation.append({"role": "user", "content": message['text']})
-    print(f"Conversation is -\n{conversation}")
-
-    generate_kwargs = dict(
-        image=image,
-        msgs=conversation,
-        max_new_tokens=max_new_tokens,
+                messages.extend([{
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": prompt},
+                        {"type": "image_url", "image_url": {"url": image}}
+                    ]
+                }, {
+                    "role": "assistant",
+                    "content": answer
+                }])
+        messages.append({"role": "user", "content": message['text']})
+    print(f"Messages is -\n{messages}")
+
+
+    response = llm.create_chat_completion(
+        messages=messages,
         temperature=temperature,
-        sampling=True,
-        tokenizer=tokenizer,
+        max_tokens=max_new_tokens,
+        stream=True
     )
-    if temperature == 0:
-        generate_kwargs["sampling"] = False
 
-    response = model.chat(**generate_kwargs)
-    return response
+    return response["choices"][0]["text"]
 
 
 chatbot = gr.Chatbot(height=450)
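
Note: llama-cpp-python's multimodal chat handlers (MoondreamChatHandler included) accept an image_url value only as an http(s) URL or a base64 data URI string, so passing the PIL.Image object straight through as {"url": image} will fail once generation runs. The newly added base64 import points at the intended fix; below is a minimal sketch of the missing conversion, with pil_to_data_uri as a hypothetical helper name (hf_hub_download is likewise imported but unused, since Llama.from_pretrained downloads the weights itself):

import io
import base64

def pil_to_data_uri(image):
    # Hypothetical helper: serialize the PIL image to PNG and embed it
    # as a base64 data URI, the inline-image form that llama-cpp-python
    # chat handlers accept in place of an http(s) URL.
    buf = io.BytesIO()
    image.save(buf, format="PNG")
    encoded = base64.b64encode(buf.getvalue()).decode("utf-8")
    return f"data:image/png;base64,{encoded}"

Each image part would then read {"type": "image_url", "image_url": {"url": pil_to_data_uri(image)}}.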
 
 
 
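Note: with stream=True, create_chat_completion returns an iterator of chunks, not a dict, so response["choices"][0]["text"] cannot work; even the non-streaming chat completion keeps its reply under choices[0]["message"]["content"] rather than "text". A minimal sketch of the tail of stream_chat if the stream is actually consumed, assuming Gradio's generator-based streaming:

    response = llm.create_chat_completion(
        messages=messages,
        temperature=temperature,
        max_tokens=max_new_tokens,
        stream=True,
    )
    # Each chunk follows the OpenAI chat-delta shape; accumulate the
    # fragments and yield the running text so the chatbot updates in place.
    buffer = ""
    for chunk in response:
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:
            buffer += delta["content"]
            yield buffer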
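For reference, a minimal non-streaming sanity check of the new GGUF pipeline outside the Space, assuming a placeholder image URL:

from llama_cpp import Llama
from llama_cpp.llama_chat_format import MoondreamChatHandler

llm = Llama.from_pretrained(
    repo_id="openbmb/MiniCPM-Llama3-V-2_5-gguf",
    filename="ggml-model-Q5_K_M.gguf",
    chat_handler=MoondreamChatHandler.from_pretrained(
        repo_id="openbmb/MiniCPM-Llama3-V-2_5-gguf",
        filename="*mmproj*",
    ),
    n_ctx=2048,  # room for the image embedding plus the reply
)
out = llm.create_chat_completion(
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this image in one sentence."},
            {"type": "image_url", "image_url": {"url": "https://example.com/sample.jpg"}},
        ],
    }],
)
# Non-streaming replies live under message.content, not "text".
print(out["choices"][0]["message"]["content"])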