vilarin committed on
Commit
c98b207
1 Parent(s): bb45d22

Update app.py

Files changed (1)
  1. app.py +164 -165
app.py CHANGED
@@ -1,166 +1,165 @@
- from threading import Thread
- import torch
- from PIL import Image
- import gradio as gr
- import spaces
- from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, TextIteratorStreamer
- import os
- from huggingface_hub import hf_hub_download
-
-
-
- os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
-
- HF_TOKEN = os.environ.get("HF_TOKEN", None)
- MODEL_ID = os.environ.get("MODEL_ID")
- MODEL_NAME = MODEL_ID.split("/")[-1]
-
- TITLE = "<h1><center>VL-Chatbox</center></h1>"
-
- DESCRIPTION = "<h3><center>MODEL LOADED: " + MODEL_NAME + "</center></h3>"
-
- DEFAULT_SYSTEM = "You named Chatbox. You are a good assitant."
-
- CSS = """
- .duplicate-button {
-     margin: auto !important;
-     color: white !important;
-     background: black !important;
-     border-radius: 100vh !important;
- }
- """
-
- filenames = [
-     ".gitattributes",
-     "generation_config.json",
-     "model-00001-of-00004.safetensors",
-     "model-00002-of-00004.safetensors",
-     "model-00003-of-00004.safetensors",
-     "model-00004-of-00004.safetensors",
-     "model.safetensors.index.json",
-     "special_tokens_map.json",
-     "tokenizer.json",
-     "tokenizer_config.json"
- ]
-
- for filename in filenames:
-     downloaded_model_path = hf_hub_download(
-         repo_id=MODEL_ID,
-         filename=filename,
-         local_dir="model"
-     )
-
- # def no_logger():
- #     logging.config.dictConfig({
- #         'version': 1,
- #         'disable_existing_loggers': True,
- #     })
-
-
- # List of domains
-
- MODEL_PATH = "./model/"
-
- model = AutoModelForCausalLM.from_pretrained(
-     MODEL_PATH,
-     torch_dtype=torch.float16,
-     low_cpu_mem_usage=True,
- ).to(0)
- tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
- vision_tower = model.get_vision_tower()
- vision_tower.load_model()
- vision_tower.to(device="cuda", dtype=torch.float16)
- image_processor = vision_tower.image_processor
- tokenizer.pad_token = tokenizer.eos_token
-
- # Define terminators (if applicable, adjust as needed)
- terminators = [
-     tokenizer.eos_token_id,
-     tokenizer.convert_tokens_to_ids("<|eot_id|>")
- ]
-
-
-
-
- @spaces.GPU
- def stream_chat(message, history: list, system: str, temperature: float, max_new_tokens: int):
-     print(message)
-     conversation = [{"role": "system", "content": system or DEFAULT_SYSTEM}]
-     for prompt, answer in history:
-         conversation.extend([{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}])
-
-     conversation.append({"role": "user", "content": message['text']})
-     if message["files"]:
-         image = Image.open(message["files"][0]).convert('RGB')
-         # Process the conversation text
-         inputs = model.build_conversation_input_ids(tokenizer, query=message['text'], image=image, image_processor=image_processor)
-         input_ids = inputs["input_ids"].to(device='cuda', non_blocking=True)
-         images = inputs["image"].to(dtype=torch.float16, device='cuda', non_blocking=True)
-     else:
-         input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt").to(model.device)
-         images = None
-
-     streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
-
-     generate_kwargs = dict(
-         input_ids=input_ids,
-         streamer=streamer,
-         max_new_tokens=max_new_tokens,
-         temperature=temperature,
-         do_sample=True,
-         eos_token_id=terminators,
-         images=images
-     )
-     if temperature == 0:
-         generate_kwargs["do_sample"] = False
-
-     t = Thread(target=model.generate, kwargs=generate_kwargs)
-     t.start()
-     output = ""
-     for new_token in streamer:
-         output += new_token
-         yield output
-
-
- chatbot = gr.Chatbot(height=450)
- chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeholder="Enter message or upload file...", show_label=False)
-
- with gr.Blocks(css=CSS) as demo:
-     gr.HTML(TITLE)
-     gr.HTML(DESCRIPTION)
-     gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
-     gr.ChatInterface(
-         fn=stream_chat,
-         multimodal=True,
-         chatbot=chatbot,
-         textbox=chat_input,
-         fill_height=True,
-         additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
-         additional_inputs=[
-             gr.Text(
-                 value="",
-                 label="System",
-                 render=False,
-             ),
-             gr.Slider(
-                 minimum=0,
-                 maximum=1,
-                 step=0.1,
-                 value=0.8,
-                 label="Temperature",
-                 render=False,
-             ),
-             gr.Slider(
-                 minimum=128,
-                 maximum=4096,
-                 step=1,
-                 value=1024,
-                 label="Max new tokens",
-                 render=False,
-             ),
-         ],
-     )
-
-
- if __name__ == "__main__":
+ from threading import Thread
+ import torch
+ from PIL import Image
+ import gradio as gr
+ import spaces
+ from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, TextIteratorStreamer
+ import os
+ from huggingface_hub import hf_hub_download
+
+
+
+ os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
+
+ HF_TOKEN = os.environ.get("HF_TOKEN", None)
+ MODEL_ID = os.environ.get("MODEL_ID")
+ MODEL_NAME = MODEL_ID.split("/")[-1]
+
+ TITLE = "<h1><center>VL-Chatbox</center></h1>"
+
+ DESCRIPTION = "<h3><center>MODEL LOADED: " + MODEL_NAME + "</center></h3>"
+
+ DEFAULT_SYSTEM = "You named Chatbox. You are a good assitant."
+
+ CSS = """
+ .duplicate-button {
+     margin: auto !important;
+     color: white !important;
+     background: black !important;
+     border-radius: 100vh !important;
+ }
+ """
+
+ filenames = [
+     ".gitattributes",
+     "generation_config.json",
+     "model-00001-of-00004.safetensors",
+     "model-00002-of-00004.safetensors",
+     "model-00003-of-00004.safetensors",
+     "model-00004-of-00004.safetensors",
+     "model.safetensors.index.json",
+     "special_tokens_map.json",
+     "tokenizer.json",
+     "tokenizer_config.json"
+ ]
+
+ for filename in filenames:
+     downloaded_model_path = hf_hub_download(
+         repo_id=MODEL_ID,
+         filename=filename,
+         local_dir="model"
+     )
+
+ # def no_logger():
+ #     logging.config.dictConfig({
+ #         'version': 1,
+ #         'disable_existing_loggers': True,
+ #     })
+
+
+ # List of domains
+
+ model = AutoModelForCausalLM.from_pretrained(
+     './model/',
+     torch_dtype=torch.float16,
+     low_cpu_mem_usage=True,
+     local_files_only=True
+ ).to(0)
+ tokenizer = AutoTokenizer.from_pretrained('./model/',local_files_only=True)
+ vision_tower = model.get_vision_tower()
+ vision_tower.load_model()
+ vision_tower.to(device="cuda", dtype=torch.float16)
+ image_processor = vision_tower.image_processor
+ tokenizer.pad_token = tokenizer.eos_token
+
+ # Define terminators (if applicable, adjust as needed)
+ terminators = [
+     tokenizer.eos_token_id,
+     tokenizer.convert_tokens_to_ids("<|eot_id|>")
+ ]
+
+
+
+
+ @spaces.GPU
+ def stream_chat(message, history: list, system: str, temperature: float, max_new_tokens: int):
+     print(message)
+     conversation = [{"role": "system", "content": system or DEFAULT_SYSTEM}]
+     for prompt, answer in history:
+         conversation.extend([{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}])
+
+     conversation.append({"role": "user", "content": message['text']})
+     if message["files"]:
+         image = Image.open(message["files"][0]).convert('RGB')
+         # Process the conversation text
+         inputs = model.build_conversation_input_ids(tokenizer, query=message['text'], image=image, image_processor=image_processor)
+         input_ids = inputs["input_ids"].to(device='cuda', non_blocking=True)
+         images = inputs["image"].to(dtype=torch.float16, device='cuda', non_blocking=True)
+     else:
+         input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt").to(model.device)
+         images = None
+
+     streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
+
+     generate_kwargs = dict(
+         input_ids=input_ids,
+         streamer=streamer,
+         max_new_tokens=max_new_tokens,
+         temperature=temperature,
+         do_sample=True,
+         eos_token_id=terminators,
+         images=images
+     )
+     if temperature == 0:
+         generate_kwargs["do_sample"] = False
+
+     t = Thread(target=model.generate, kwargs=generate_kwargs)
+     t.start()
+     output = ""
+     for new_token in streamer:
+         output += new_token
+         yield output
+
+
+ chatbot = gr.Chatbot(height=450)
+ chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeholder="Enter message or upload file...", show_label=False)
+
+ with gr.Blocks(css=CSS) as demo:
+     gr.HTML(TITLE)
+     gr.HTML(DESCRIPTION)
+     gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
+     gr.ChatInterface(
+         fn=stream_chat,
+         multimodal=True,
+         chatbot=chatbot,
+         textbox=chat_input,
+         fill_height=True,
+         additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
+         additional_inputs=[
+             gr.Text(
+                 value="",
+                 label="System",
+                 render=False,
+             ),
+             gr.Slider(
+                 minimum=0,
+                 maximum=1,
+                 step=0.1,
+                 value=0.8,
+                 label="Temperature",
+                 render=False,
+             ),
+             gr.Slider(
+                 minimum=128,
+                 maximum=4096,
+                 step=1,
+                 value=1024,
+                 label="Max new tokens",
+                 render=False,
+             ),
+         ],
+     )
+
+
+ if __name__ == "__main__":
      demo.queue(api_open=False).launch(show_api=False, share=False)
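
For context, a minimal sketch of the loading pattern this commit moves app.py to: the checkpoint files are first downloaded into a local model/ directory with hf_hub_download, and from_pretrained is then pointed at that directory with local_files_only=True so loading never falls back to the Hub. The two-file list below is abbreviated for illustration; the full list appears in the diff above.

# Sketch of the download-then-load-offline pattern used in the new app.py.
import os
import torch
from huggingface_hub import hf_hub_download
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = os.environ.get("MODEL_ID")  # repo id of the checkpoint, as in app.py

# Fetch the repo files into ./model (abbreviated list; app.py downloads the full set).
for filename in ["generation_config.json", "tokenizer.json"]:
    hf_hub_download(repo_id=MODEL_ID, filename=filename, local_dir="model")

# Load strictly from the local snapshot; local_files_only=True is the flag this commit adds.
model = AutoModelForCausalLM.from_pretrained(
    "./model/",
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    local_files_only=True,
)
tokenizer = AutoTokenizer.from_pretrained("./model/", local_files_only=True)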