gxy committed
Commit aa98bc6
1 Parent(s): 4f0ce6e

FEAT: first commit

README.md CHANGED
@@ -3,7 +3,9 @@ title: Ziya BLIP2 14B Visual V1 Demo
  emoji: 😻
  colorFrom: gray
  colorTo: red
- sdk: docker
+ sdk: gradio
+ sdk_version: 3.27.0
+ app_file: app.py
  pinned: false
  license: apache-2.0
  ---
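After this change the Space runs app.py on the Gradio SDK instead of a custom Docker image. The merged front matter reads:

```yaml
---
title: Ziya BLIP2 14B Visual V1 Demo
emoji: 😻
colorFrom: gray
colorTo: red
sdk: gradio
sdk_version: 3.27.0
app_file: app.py
pinned: false
license: apache-2.0
---
```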
app.py ADDED
@@ -0,0 +1,198 @@
+ #!/usr/bin/env python
+ # Adapted from https://huggingface.co/spaces/lykeven/visualglm-6b
+ import gradio as gr
+ import re
+ from PIL import Image
+ import torch
+ from io import BytesIO
+ import hashlib
+ import os
+ import json
+ from transformers import LlamaForCausalLM, LlamaTokenizer, BlipImageProcessor, BitsAndBytesConfig, AutoModelForCausalLM
+
+ DESCRIPTION = '''# <a href="https://huggingface.co/IDEA-CCNL/Ziya-BLIP2-14B-Visual-v1">Ziya-Blip2-14B</a>'''
+
+ MAINTENANCE_NOTICE1 = 'Hint 1: If the app reports "Something went wrong, connection error out", please turn off your proxy and retry.\nHint 2: If you upload a large image, e.g. 10MB, it may take some time to upload and process. Please be patient and wait.'
+ # Chinese version of the two hints above, shown when the hint language is toggled.
+ MAINTENANCE_NOTICE2 = '提示1: 如果应用报了“Something went wrong, connection error out”的错误,请关闭代理并重试。\n提示2: 如果你上传了很大的图片,比如10MB大小,那将需要一些时间来上传和处理,请耐心等待。'
+
+ NOTES = 'This app is adapted from <a href="https://huggingface.co/IDEA-CCNL/Ziya-BLIP2-14B-Visual-v1">https://huggingface.co/IDEA-CCNL/Ziya-BLIP2-14B-Visual-v1</a>. We recommend checking out that repo for details of the model. Most of the code in this demo is modified from <a href="https://huggingface.co/spaces/lykeven/visualglm-6b">lykeven/visualglm-6b</a>.'
+
+ default_chatbox = []
+
+
+ def is_chinese(text):
+     # Returns a truthy match object if the text contains any CJK ideographs.
+     zh_pattern = re.compile(u'[\u4e00-\u9fa5]+')
+     return zh_pattern.search(text)
+
+ AUTH_TOKEN = os.getenv("AUTH_TOKEN")
+
+ # Language model, loaded in 8-bit to fit on a single GPU.
+ LM_MODEL_PATH = "gxy/Ziya-LLaMA-13B-v1"
+ lm_model = LlamaForCausalLM.from_pretrained(
+     LM_MODEL_PATH,
+     device_map="auto",
+     torch_dtype=torch.float16,
+     use_auth_token=AUTH_TOKEN,
+     quantization_config=BitsAndBytesConfig(load_in_8bit=True))
+ tokenizer = LlamaTokenizer.from_pretrained(LM_MODEL_PATH)
+
+ # Visual model.
+ OPENAI_CLIP_MEAN = [0.48145466, 0.4578275, 0.40821073]
+ OPENAI_CLIP_STD = [0.26862954, 0.26130258, 0.27577711]
+ # When demo.py sits in the project directory you can pass the local path "."; otherwise use "IDEA-CCNL/Ziya-BLIP2-14B-Visual-v1".
+ model = AutoModelForCausalLM.from_pretrained(
+     "IDEA-CCNL/Ziya-BLIP2-14B-Visual-v1",
+     trust_remote_code=True,
+     torch_dtype=torch.float16)
+ model.cuda()  # comment this line out to run on CPU
+ # Swap the checkpoint's language model for the 8-bit Ziya-LLaMA loaded above.
+ model.language_model = lm_model
+ image_size = model.config.vision_config.image_size
+ image_processor = BlipImageProcessor(
+     size={"height": image_size, "width": image_size},
+     image_mean=OPENAI_CLIP_MEAN,
+     image_std=OPENAI_CLIP_STD,
+ )
+
+
+ def post(
+         input_text,
+         temperature,
+         top_p,
+         image_prompt,
+         result_previous,
+         hidden_image
+ ):
+     result_text = [(ele[0], ele[1]) for ele in result_previous]
+     previous_querys = []
+     previous_outputs = []
+     # Walk the history backwards, dropping empty turns and collecting the rest.
+     for i in range(len(result_text) - 1, -1, -1):
+         if result_text[i][0] == "":
+             del result_text[i]
+         else:
+             previous_querys.append(result_text[i][0])
+             previous_outputs.append(result_text[i][1])
+
+     is_zh = is_chinese(input_text)
+
+     if image_prompt is None:
+         print("Image empty")
+         if is_zh:
+             result_text.append((input_text, '图片为空!请上传图片并重试。'))  # "Image empty! Please upload an image and retry."
+         else:
+             result_text.append((input_text, 'Image empty! Please upload an image and retry.'))
+         return input_text, result_text, hidden_image
+     elif input_text == "":
+         print("Text empty")
+         result_text.append((input_text, 'Text empty! Please enter text and retry.'))
+         return "", result_text, hidden_image
+
+     generate_config = {
+         "max_new_tokens": 128,
+         "top_p": top_p,
+         "temperature": temperature,
+         "repetition_penalty": 1.18,
+     }
+     img = Image.open(image_prompt)
+     pixel_values = image_processor(
+         img, return_tensors="pt").pixel_values.to(model.device).to(model.dtype)
+     # Hash the uploaded image; a new image resets the conversation history.
+     output_buffer = BytesIO()
+     img.save(output_buffer, "PNG")
+     byte_data = output_buffer.getvalue()
+     md = hashlib.md5()
+     md.update(byte_data)
+     img_hash = md.hexdigest()
+     if img_hash != hidden_image:
+         previous_querys = []
+         previous_outputs = []
+         result_text = []
+
+     answer = model.chat(
+         tokenizer=tokenizer,
+         pixel_values=pixel_values,
+         query=input_text,
+         previous_querys=previous_querys,
+         previous_outputs=previous_outputs,
+         **generate_config,
+     )
+
+     result_text.append((input_text, answer))
+     print(result_text)
+     return "", result_text, img_hash
+
+
+ def clear_fn(value):
+     return "", default_chatbox, None
+
+
+ def clear_fn2(value):
+     return default_chatbox
+
+
+ def io_fn(a, b, c):
+     print("call io_fn")
+     return a, b
+
+
+ def change_language(value):
+     # Toggles the maintenance notice between English and Chinese;
+     # "提示变为中文" means "Change hint to Chinese".
+     if value == "Change hint to English":
+         return "提示变为中文", MAINTENANCE_NOTICE1
+     else:
+         return "Change hint to English", MAINTENANCE_NOTICE2
+
+
+ def main():
+     gr.close_all()
+     examples = []
+     with open("./examples/example_inputs.jsonl") as f:
+         for line in f:
+             data = json.loads(line)
+             examples.append(data)
+
+     with gr.Blocks(css='style.css') as demo:
+         with gr.Row():
+             with gr.Column(scale=4.5):
+                 with gr.Group():
+                     input_text = gr.Textbox(label='Input Text', placeholder='Please enter text prompt below and press ENTER.')
+                     with gr.Row():
+                         run_button = gr.Button('Generate')
+                         clear_button = gr.Button('Clear')
+                     image_prompt = gr.Image(type="filepath", label="Image Prompt", value=None)
+                 with gr.Row():
+                     temperature = gr.Slider(maximum=1, value=0.7, minimum=0, label='Temperature')
+                     top_p = gr.Slider(maximum=1, value=0.1, minimum=0, label='Top P')
+                 with gr.Group():
+                     with gr.Row():
+                         with gr.Column(scale=7):
+                             maintenance_notice = gr.Markdown(MAINTENANCE_NOTICE1)
+                         with gr.Column(scale=2):
+                             # NOTE: change_button is defined but never wired to change_language in this commit.
+                             change_button = gr.Button('Change hint to English', visible=False)
+             with gr.Column(scale=5.5):
+                 result_text = gr.components.Chatbot(label='Multi-round Conversation History', value=[]).style(height=550)
+                 hidden_image_hash = gr.Textbox(visible=False)
+
+         gr_examples = gr.Examples(examples=[[example["text"], example["image"]] for example in examples],
+                                   inputs=[input_text, image_prompt],
+                                   label="Example Inputs (Click to insert an example into the input box)",
+                                   examples_per_page=3)
+
+         gr.Markdown(NOTES)
+
+         print(gr.__version__)
+         run_button.click(fn=post, inputs=[input_text, temperature, top_p, image_prompt, result_text, hidden_image_hash],
+                          outputs=[input_text, result_text, hidden_image_hash])
+         input_text.submit(fn=post, inputs=[input_text, temperature, top_p, image_prompt, result_text, hidden_image_hash],
+                           outputs=[input_text, result_text, hidden_image_hash])
+         clear_button.click(fn=clear_fn, inputs=clear_button, outputs=[input_text, result_text, image_prompt])
+         image_prompt.upload(fn=clear_fn2, inputs=clear_button, outputs=[result_text])
+         image_prompt.clear(fn=clear_fn2, inputs=clear_button, outputs=[result_text])
+
+     demo.queue(concurrency_count=10)
+     demo.launch(server_name="0.0.0.0")
+
+
+ if __name__ == '__main__':
+     main()
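For reference, a minimal single-turn sketch of the same inference path that app.py wires into Gradio. This is a sketch under assumptions, not part of the commit: it assumes a CUDA GPU with enough memory, bitsandbytes installed, and access to both checkpoints (gxy/Ziya-LLaMA-13B-v1 may require an access token, as AUTH_TOKEN does in app.py). All calls mirror app.py.

```python
# Minimal command-line sketch of the demo's inference path (assumptions above).
import torch
from PIL import Image
from transformers import (AutoModelForCausalLM, BitsAndBytesConfig,
                          BlipImageProcessor, LlamaForCausalLM,
                          LlamaTokenizer)

OPENAI_CLIP_MEAN = [0.48145466, 0.4578275, 0.40821073]
OPENAI_CLIP_STD = [0.26862954, 0.26130258, 0.27577711]

# 8-bit language model, loaded exactly as in app.py.
lm_model = LlamaForCausalLM.from_pretrained(
    "gxy/Ziya-LLaMA-13B-v1",
    device_map="auto",
    torch_dtype=torch.float16,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True))
tokenizer = LlamaTokenizer.from_pretrained("gxy/Ziya-LLaMA-13B-v1")

# Vision-language wrapper with the 8-bit LM swapped in.
model = AutoModelForCausalLM.from_pretrained(
    "IDEA-CCNL/Ziya-BLIP2-14B-Visual-v1",
    trust_remote_code=True,
    torch_dtype=torch.float16).cuda()
model.language_model = lm_model

image_size = model.config.vision_config.image_size
processor = BlipImageProcessor(
    size={"height": image_size, "width": image_size},
    image_mean=OPENAI_CLIP_MEAN,
    image_std=OPENAI_CLIP_STD)

img = Image.open("examples/1.jpg")
pixel_values = processor(img, return_tensors="pt").pixel_values.to(
    model.device).to(model.dtype)
answer = model.chat(
    tokenizer=tokenizer,
    pixel_values=pixel_values,
    query="这是什么游戏",  # "What game is this?"
    previous_querys=[],    # empty history = single-turn
    previous_outputs=[],
    max_new_tokens=128, top_p=0.1, temperature=0.7,
    repetition_penalty=1.18)
print(answer)
```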
examples/1.jpg ADDED
examples/2.jpg ADDED
examples/3.jpg ADDED
examples/example_inputs.jsonl ADDED
@@ -0,0 +1,3 @@
+ {"id":1, "text": "这是什么游戏", "image": "examples/1.jpg"}
+ {"id":2, "text": "这张图描述了什么", "image": "examples/2.jpg"}
+ {"id":3, "text": "这张图有什么奇怪的地方", "image": "examples/3.jpg"}
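For reference, the three example prompts translate to: "What game is this?" (1.jpg), "What does this picture describe?" (2.jpg), and "What is odd about this picture?" (3.jpg).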
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ transformers
+ torch
+ https://github.com/GGGGGGXY/bitsandbytes/releases/download/0.39/bitsandbytes-0.39.0-py3-none-any.whl
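The last entry pins a prebuilt wheel of a bitsandbytes fork, which app.py relies on for the BitsAndBytesConfig(load_in_8bit=True) quantized load of the 13B language model. A quick post-install smoke test (hypothetical, not part of the Space):

```python
# Verify the pinned wheel installed and that a CUDA device is present,
# since 8-bit loading requires one.
import torch
from importlib.metadata import version

print(version("bitsandbytes"))    # expect 0.39.0 from the pinned wheel
print(torch.cuda.is_available())  # must be True for load_in_8bit
```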
style.css ADDED
@@ -0,0 +1,7 @@
+ h1 {
+     text-align: center;
+ }
+ img#visitor-badge {
+     display: block;
+     margin: auto;
+ }