lkm2835 committed
Commit 4295bdc β€’ 1 Parent(s): 406f7d0

Update app.py

Files changed (1)
  1. app.py +100 -90
app.py CHANGED
@@ -5,41 +5,73 @@ import gradio as gr
 import torch
 import spaces
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+from huggingface_hub import InferenceClient
+
 
-MODEL_LIST = ["LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct"]
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
-MODEL = MODEL_LIST[0]
+MODEL = "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct"
+MAX_NEW_TOKENS = 4096
+DEFAULT_MAX_NEW_TOKENS = 512
+MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "16384"))
+
 
 DESCRIPTION = """\
 # <center> EXAONE 3.5: Series of Large Language Models for Real-world Use Cases </center>
 
-##### We hope EXAONE continues to advance Expert AI with its effectiveness and bilingual skills.
-
-<center>This is an official demo of <a href=https://huggingface.co/LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct>LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct</a>, fine-tuned for instruction following.</center>
+##### <center> We hope EXAONE continues to advance Expert AI with its effectiveness and bilingual skills. </center>
 
-<center>πŸ‘‹ For more details, please check <a href=https://www.lgresearch.ai/blog/view?seq=507>our blog</a> or <a href=https://arxiv.org/abs/2412.04862>technical report</a></center>
+<center>πŸ‘‹ For more details, please check <a href=https://huggingface.co/collections/LGAI-EXAONE/exaone-35-674d0e1bb3dcd2ab6f39dbb4>EXAONE-3.5 collections</a>, <a href=https://www.lgresearch.ai/blog/view?seq=507>our blog</a> or <a href=https://www.lgresearch.ai/data/upload/tech_report/ko/Technical_report_EXAONE_3.5.pdf>technical report</a></center>
 
-#### <center> EXAONE-3.5-2.4B-Instruct and EXAONE-3.5-32B-Instruct Demo Coming Soon.. </center>
+#### <center> EXAONE-3.5-32B-Instruct Demo Coming Soon.. </center>
 """
 
-MAX_NEW_TOKENS = 2048
-DEFAULT_MAX_NEW_TOKENS = 512
-MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "16384"))
-
-device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-
-tokenizer = AutoTokenizer.from_pretrained(MODEL)
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL,
-    torch_dtype=torch.bfloat16,
-    trust_remote_code=True,
-    device_map="auto",
-)
 
-model.eval()
+EXAMPLES = [
+    ["Explain how wonderful you are"],
+    ["슀슀둜λ₯Ό μžλž‘ν•΄ 봐"],
+]
+BOT_AVATAR = "EXAONE_logo.png"
+selected_model = gr.Radio(value="https://jps6tfdq34ydttbh.us-east4.gcp.endpoints.huggingface.cloud", visible=False)
+ADDITIONAL_INPUTS = [
+    gr.Textbox(
+        value="You are EXAONE model from LG AI Research, a helpful assistant.",
+        label="System Prompt",
+        render=False,
+    ),
+    gr.Slider(
+        label="Max new tokens",
+        minimum=1,
+        maximum=MAX_NEW_TOKENS,
+        step=1,
+        value=DEFAULT_MAX_NEW_TOKENS,
+    ),
+    gr.Slider(
+        label="Temperature",
+        minimum=0.1,
+        maximum=2.0,
+        step=0.1,
+        value=0.7,
+    ),
+    gr.Slider(
+        label="Top-p (nucleus sampling)",
+        minimum=0.05,
+        maximum=1.0,
+        step=0.05,
+        value=0.9,
+    ),
+    gr.Slider(
+        label="Top-k",
+        minimum=1,
+        maximum=1000,
+        step=1,
+        value=1,
+    ),
+    selected_model
+]
+
+tokenizer = AutoTokenizer.from_pretrained("LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct")
 
 
-@spaces.GPU()
 def generate(
     message: str,
     chat_history: list[tuple[str, str]],
@@ -48,7 +80,9 @@ def generate(
     temperature: float = 0.6,
     top_p: float = 0.9,
     top_k: int = 50,
+    selected_model: str = "https://jps6tfdq34ydttbh.us-east4.gcp.endpoints.huggingface.cloud",
 ) -> Iterator[str]:
+    print(f'model: {selected_model}')
     messages = [{"role":"system","content": system_prompt}]
     print(f'message: {message}')
     print(f'chat_history: {chat_history}')
@@ -69,88 +103,64 @@
     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
         gr.Warning(f"Trimmed input from messages as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
-    input_ids = input_ids.to(model.device)
+
+    messages = tokenizer.decode(input_ids[0])
+
+    client = InferenceClient(selected_model, token=HF_TOKEN)
 
-    streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
-    generate_kwargs = dict(
-        {"input_ids": input_ids},
-        streamer=streamer,
+    gen_kwargs = dict(
         max_new_tokens=max_new_tokens,
-        do_sample=False if top_k == 1 else True,
        top_p=top_p,
        top_k=top_k,
        temperature=temperature,
-        num_beams=1,
-        repetition_penalty=1.0,
+        stop=["[|endofturn|]"]
     )
-    t = Thread(target=model.generate, kwargs=generate_kwargs)
-    t.start()
 
-    outputs = []
-    for text in streamer:
-        outputs.append(text)
-        yield "".join(outputs)
+    output = client.text_generation(messages, **gen_kwargs)
 
+    return output
 
-BOT_AVATAR = "EXAONE_logo.png"
 
-chatbot = gr.Chatbot(
-    label="EXAONE-3.5-7.8B-Instruct",
-    avatar_images=[None, BOT_AVATAR],
-    layout="bubble",
-    bubble_full_width=False
-)
+def radio1_change(model_size):
+    return f"<center><font size=5>EXAONE-3.5-{model_size}-instruct</center>"
+
+
+def choices_model(model_size):
+    endpoint_url_dict = {
+        "2.4B": "https://jps6tfdq34ydttbh.us-east4.gcp.endpoints.huggingface.cloud",  # L4
+        "7.8B": "https://wafz6im0d595g715.us-east-1.aws.endpoints.huggingface.cloud",  # L40S
+    }
+    return endpoint_url_dict[model_size]
+
 
 chat_interface = gr.ChatInterface(
-    fn=generate,
-    chatbot=chatbot,
-    additional_inputs=[
-        gr.Textbox(
-            value="You are EXAONE model from LG AI Research, a helpful assistant.",
-            label="System Prompt",
-            render=False,
-        ),
-        gr.Slider(
-            label="Max new tokens",
-            minimum=1,
-            maximum=MAX_NEW_TOKENS,
-            step=1,
-            value=DEFAULT_MAX_NEW_TOKENS,
-        ),
-        gr.Slider(
-            label="Temperature",
-            minimum=0.1,
-            maximum=2.0,
-            step=0.1,
-            value=0.7,
-        ),
-        gr.Slider(
-            label="Top-p (nucleus sampling)",
-            minimum=0.05,
-            maximum=1.0,
-            step=0.05,
-            value=0.9,
+    fn=generate,
+    chatbot=gr.Chatbot(
+        label="EXAONE-3.5-Instruct",
+        avatar_images=[None, BOT_AVATAR],
+        layout="bubble",
+        bubble_full_width=False
     ),
-        gr.Slider(
-            label="Top-k",
-            minimum=1,
-            maximum=1000,
-            step=1,
-            value=1,
-        ),
-    ],
-    stop_btn=None,
-    examples=[
-        ["Explain who you are"],
-        ["λ„ˆμ˜ μ†Œμ›μ„ 말해봐"],
-    ],
-    cache_examples=False,
-)
-
-with gr.Blocks(css="style.css", fill_height=True) as demo:
-    gr.Markdown("""<p align="center"><img src="https://huggingface.co/spaces/LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct-Demo/resolve/main/EXAONE_Symbol%2BBI_3d.png" style="margin-right: 20px; height: 60px"/><p>""")
+    additional_inputs=ADDITIONAL_INPUTS,
+    stop_btn=None,
+    examples=EXAMPLES,
+    cache_examples=False,
+)
+
+
+with gr.Blocks(fill_height=True) as demo:
+    gr.Markdown("""<p align="center"><img src="https://huggingface.co/spaces/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct-Demo/resolve/main/EXAONE_Symbol%2BBI_3d.png" style="margin-right: 20px; height: 50px"/><p>""")
     gr.Markdown(DESCRIPTION)
+
+    markdown = gr.Markdown("<center><font size=5>EXAONE-3.5-2.4B-instruct</center>")
+    with gr.Row():
+        model_size = ["2.4B", "7.8B"]
+        radio1 = gr.Radio(choices=model_size, label="EXAONE-3.5-Instruct", value=model_size[0])
+
+    radio1.change(radio1_change, inputs=radio1, outputs=markdown)
+    radio1.change(choices_model, inputs=radio1, outputs=selected_model)
    chat_interface.render()
 
+
 if __name__ == "__main__":
-    demo.queue(max_size=20).launch()
+    demo.queue(max_size=25).launch()
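
What changed, in short: the Space no longer loads EXAONE weights locally via `transformers` and streams tokens from a background thread; `generate()` now decodes the chat-templated prompt back to text and sends it to a dedicated Hugging Face Inference Endpoint through `InferenceClient`, stopping on the `[|endofturn|]` marker. Because the function now returns a single string instead of yielding partial outputs, the UI no longer streams token by token. The sketch below condenses that new request path into a standalone helper; `query_endpoint` is illustrative (not part of the commit), and it assumes the unchanged middle of `generate()` builds `input_ids` with the tokenizer's chat template, which this diff does not show.

```python
# Hypothetical standalone sketch of the request path introduced by this commit.
# Assumptions: HF_TOKEN is set in the environment, and the prompt is built with
# the tokenizer's chat template (the diff only shows the decode step).
import os

from huggingface_hub import InferenceClient
from transformers import AutoTokenizer

ENDPOINT_URL = "https://jps6tfdq34ydttbh.us-east4.gcp.endpoints.huggingface.cloud"  # 2.4B endpoint from the diff
tokenizer = AutoTokenizer.from_pretrained("LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct")


def query_endpoint(message: str, system_prompt: str, max_new_tokens: int = 512) -> str:
    # Assemble the conversation and render it with EXAONE's chat template.
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": message},
    ]
    input_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
    # The Space sends the decoded prompt text, not token ids, to the endpoint.
    prompt = tokenizer.decode(input_ids[0])

    client = InferenceClient(ENDPOINT_URL, token=os.environ.get("HF_TOKEN"))
    return client.text_generation(
        prompt,
        max_new_tokens=max_new_tokens,
        temperature=0.7,           # default slider values from the demo UI
        top_p=0.9,
        top_k=1,
        stop=["[|endofturn|]"],    # EXAONE end-of-turn marker, as in the diff
    )
```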
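The 2.4B/7.8B switcher relies on a hidden component riding along in `additional_inputs`: the visible `radio1` control fires two `.change` handlers, one updating the heading markdown and one writing the matching endpoint URL into the invisible `selected_model` radio, which `ChatInterface` then passes to `generate()` as its last argument. Below is a minimal, self-contained sketch of the same pattern; the component names and the placeholder chat function are hypothetical, and only the two endpoint URLs are taken from the diff.

```python
# Minimal sketch of the endpoint-switching pattern used in app.py (illustrative names).
import gradio as gr

ENDPOINTS = {
    "2.4B": "https://jps6tfdq34ydttbh.us-east4.gcp.endpoints.huggingface.cloud",   # L4, from the diff
    "7.8B": "https://wafz6im0d595g715.us-east-1.aws.endpoints.huggingface.cloud",  # L40S, from the diff
}

# Hidden value carrier: invisible to the user, but forwarded to the chat function.
selected_endpoint = gr.Textbox(value=ENDPOINTS["2.4B"], visible=False)


def chat_fn(message, history, endpoint_url):
    # Placeholder: the real app calls InferenceClient(endpoint_url).text_generation(...) here.
    return f"(would query {endpoint_url}) {message}"


chat = gr.ChatInterface(fn=chat_fn, additional_inputs=[selected_endpoint])

with gr.Blocks() as demo:
    size = gr.Radio(choices=list(ENDPOINTS), value="2.4B", label="EXAONE-3.5-Instruct")
    # Writing the mapped URL into the hidden textbox changes which endpoint chat_fn sees.
    size.change(lambda s: ENDPOINTS[s], inputs=size, outputs=selected_endpoint)
    chat.render()

if __name__ == "__main__":
    demo.queue().launch()
```

Using a hidden input this way avoids threading the selection through session state: whatever value the hidden component holds at submit time is simply forwarded to the chat function along with the other additional inputs.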