yentinglin committed on
Commit
7a42d65
·
verified ·
1 Parent(s): d219c1f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -35
app.py CHANGED
@@ -1,10 +1,8 @@
1
  import os
2
-
3
  import gradio as gr
4
- from text_generation import Client
5
- from conversation import get_conv_template
6
  from transformers import AutoTokenizer
7
  from pymongo import MongoClient
 
8
 
9
  DB_NAME = os.getenv("MONGO_DBNAME", "taiwan-llm")
10
  USER = os.getenv("MONGO_USER")
@@ -60,7 +58,7 @@ LICENSE = """
60
  ## Licenses
61
 
62
  - Code is licensed under Apache 2.0 License.
63
- - Models are licensed under the LLAMA 2 Community License.
64
  - By using this model, you agree to the terms and conditions specified in the license.
65
  - By using this demo, you agree to share your input utterances with us to improve the model.
66
 
@@ -72,14 +70,12 @@ Taiwan-LLaMa project acknowledges the efforts of the [Meta LLaMa team](https://g
72
  DEFAULT_SYSTEM_PROMPT = "你是人工智慧助理,以下是用戶和人工智能助理之間的對話。你要對用戶的問題提供有用、安全、詳細和禮貌的回答。 您是由國立臺灣大學的林彥廷博士生為研究目的而建造的。"
73
 
74
  endpoint_url = os.environ.get("ENDPOINT_URL", "http://127.0.0.1:8080")
75
- client = Client(endpoint_url, timeout=120)
76
- eos_token = "</s>"
77
  MAX_MAX_NEW_TOKENS = 4096
78
  DEFAULT_MAX_NEW_TOKENS = 1536
79
 
80
  max_prompt_length = 8192 - MAX_MAX_NEW_TOKENS - 10
81
 
82
- model_name = "yentinglin/Taiwan-LLM-7B-v2.0-chat"
83
  tokenizer = AutoTokenizer.from_pretrained(model_name)
84
 
85
  with gr.Blocks() as demo:
@@ -142,34 +138,30 @@ with gr.Blocks() as demo:
142
  return "", history + [[user_message, None]]
143
 
144
 
145
- def bot(history, max_new_tokens, temperature, top_p, top_k, system_prompt):
146
- conv = get_conv_template("twllm_v2").copy()
147
- roles = {"human": conv.roles[0], "gpt": conv.roles[1]} # map human to USER and gpt to ASSISTANT
148
- conv.system = system_prompt
149
  for user, bot in history:
150
- conv.append_message(roles['human'], user)
151
- conv.append_message(roles["gpt"], bot)
152
- msg = conv.get_prompt()
153
- prompt_tokens = tokenizer.encode(msg)
154
- length_of_prompt = len(prompt_tokens)
155
- if length_of_prompt > max_prompt_length:
156
- msg = tokenizer.decode(prompt_tokens[-max_prompt_length + 1:])
157
 
158
  history[-1][1] = ""
159
- for response in client.generate_stream(
160
- msg,
161
- max_new_tokens=max_new_tokens,
162
- temperature=temperature,
163
- top_p=top_p,
164
- top_k=top_k,
165
- repetition_penalty=1.1,
166
- ):
167
- if not response.token.special:
168
- character = response.token.text
169
- history[-1][1] += character
170
- yield history
171
-
172
- # After generating the response, store the conversation history in MongoDB
 
 
 
173
  conversation_document = {
174
  "model_name": model_name,
175
  "history": history,
@@ -177,7 +169,6 @@ with gr.Blocks() as demo:
177
  "max_new_tokens": max_new_tokens,
178
  "temperature": temperature,
179
  "top_p": top_p,
180
- "top_k": top_k,
181
  }
182
  conversations_collection.insert_one(conversation_document)
183
 
@@ -266,5 +257,5 @@ with gr.Blocks() as demo:
266
 
267
  gr.Markdown(LICENSE)
268
 
269
- demo.queue(concurrency_count=4, max_size=128)
270
- demo.launch()
 
1
  import os
 
2
  import gradio as gr
 
 
3
  from transformers import AutoTokenizer
4
  from pymongo import MongoClient
5
+ import openai
6
 
7
  DB_NAME = os.getenv("MONGO_DBNAME", "taiwan-llm")
8
  USER = os.getenv("MONGO_USER")
 
58
  ## Licenses
59
 
60
  - Code is licensed under Apache 2.0 License.
61
+ - Models are licensed under the LLAMA Community License.
62
  - By using this model, you agree to the terms and conditions specified in the license.
63
  - By using this demo, you agree to share your input utterances with us to improve the model.
64
 
 
70
  DEFAULT_SYSTEM_PROMPT = "你是人工智慧助理,以下是用戶和人工智能助理之間的對話。你要對用戶的問題提供有用、安全、詳細和禮貌的回答。 您是由國立臺灣大學的林彥廷博士生為研究目的而建造的。"
71
 
72
  endpoint_url = os.environ.get("ENDPOINT_URL", "http://127.0.0.1:8080")
 
 
73
  MAX_MAX_NEW_TOKENS = 4096
74
  DEFAULT_MAX_NEW_TOKENS = 1536
75
 
76
  max_prompt_length = 8192 - MAX_MAX_NEW_TOKENS - 10
77
 
78
+ model_name = "yentinglin/Llama-3-Taiwan-8B-Instruct"
79
  tokenizer = AutoTokenizer.from_pretrained(model_name)
80
 
81
  with gr.Blocks() as demo:
 
138
  return "", history + [[user_message, None]]
139
 
140
 
141
+ def bot(history, max_new_tokens, temperature, top_p, system_prompt):
142
+ messages = [{"role": "system", "content": system_prompt}]
 
 
143
  for user, bot in history:
144
+ messages.append({"role": "user", "content": user})
145
+ messages.append({"role": "assistant", "content": bot})
 
 
 
 
 
146
 
147
  history[-1][1] = ""
148
+ response = openai.ChatCompletion.create(
149
+ model=model_name,
150
+ messages=messages,
151
+ max_tokens=max_new_tokens,
152
+ temperature=temperature,
153
+ top_p=top_p,
154
+ n=1,
155
+ stream=True,
156
+ )
157
+
158
+ for chunk in response:
159
+ if 'choices' in chunk:
160
+ delta = chunk['choices'][0]['delta']
161
+ if 'content' in delta:
162
+ history[-1][1] += delta['content']
163
+ yield history
164
+
165
  conversation_document = {
166
  "model_name": model_name,
167
  "history": history,
 
169
  "max_new_tokens": max_new_tokens,
170
  "temperature": temperature,
171
  "top_p": top_p,
 
172
  }
173
  conversations_collection.insert_one(conversation_document)
174
 
 
257
 
258
  gr.Markdown(LICENSE)
259
 
260
+ demo.queue(max_size=128)
261
+ demo.launch(max_threads=10)