Spaces:
Running
Running
yentinglin
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -1,10 +1,8 @@
|
|
1 |
import os
|
2 |
-
|
3 |
import gradio as gr
|
4 |
-
from text_generation import Client
|
5 |
-
from conversation import get_conv_template
|
6 |
from transformers import AutoTokenizer
|
7 |
from pymongo import MongoClient
|
|
|
8 |
|
9 |
DB_NAME = os.getenv("MONGO_DBNAME", "taiwan-llm")
|
10 |
USER = os.getenv("MONGO_USER")
|
@@ -60,7 +58,7 @@ LICENSE = """
|
|
60 |
## Licenses
|
61 |
|
62 |
- Code is licensed under Apache 2.0 License.
|
63 |
-
- Models are licensed under the LLAMA
|
64 |
- By using this model, you agree to the terms and conditions specified in the license.
|
65 |
- By using this demo, you agree to share your input utterances with us to improve the model.
|
66 |
|
@@ -72,14 +70,12 @@ Taiwan-LLaMa project acknowledges the efforts of the [Meta LLaMa team](https://g
|
|
72 |
DEFAULT_SYSTEM_PROMPT = "你是人工智慧助理,以下是用戶和人工智能助理之間的對話。你要對用戶的問題提供有用、安全、詳細和禮貌的回答。 您是由國立臺灣大學的林彥廷博士生為研究目的而建造的。"
|
73 |
|
74 |
endpoint_url = os.environ.get("ENDPOINT_URL", "http://127.0.0.1:8080")
|
75 |
-
client = Client(endpoint_url, timeout=120)
|
76 |
-
eos_token = "</s>"
|
77 |
MAX_MAX_NEW_TOKENS = 4096
|
78 |
DEFAULT_MAX_NEW_TOKENS = 1536
|
79 |
|
80 |
max_prompt_length = 8192 - MAX_MAX_NEW_TOKENS - 10
|
81 |
|
82 |
-
model_name = "yentinglin/
|
83 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
84 |
|
85 |
with gr.Blocks() as demo:
|
@@ -142,34 +138,30 @@ with gr.Blocks() as demo:
|
|
142 |
return "", history + [[user_message, None]]
|
143 |
|
144 |
|
145 |
-
def bot(history, max_new_tokens, temperature, top_p,
|
146 |
-
|
147 |
-
roles = {"human": conv.roles[0], "gpt": conv.roles[1]} # map human to USER and gpt to ASSISTANT
|
148 |
-
conv.system = system_prompt
|
149 |
for user, bot in history:
|
150 |
-
|
151 |
-
|
152 |
-
msg = conv.get_prompt()
|
153 |
-
prompt_tokens = tokenizer.encode(msg)
|
154 |
-
length_of_prompt = len(prompt_tokens)
|
155 |
-
if length_of_prompt > max_prompt_length:
|
156 |
-
msg = tokenizer.decode(prompt_tokens[-max_prompt_length + 1:])
|
157 |
|
158 |
history[-1][1] = ""
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
|
|
|
|
|
|
173 |
conversation_document = {
|
174 |
"model_name": model_name,
|
175 |
"history": history,
|
@@ -177,7 +169,6 @@ with gr.Blocks() as demo:
|
|
177 |
"max_new_tokens": max_new_tokens,
|
178 |
"temperature": temperature,
|
179 |
"top_p": top_p,
|
180 |
-
"top_k": top_k,
|
181 |
}
|
182 |
conversations_collection.insert_one(conversation_document)
|
183 |
|
@@ -266,5 +257,5 @@ with gr.Blocks() as demo:
|
|
266 |
|
267 |
gr.Markdown(LICENSE)
|
268 |
|
269 |
-
demo.queue(
|
270 |
-
demo.launch()
|
|
|
1 |
import os
|
|
|
2 |
import gradio as gr
|
|
|
|
|
3 |
from transformers import AutoTokenizer
|
4 |
from pymongo import MongoClient
|
5 |
+
import openai
|
6 |
|
7 |
DB_NAME = os.getenv("MONGO_DBNAME", "taiwan-llm")
|
8 |
USER = os.getenv("MONGO_USER")
|
|
|
58 |
## Licenses
|
59 |
|
60 |
- Code is licensed under Apache 2.0 License.
|
61 |
+
- Models are licensed under the LLAMA Community License.
|
62 |
- By using this model, you agree to the terms and conditions specified in the license.
|
63 |
- By using this demo, you agree to share your input utterances with us to improve the model.
|
64 |
|
|
|
70 |
DEFAULT_SYSTEM_PROMPT = "你是人工智慧助理,以下是用戶和人工智能助理之間的對話。你要對用戶的問題提供有用、安全、詳細和禮貌的回答。 您是由國立臺灣大學的林彥廷博士生為研究目的而建造的。"
|
71 |
|
72 |
endpoint_url = os.environ.get("ENDPOINT_URL", "http://127.0.0.1:8080")
|
|
|
|
|
73 |
MAX_MAX_NEW_TOKENS = 4096
|
74 |
DEFAULT_MAX_NEW_TOKENS = 1536
|
75 |
|
76 |
max_prompt_length = 8192 - MAX_MAX_NEW_TOKENS - 10
|
77 |
|
78 |
+
model_name = "yentinglin/Llama-3-Taiwan-8B-Instruct"
|
79 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
80 |
|
81 |
with gr.Blocks() as demo:
|
|
|
138 |
return "", history + [[user_message, None]]
|
139 |
|
140 |
|
141 |
+
def bot(history, max_new_tokens, temperature, top_p, system_prompt):
|
142 |
+
messages = [{"role": "system", "content": system_prompt}]
|
|
|
|
|
143 |
for user, bot in history:
|
144 |
+
messages.append({"role": "user", "content": user})
|
145 |
+
messages.append({"role": "assistant", "content": bot})
|
|
|
|
|
|
|
|
|
|
|
146 |
|
147 |
history[-1][1] = ""
|
148 |
+
response = openai.ChatCompletion.create(
|
149 |
+
model=model_name,
|
150 |
+
messages=messages,
|
151 |
+
max_tokens=max_new_tokens,
|
152 |
+
temperature=temperature,
|
153 |
+
top_p=top_p,
|
154 |
+
n=1,
|
155 |
+
stream=True,
|
156 |
+
)
|
157 |
+
|
158 |
+
for chunk in response:
|
159 |
+
if 'choices' in chunk:
|
160 |
+
delta = chunk['choices'][0]['delta']
|
161 |
+
if 'content' in delta:
|
162 |
+
history[-1][1] += delta['content']
|
163 |
+
yield history
|
164 |
+
|
165 |
conversation_document = {
|
166 |
"model_name": model_name,
|
167 |
"history": history,
|
|
|
169 |
"max_new_tokens": max_new_tokens,
|
170 |
"temperature": temperature,
|
171 |
"top_p": top_p,
|
|
|
172 |
}
|
173 |
conversations_collection.insert_one(conversation_document)
|
174 |
|
|
|
257 |
|
258 |
gr.Markdown(LICENSE)
|
259 |
|
260 |
+
demo.queue(max_size=128)
|
261 |
+
demo.launch(max_threads=10)
|