Update app.py

app.py CHANGED
@@ -178,6 +178,7 @@ Your response should adapt to the norms and customs of the respective language a
 # ============ CONSTANT ============
 # https://github.com/gradio-app/gradio/issues/884
 MODEL_NAME = "SeaLLM-13B"
+MODEL_NAME = str(os.environ.get("MODEL_NAME", "SeaLLM-13B"))
 
 MODEL_TITLE = """
 <div class="container" style="
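The added line lets a deployment override the display name through the `MODEL_NAME` environment variable, falling back to the hard-coded default when it is unset. A minimal sketch of the pattern (the `str(...)` wrapper is redundant since environment values are already strings, but harmless):

```python
import os

# Deploy-time override: `MODEL_NAME=SeaLLM-7B python app.py` changes the
# displayed name; with the variable unset, the literal default is used.
MODEL_NAME = str(os.environ.get("MODEL_NAME", "SeaLLM-13B"))
print(MODEL_NAME)
```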
@@ -231,21 +232,24 @@ MODEL_TITLE = """
 # </span>
 # """.strip()
 
-MODEL_DESC = """
+# <a href="https://huggingface.co/SeaLLMs/SeaLLM-Chat-13b" target="_blank">SeaLLM-13B-Chat</a> - a helpful chatbot assistant for Southeast Asian Languages. It supports English 🇬🇧, Vietnamese 🇻🇳, Indonesian 🇮🇩, Thai 🇹🇭, Malay 🇲🇾, Khmer🇰🇭, Lao🇱🇦, Tagalog🇵🇭 and Burmese🇲🇲.
+
+
+MODEL_DESC = f"""
 <div style='display:flex; gap: 0.25rem; '>
 <a href='https://github.com/SeaLLMs/SeaLLMs'><img src='https://img.shields.io/badge/Github-Code-success'></a>
 <a href='https://huggingface.co/spaces/SeaLLMs/SeaLLM-Chat-13b'><img src='https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue'></a>
 <a href='https://huggingface.co/SeaLLMs/SeaLLM-Chat-13b'><img src='https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Model-blue'></a>
 </div>
 <span style="font-size: larger">
-<a href="https://huggingface.co/SeaLLMs/SeaLLM-Chat-13b" target="_blank">SeaLLM-13B-Chat</a> - a helpful chatbot assistant for Southeast Asian Languages. It supports English 🇬🇧, Vietnamese 🇻🇳, Indonesian 🇮🇩, Thai 🇹🇭, Malay 🇲🇾, Khmer🇰🇭, Lao🇱🇦, Tagalog🇵🇭 and Burmese🇲🇲.
-Explore <a href="https://huggingface.co/SeaLLMs/SeaLLM-Chat-13b" target="_blank">our article</a> for more
+<a href="https://huggingface.co/SeaLLMs/SeaLLM-Chat-13b" target="_blank">{MODEL_NAME}</a> - a helpful assistant for Southeast Asian Languages. It supports English 🇬🇧, Vietnamese 🇻🇳, Indonesian 🇮🇩, Thai 🇹🇭, Malay 🇲🇾, Khmer🇰🇭, Lao🇱🇦, Tagalog🇵🇭 and Burmese🇲🇲.
+Explore <a href="https://huggingface.co/SeaLLMs/SeaLLM-Chat-13b" target="_blank">our article</a> for more.
 </span>
 <br>
 <span>
-<span style="color: red">NOTE
-By using our service, you are required to
-not to use our service to generate any harmful, inappropriate or
+<span style="color: red">NOTE: The chatbot may produce false and harmful content and does not have up-to-date knowledge.</span>
+By using our service, you are required to agree to our <a href="https://huggingface.co/SeaLLMs/SeaLLM-Chat-13b/blob/main/LICENSE" target="_blank" style="color: red">Terms Of Use</a>, which includes
+not to use our service to generate any harmful, inappropriate or illegal content that violates local and international laws.
 The service collects user dialogue data for testing and performance improvement, and reserves the right to distribute it under
 <a href="https://creativecommons.org/licenses/by/4.0/">(CC-BY)</a> or similar license. So do not enter any personal information!
 </span>
@@ -731,17 +735,6 @@ def llama_chat_sys_input_seq_constructor(text, sys_prompt=SYSTEM_PROMPT_1, bos_t
     return f"{bos_token}{B_INST} {B_SYS} {sys_prompt} {E_SYS} {text} {E_INST}"
 
 
-def few_shot_prompt(
-    message: str,
-    history: List[Tuple[str, str]],
-    sys_prompt=SYSTEM_PROMPT_1,
-    bos_token=BOS_TOKEN,
-    eos_token=EOS_TOKEN,
-    include_end_instruct=True,
-):
-    return f"{bos_token} {message}"
-
-
 def llama_chat_multiturn_sys_input_seq_constructor(
     message: str,
     history: List[Tuple[str, str]],
@@ -1572,10 +1565,9 @@ def batch_inference(
         prompt_format_fn = llama_chat_multiturn_sys_input_seq_constructor
     elif prompt_mode == 'few-shot':
         from functools import partial
-
-
-
-        prompt_format_fn = few_shot_prompt
+        prompt_format_fn = partial(
+            llama_chat_multiturn_sys_input_seq_constructor, include_end_instruct=False
+        )
     else:
         raise gr.Error(f'Wrong mode {prompt_mode}')
 
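With the standalone `few_shot_prompt` helper deleted above, few-shot mode now reuses the multi-turn constructor, pre-binding `include_end_instruct=False` via `functools.partial` so the prompt ends without a closing instruction tag and the model continues the pattern rather than answering a chat turn. A self-contained sketch of the pattern, using a simplified stand-in for the app's constructor:

```python
from functools import partial
from typing import List, Tuple

B_INST, E_INST = "[INST]", "[/INST]"

def build_prompt(message: str, history: List[Tuple[str, str]],
                 include_end_instruct: bool = True) -> str:
    # Simplified stand-in for llama_chat_multiturn_sys_input_seq_constructor:
    # omitting the final [/INST] leaves the prompt open-ended for few-shot
    # completion instead of closing an instruction turn.
    end = f" {E_INST}" if include_end_instruct else ""
    return f"{B_INST} {message}{end}"

# Pre-bind the keyword so both modes can be called with the same
# (message, history) signature downstream.
few_shot_fn = partial(build_prompt, include_end_instruct=False)

print(build_prompt("2+2=", []))  # [INST] 2+2= [/INST]
print(few_shot_fn("2+2=", []))   # [INST] 2+2=
```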
@@ -1607,7 +1599,6 @@ def batch_inference(
     for res, item in zip(responses, all_items):
        item['response'] = res
 
-    # save_path = "/mnt/workspace/workgroup/phi/test.json"
     save_path = BATCH_INFER_SAVE_TMP_FILE
     os.makedirs(os.path.dirname(save_path), exist_ok=True)
     with open(save_path, 'w', encoding='utf-8') as f:
@@ -1629,6 +1620,15 @@ each item has `prompt` key. We put guardrails to enhance safety, so do not input
 ```
 """
 
+CHAT_EXAMPLES = [
+    ["Hãy giải thích thuyết tương đối rộng."],
+    ["Tolong bantu saya menulis email ke lembaga pemerintah untuk mencari dukungan finansial untuk penelitian AI."],
+    ["ຂໍແຈ້ງ 5 ສະຖານທີ່ທ່ອງທ່ຽວໃນນະຄອນຫຼວງວຽງຈັນ"],
+]
+
+
+# performance items
+
 
 def launch():
     global demo, llm, DEBUG, LOG_FILE
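`CHAT_EXAMPLES` feeds the `examples=` parameter of the `gr.ChatInterface` calls added further down; each entry becomes a clickable sample prompt showcasing the supported languages (Vietnamese: "Please explain general relativity."; Indonesian: "Please help me write an email to a government agency seeking financial support for AI research."; Lao: "Please list 5 tourist attractions in Vientiane Capital."). A runnable sketch of how the pieces fit, with a placeholder response function standing in for the app's vLLM-backed one; a bare string per example suffices in this minimal case, while the app wraps each in a one-element list, the form used when examples also carry `additional_inputs` values:

```python
import gradio as gr

def respond(message, history):
    # Placeholder standing in for the app's model-backed chat function.
    return f"(echo) {message}"

demo = gr.ChatInterface(
    respond,
    examples=["Hãy giải thích thuyết tương đối rộng."],  # "Please explain general relativity."
    cache_examples=False,  # skip pre-generating responses for examples at startup
)

if __name__ == "__main__":
    demo.launch()
```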
@@ -1701,7 +1701,7 @@ def launch():
 
     if QUANTIZATION == 'awq':
         print(F'Load model in int4 quantization')
-        llm = LLM(model=model_path, dtype=
+        llm = LLM(model=model_path, dtype="float16", tensor_parallel_size=tensor_parallel, gpu_memory_utilization=gpu_memory_utilization, quantization="awq")
     else:
         llm = LLM(model=model_path, dtype=dtype, tensor_parallel_size=tensor_parallel, gpu_memory_utilization=gpu_memory_utilization)
 
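The completed AWQ branch pins `dtype="float16"` and passes `quantization="awq"` rather than reusing the user-selected `dtype`: in vLLM versions contemporary with this app, the AWQ kernels compute in half precision, so forcing float16 avoids a bfloat16/AWQ mismatch. A hedged sketch of the call, with illustrative values where the app derives them from its environment config:

```python
from vllm import LLM

# Illustrative values; the app reads these from env vars / CLI config.
llm = LLM(
    model="SeaLLMs/SeaLLM-Chat-13b",  # an AWQ-quantized checkpoint is assumed
    dtype="float16",                  # AWQ kernels run in half precision
    tensor_parallel_size=1,
    gpu_memory_utilization=0.9,
    quantization="awq",
)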
@@ -1751,7 +1751,7 @@ def launch():
                 ["upload_chat.json", "chat", 0.2, 1024, 0.5, 0, "[STOP],[END],<s>,</s>"],
                 ["upload_few_shot.json", "few-shot", 0.2, 128, 0.5, 0, "[STOP],[END],<s>,</s>,\\n"]
             ],
-            cache_examples=
+            # cache_examples=True,
         )
 
         demo_chat = gr.ChatInterface(
@@ -1765,7 +1765,7 @@ def launch():
                 ],
                 show_copy_button=True,
             ),
-            textbox=gr.Textbox(placeholder='Type message', lines=
+            textbox=gr.Textbox(placeholder='Type message', lines=4, max_lines=128, min_width=200),
            submit_btn=gr.Button(value='Submit', variant="primary", scale=0),
             # ! consider preventing the stop button
             # stop_btn=None,
@@ -1780,26 +1780,42 @@ def launch():
             # ! Remove the system prompt textbox to avoid jailbreaking
             # gr.Textbox(value=sys_prompt, label='System prompt', lines=8)
             ],
+            examples=CHAT_EXAMPLES,
+            cache_examples=False
         )
+        descriptions = model_desc
+        if DISPLAY_MODEL_PATH:
+            descriptions += f"<br> {path_markdown.format(model_path=model_path)}"
+
         demo = CustomTabbedInterface(
             interface_list=[demo_chat, demo_file_upload],
             tab_names=["Chat Interface", "Batch Inference"],
             title=f"{model_title}",
-            description=
+            description=descriptions,
         )
         demo.title = MODEL_NAME
+        callback = None
         with demo:
+            if DATA_SET_REPO_PATH != "":
+                try:
+                    from performance_plot import attach_plot_to_demo
+                    attach_plot_to_demo(demo)
+                except Exception as e:
+                    print(f'Fail to load DEMO plot: {str(e)}')
+
             gr.Markdown(cite_markdown)
-            if DISPLAY_MODEL_PATH:
-                gr.Markdown(path_markdown.format(model_path=model_path))
+            # if DISPLAY_MODEL_PATH:
+            #     gr.Markdown(path_markdown.format(model_path=model_path))
 
             if ENABLE_AGREE_POPUP:
                 demo.load(None, None, None, _js=AGREE_POP_SCRIPTS)
 
-
         demo.queue()
         demo.launch(server_port=PORT)
     else:
+        descriptions = model_desc
+        if DISPLAY_MODEL_PATH:
+            descriptions += f"<br> {path_markdown.format(model_path=model_path)}"
         demo = gr.ChatInterface(
             response_fn,
             chatbot=ChatBot(
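Inside the `with demo:` block, the performance plot is attached only when `DATA_SET_REPO_PATH` is configured, and the import and call are wrapped in `try/except` so a missing or broken `performance_plot` module (an app-local file) merely logs a message instead of taking down the whole Space. The same optional-feature pattern in isolation, mirroring the app's names:

```python
def attach_optional_plot(demo):
    # Import inside the function: if the optional module is absent or raises,
    # only this feature is disabled, never the rest of the app.
    try:
        from performance_plot import attach_plot_to_demo  # app-local module
        attach_plot_to_demo(demo)
    except Exception as e:
        print(f'Fail to load DEMO plot: {str(e)}')
```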
@@ -1811,12 +1827,12 @@ def launch():
                 ],
                 show_copy_button=True,
             ),
-            textbox=gr.Textbox(placeholder='Type message', lines=
+            textbox=gr.Textbox(placeholder='Type message', lines=4, max_lines=128, min_width=200),
             submit_btn=gr.Button(value='Submit', variant="primary", scale=0),
             # ! consider preventing the stop button
             # stop_btn=None,
             title=f"{model_title}",
-            description=
+            description=descriptions,
             additional_inputs=[
                 gr.Number(value=temperature, label='Temperature (higher -> more random)'),
                 gr.Number(value=max_tokens, label='Max generated tokens (increase if want more generation)'),
@@ -1826,6 +1842,8 @@ def launch():
             # ! Remove the system prompt textbox to avoid jailbreaking
             # gr.Textbox(value=sys_prompt, label='System prompt', lines=8)
             ],
+            examples=CHAT_EXAMPLES,
+            cache_examples=False
         )
         demo.title = MODEL_NAME
         with demo: