tamil-ai-assistant-constitution

Sleeping

App Files Files Community

saffr0n committed on May 9, 2024

Commit

04894f0

verified ·

1 Parent(s): d626646

Add constitution box

Browse files

Files changed (1) hide show

app.py +40 -14

app.py CHANGED Viewed

@@ -29,8 +29,6 @@ this demo is governed by the original [license](https://huggingface.co/spaces/hu
 SYSTEM_PROMPT = "ஒரு பணியை எவ்வாறு நிறைவேற்ற வேண்டும் என்று கூறும் அறிவுரை கீழே உள்ளது. வேண்டுகோளைப் பொருத்தமாக நிறைவு செய்கின்ற பதில் ஒன்றை எழுதுக."
-PROMPT_TEMPLATE = """{% if messages[0]['role'] == 'system' %}{{ messages[0]['content'] + '\n\n' }}{% endif %}### Instruction:\nநீங்கள் ஒரு பயனருடன் உரையாடும் AI உதவியாளர். இதுவரை உங்கள் தொடர்புகளின் அரட்டை வரலாறு இதுதான்:\n\n{% for message in messages %}{% if message['role'] == 'user' %}{{ '\nUser: ' + message['content'] + '\n'}}{% elif message['role'] == 'assistant' %}{{ '\nAI: ' + message['content'] + '\n'}}{% endif %}{% endfor %}\n\nAI உதவியாளராக, உங்கள் அடுத்த பதிலை அரட்டையில் எழுதவும். ஒரே ஒரு பதிலை மட்டும் எழுதுங்கள்.\n\n### Response:\n"""
 if not torch.cuda.is_available():
     DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
@@ -38,12 +36,12 @@ if torch.cuda.is_available():
     model_id = "abhinand/tamil-llama-7b-instruct-v0.1"
     model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
     tokenizer = AutoTokenizer.from_pretrained(model_id)
-    tokenizer.chat_template = PROMPT_TEMPLATE
     tokenizer.use_default_system_prompt = False
 @spaces.GPU
 def generate(
     message: str,
     chat_history: list[tuple[str, str]],
     max_new_tokens: int = 1024,
     temperature: float = 0.6,
@@ -52,14 +50,20 @@ def generate(
     repetition_penalty: float = 1.2,
 ) -> Iterator[str]:
     print("chat history: ", chat_history)
-    conversation = []
-    conversation.append({"role": "system", "content": SYSTEM_PROMPT})
     for user, assistant in chat_history:
-        conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
-    conversation.append({"role": "user", "content": message})
-    print(tokenizer.apply_chat_template(conversation, tokenize=False))
-    print("conversation: ", conversation)
-    input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
@@ -93,8 +97,29 @@ examples = [
     ["நான் பணம் சம்பாதிக்க வேண்டும் ஆனால் வேடிக்கையாக இருக்க வேண்டும் என்றால் நல்ல தொழில் எது?"],
 ]
 with gr.Blocks(css="style.css") as demo:
     gr.Markdown(DESCRIPTION)
     chatbot = gr.Chatbot()
     msg = gr.Textbox(label="உங்கள் செய்தியை உள்ளிடவும் / Enter your message")
     submit_btn = gr.Button("சமர்ப்பிக்கவும் / Submit")
@@ -103,11 +128,12 @@ with gr.Blocks(css="style.css") as demo:
     def user(user_message, history):
         return "", history + [[user_message, None]]
-    def bot(history, max_new_tokens, temperature, top_p, top_k, repetition_penalty):
         user_message = history[-1][0]
         chat_history = [(msg[0], msg[1]) for msg in history[:-1]]
         bot_message = ""
-        for response in generate(user_message, chat_history, max_new_tokens, temperature, top_p, top_k, repetition_penalty):
             bot_message = response
             history[-1][1] = bot_message
             yield history
@@ -123,12 +149,12 @@ with gr.Blocks(css="style.css") as demo:
     submit_btn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
         bot,
-        [chatbot, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
         chatbot,
     )
     msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
         bot,
-        [chatbot, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
         chatbot,
     )
     clear.click(lambda: None, None, chatbot, queue=False)

 SYSTEM_PROMPT = "ஒரு பணியை எவ்வாறு நிறைவேற்ற வேண்டும் என்று கூறும் அறிவுரை கீழே உள்ளது. வேண்டுகோளைப் பொருத்தமாக நிறைவு செய்கின்ற பதில் ஒன்றை எழுதுக."
 if not torch.cuda.is_available():
     DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
     model_id = "abhinand/tamil-llama-7b-instruct-v0.1"
     model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     tokenizer.use_default_system_prompt = False
 @spaces.GPU
 def generate(
     message: str,
+    principle_prompt: str,
     chat_history: list[tuple[str, str]],
     max_new_tokens: int = 1024,
     temperature: float = 0.6,
     repetition_penalty: float = 1.2,
 ) -> Iterator[str]:
     print("chat history: ", chat_history)
+    conversation_string_list = [
+        SYSTEM_PROMPT,
+        "\n\n### Instruction:\n",
+        principle_prompt,
+        "\n\nஇதுவரை உங்கள் தொடர்புகளின் அரட்டை வரலாறு இதுதான்:\n\n",
+        ]
     for user, assistant in chat_history:
+        conversation_string_list.append(f'\nUser: {user}\n')
+        conversation_string_list.append(f'\nAssistant: {assistant}\n')
+    conversation_string_list.append(f'\nUser: {message}\n')
+    conversation_string_list.append("\n\nAI உதவியாளராக, உங்கள் அடுத்த பதிலை அரட்டையில் எழுதவும். ஒரே ஒரு பதிலை மட்டும் எழுதுங்கள்.\n\n### Response:\n")
+    conversation_string = "".join(conversation_string_list)
+    print("conversation_string: ", conversation_string)
+    input_ids = tokenizer(conversation_string, return_tensors="pt").input_ids
     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
     ["நான் பணம் சம்பாதிக்க வேண்டும் ஆனால் வேடிக்கையாக இருக்க வேண்டும் என்றால் நல்ல தொழில் எது?"],
 ]
+chatbot_instructions_principles = """இது பயனர்களின் அன்றாட வாழ்வில் உதவுவதற்காக உருவாக்கப்பட்ட AI உதவியாளர். தினசரி வாழ்க்கை, சமூக விதிமுறைகள், பிரபலமான செயல்பாடுகள், பொதுவான சூழ்நிலைகளில் எவ்வாறு நடந்துகொள்வது மற்றும் தனிப்பட்ட மற்றும் தொழில்முறை சூழல்களில் ஒருவருக்கொருவருடனான உறவுகளை எவ்வாறு வழிநடத்துவது போன்ற தலைப்புகளைப் பற்றி பேசலாம்.
+நீங்கள் பின்வரும் கொள்கைகளை கடைபிடிக்கிறீர்கள்:
+{principles}
+"""
+chatbot_instructions_no_principles = """இது பயனர்களின் அன்றாட வாழ்வில் உதவுவதற்காக உருவாக்கப்பட்ட AI உதவியாளர். தினசரி வாழ்க்கை, சமூக விதிமுறைகள், பிரபலமான செயல்பாடுகள், பொதுவான சூழ்நிலைகளில் எவ்வாறு நடந்துகொள்வது மற்றும் தனிப்பட்ட மற்றும் தொழில்முறை சூழல்களில் ஒருவருக்கொருவருடனான உறவுகளை எவ்வாறு வழிநடத்துவது போன்ற தலைப்புகளைப் பற்றி பேசலாம்."""
+initial_principles = """1. நீங்கள் பயனருடன் தொடர்பு கொள்ளும்போது எளிமையான மற்றும் முன்முடிவுடன் இல்லாத தொனியில் தொடர்பு கொள்ளவும்.
+2. முடிந்தவரை சுருக்கமாக இருக்கட்டும், மேலும் விளக்கும்படி பயனர் கேட்டால் விளக்கமளிக்கலாம்.
+3. பயனர் உங்களிடம் தனிப்பட்ட ஆலோசனையைக் கேட்டால், சம்பந்தப்பட்ட மற்றவர்கள் மீது எந்த முடிவையும் வழங்க வேண்டாம்.
+4. பயனர் உங்களிடம் ஆலோசனை கேட்டிருந்தால், உங்கள் ஆலோசனையை இன்னும் சிறப்பாகச் செய்ய உங்களுக்கு இன்னும் சில விவரங்களை வழங்க முடியும் என்றால், தொடரும் முன் அதற்கான கேள்விகளை பயனரிடம் கேளுங்கள்."""
 with gr.Blocks(css="style.css") as demo:
     gr.Markdown(DESCRIPTION)
+    principle_list = gr.Textbox(lines=10, max_lines=20,
+                             value=initial_principles,
+                             label="கொள்கைகள்",
+                             show_copy_button=True)
     chatbot = gr.Chatbot()
     msg = gr.Textbox(label="உங்கள் செய்தியை உள்ளிடவும் / Enter your message")
     submit_btn = gr.Button("சமர்ப்பிக்கவும் / Submit")
     def user(user_message, history):
         return "", history + [[user_message, None]]
+    def bot(history, max_new_tokens, temperature, top_p, top_k, repetition_penalty, principle_list):
+        principle_prompt = chatbot_instructions_no_principles if not principle_list else chatbot_instructions_principles.format(principles=principle_list)
         user_message = history[-1][0]
         chat_history = [(msg[0], msg[1]) for msg in history[:-1]]
         bot_message = ""
+        for response in generate(user_message, principle_prompt, chat_history, max_new_tokens, temperature, top_p, top_k, repetition_penalty):
             bot_message = response
             history[-1][1] = bot_message
             yield history
     submit_btn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
         bot,
+        [chatbot, max_new_tokens, temperature, top_p, top_k, repetition_penalty, principle_list],
         chatbot,
     )
     msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
         bot,
+        [chatbot, max_new_tokens, temperature, top_p, top_k, repetition_penalty, principle_list],
         chatbot,
     )
     clear.click(lambda: None, None, chatbot, queue=False)