Spaces:

KGSAGAR
/

Hindi_Text_Normalization

Running

App Files Community

KGSAGAR committed on Feb 10

Commit

e5d4b35

verified ·

1 Parent(s): c81028d

Update app.py

Browse files

Files changed (1) hide show

app.py +70 -22

app.py CHANGED Viewed

@@ -18,40 +18,88 @@ peft_model = PeftModel.from_pretrained(base_model, "KGSAGAR/Sarvam-1-text-normal
 peft_model = peft_model.merge_and_unload()
-client = InferenceClient(peft_model)
 def respond(
     message,
-    history: list[tuple[str, str]],
     system_message,
     max_tokens,
     temperature,
     top_p,
 ):
-    messages = [{"role": "system", "content": system_message}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-    messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
         temperature=temperature,
         top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-        response += token
-        yield response
 """

 peft_model = peft_model.merge_and_unload()
+# client = InferenceClient(peft_model)
+import re
+import torch
+from transformers import AutoTokenizer
 def respond(
     message,
+    history,
     system_message,
     max_tokens,
     temperature,
     top_p,
+    peft_model,
+    tokenizer_name='your-tokenizer-name',
+    device='cuda'  # or 'cpu' based on your setup
 ):
+    """
+    Generates a response based on the user message and history using the provided PEFT model.
+    Args:
+        message (str): The user's input message.
+        history (list of tuples): A list containing tuples of (user_message, assistant_response).
+        system_message (str): The system's initial message or prompt.
+        max_tokens (int): The maximum number of tokens to generate.
+        temperature (float): The temperature parameter for generation.
+        top_p (float): The top_p parameter for nucleus sampling.
+        peft_model: The pre-trained fine-tuned model for generation.
+        tokenizer_name (str): The name or path of the tokenizer.
+        device (str): The device to run the model on ('cuda' or 'cpu').
+    Yields:
+        str: The generated response up to the current token.
+    """
+    # Load the tokenizer
+    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
+    # Construct the prompt
+    prompt = system_message
+    for user_msg, assistant_msg in history:
+        if user_msg:
+            prompt += f"<user>{user_msg}</user>"
+        if assistant_msg:
+            prompt += f"<assistant>{assistant_msg}</assistant>"
+    prompt += f"<user>{message}</user>"
+    # Tokenize the input prompt
+    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(device)
+    # Generate the output
+    outputs = peft_model.generate(
+        **inputs,
+        max_new_tokens=max_tokens,
         temperature=temperature,
         top_p=top_p,
+        do_sample=True  # Enable sampling for more diverse outputs
+    )
+    # Decode the generated tokens
+    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    # Extract content between <user>...</user> tags
+    def extract_user_content(text):
+        """
+        Extracts and returns content between <user>...</user> tags in the given text.
+        If multiple such sections exist, their contents are concatenated.
+        """
+        pattern = r'<user>(.*?)</user>'
+        matches = re.findall(pattern, text, re.DOTALL)
+        extracted_content = '\n'.join(match.strip() for match in matches)
+        return extracted_content
+    # Extract the normalized text
+    normalized_text = extract_user_content(generated_text)
+    # Stream the response token by token
+    response = ""
+    for token in normalized_text.split():
+        response += token + " "
+        yield response.strip()
 """