pratikshahp committed
Commit dcdb4b1 (verified)
1 Parent(s): c9c5c98

Update app.py

Files changed (1):
  app.py +20 -12
app.py CHANGED
@@ -2,11 +2,15 @@ import os
 from dotenv import load_dotenv
 import gradio as gr
 from langchain_huggingface import HuggingFaceEndpoint
-from transformers import pipeline
+from together import Together
 
 # Load environment variables
 load_dotenv()
 HF_TOKEN = os.getenv("HF_TOKEN")
+API_KEY = os.getenv("API_KEY")
+
+# Initialize the Together client for guardrail functionality
+client = Together(api_key=API_KEY)
 
 # Initialize the Hugging Face endpoint for text generation (Mistral model)
 llm = HuggingFaceEndpoint(
@@ -16,24 +20,28 @@ llm = HuggingFaceEndpoint(
     max_new_tokens=100
 )
 
-# Use a pipeline as a high-level helper
-
-content_filter = pipeline("text-generation", model="meta-llama/LlamaGuard-7b")
-
-# Function to handle chatbot response and guardrails
+# Function to handle chatbot response with TogetherAI's guardrails
 def chatbot_response_with_guardrails(message):
     try:
-        # Generate raw response from the primary model (Mistral)
+        # Step 1: Generate raw response using Mistral model
         raw_response = llm(message)
 
-        # Check if the response contains inappropriate content using the content filter
-        result = content_filter(raw_response)
-
-        # If the response is deemed harmful, modify it or reject it
-        if result[0]['label'] == 'toxic':  # Adjust based on your model's output
+        # Step 2: Use TogetherAI's guardrail model to check the response
+        response = client.completions.create(
+            model="Meta-Llama/LlamaGuard-2-8b",  # TogetherAI guardrail model
+            prompt=raw_response
+        )
+
+        # Extract the response from TogetherAI's guardrail model
+        guardrail_check = response.choices[0].text.strip()
+
+        # Step 3: Check if the guardrail model labels the response as harmful
+        if 'toxic' in guardrail_check.lower():  # Adjust based on your guardrail model's output
             return "Content not suitable."
         else:
+            # If the response is safe, return the raw response
            return raw_response
+
     except Exception as e:
         return f"Error: {e}"
 
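
Note: the HuggingFaceEndpoint arguments between the two hunks are unchanged and therefore not shown in the diff. Purely as an illustrative sketch, not the file's actual contents, the initialization typically has a shape like the following (the repo id and temperature are assumptions; only max_new_tokens=100 appears in the diff):

# Illustrative only: the real arguments live in the unchanged lines omitted from the diff.
llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.2",  # hypothetical model id
    huggingfacehub_api_token=HF_TOKEN,
    temperature=0.7,                               # hypothetical sampling parameter
    max_new_tokens=100,
)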
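
A note on the guardrail step: Llama Guard models are chat-tuned classifiers that conventionally reply with "safe" or "unsafe" (followed by violated category codes) when queried through their chat template, so a substring test for 'toxic' on a raw completion may never match. A minimal alternative sketch, assuming the Together chat endpoint is used so the model's template is applied (the model id casing and the output parsing here are assumptions, not the committed code):

# Sketch: return True if the guardrail model flags `text` as unsafe.
def is_flagged_by_llamaguard(client, text: str) -> bool:
    check = client.chat.completions.create(
        model="meta-llama/LlamaGuard-2-8b",  # assumed Together model id
        messages=[{"role": "user", "content": text}],
    )
    verdict = check.choices[0].message.content.strip().lower()
    # Llama Guard typically begins its reply with "safe" or "unsafe"
    return verdict.startswith("unsafe")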
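
The Gradio interface that calls this handler sits below the changed hunks and is not part of the diff; a rough sketch of how such a handler is typically wired up (component labels and title are assumptions):

# Hypothetical wiring, not shown in this commit's diff.
demo = gr.Interface(
    fn=chatbot_response_with_guardrails,
    inputs=gr.Textbox(label="Message"),
    outputs=gr.Textbox(label="Response"),
    title="Chatbot with Guardrails",
)

if __name__ == "__main__":
    demo.launch()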
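
Both credentials are read from the environment via load_dotenv(), so a local .env file needs the two entries named in the diff (values below are placeholders):

# .env (placeholders, not real credentials)
HF_TOKEN=hf_xxxxxxxxxxxxxxxx
API_KEY=your_together_api_key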