pratikshahp commited on
Commit
bb2efc2
·
verified ·
1 Parent(s): b31696f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -22
app.py CHANGED
@@ -2,12 +2,13 @@ import os
2
  from dotenv import load_dotenv
3
  import gradio as gr
4
  from langchain_huggingface import HuggingFaceEndpoint
 
5
 
6
  # Load environment variables
7
  load_dotenv()
8
  HF_TOKEN = os.getenv("HF_TOKEN")
9
 
10
- # Initialize the Hugging Face endpoint for inference (Primary model)
11
  llm = HuggingFaceEndpoint(
12
  repo_id="mistralai/Mistral-7B-Instruct-v0.3", # Replace with your model repo
13
  huggingfacehub_api_token=HF_TOKEN.strip(),
@@ -15,37 +16,30 @@ llm = HuggingFaceEndpoint(
15
  max_new_tokens=100
16
  )
17
 
18
- # Initialize the Meta-Llama Guard model for content safety
19
- guard_llm = HuggingFaceEndpoint(
20
- repo_id="meta-llama/Meta-Llama-Guard-2-8B", # Replace with your guard model repo
21
- huggingfacehub_api_token=HF_TOKEN.strip(),
22
- temperature=0.7,
23
- max_new_tokens=100
24
- )
25
 
26
- # Function to handle chatbot response with guardrails
27
- def guarded_chatbot_response(message):
28
  try:
29
- # Get raw response from the primary model
30
  raw_response = llm(message)
31
 
32
- # Guardrail logic: Ensure the response is safe and appropriate
33
- guard_prompt = f"""
34
- You are a content safety guard for kids under 16. Analyze the following text and ensure it is friendly and appropriate for children.
35
- If appropriate, return the text as is. If not, rephrase or respond with "Content not suitable."
36
- Text: {raw_response}
37
- """
38
 
39
- # Get the guarded response from the Meta-Llama Guard model
40
- guard_response = guard_llm(guard_prompt)
41
- return guard_response
 
 
42
  except Exception as e:
43
  return f"Error: {e}"
44
 
45
  # Gradio Interface for Chatbot with Guardrails
46
  with gr.Blocks() as app_with_guardrails:
47
  gr.Markdown("## Chatbot With Guardrails")
48
- gr.Markdown("This chatbot ensures all responses are appropriate for kids under 16.")
49
 
50
  # Input and output
51
  with gr.Row():
@@ -55,7 +49,7 @@ with gr.Blocks() as app_with_guardrails:
55
 
56
  # Button click event
57
  submit_button.click(
58
- guarded_chatbot_response,
59
  inputs=[user_input],
60
  outputs=[response_output]
61
  )
 
2
  from dotenv import load_dotenv
3
  import gradio as gr
4
  from langchain_huggingface import HuggingFaceEndpoint
5
+ from transformers import pipeline
6
 
7
  # Load environment variables
8
  load_dotenv()
9
  HF_TOKEN = os.getenv("HF_TOKEN")
10
 
11
+ # Initialize the Hugging Face endpoint for text generation (Mistral model)
12
  llm = HuggingFaceEndpoint(
13
  repo_id="mistralai/Mistral-7B-Instruct-v0.3", # Replace with your model repo
14
  huggingfacehub_api_token=HF_TOKEN.strip(),
 
16
  max_new_tokens=100
17
  )
18
 
19
+ # Initialize content moderation model (e.g., JinaAI ContentFilter or similar)
20
+ content_filter = pipeline("text-classification", model="JinaAI/ContentFilter", tokenizer="JinaAI/ContentFilter")
 
 
 
 
 
21
 
22
# Function to handle chatbot response and guardrails
def chatbot_response_with_guardrails(message):
    """Generate a reply with the primary LLM, then screen it for safety.

    Parameters
    ----------
    message : str
        The user's chat input, as passed from the Gradio textbox.

    Returns
    -------
    str
        The raw model reply when the content filter deems it safe,
        "Content not suitable." when the filter flags it, or an
        "Error: ..." string if any step fails (the Gradio UI simply
        displays whatever string is returned).
    """
    try:
        # Generate raw response from the primary model (Mistral).
        # `.invoke()` is the supported LangChain call style; calling the
        # LLM object directly (`llm(message)`) is deprecated and warns in
        # recent langchain-core versions. The return value is the same str.
        raw_response = llm.invoke(message)

        # Classify the generated text with the moderation pipeline.
        # NOTE(review): this assumes the classifier emits a lowercase
        # 'toxic' label — confirm against the actual model's label schema.
        result = content_filter(raw_response)

        # Reject flagged content outright; otherwise pass the reply through.
        if result[0]['label'] == 'toxic':  # Adjust based on your model's output
            return "Content not suitable."
        return raw_response
    except Exception as e:
        # Boundary handler: surface the failure as text so the UI shows it
        # instead of the app crashing mid-callback.
        return f"Error: {e}"
38
 
39
  # Gradio Interface for Chatbot with Guardrails
40
  with gr.Blocks() as app_with_guardrails:
41
  gr.Markdown("## Chatbot With Guardrails")
42
+ gr.Markdown("This chatbot ensures all responses are appropriate.")
43
 
44
  # Input and output
45
  with gr.Row():
 
49
 
50
  # Button click event
51
  submit_button.click(
52
+ chatbot_response_with_guardrails,
53
  inputs=[user_input],
54
  outputs=[response_output]
55
  )