File size: 2,305 Bytes
8d5b4ff
 
 
 
dcdb4b1
8d5b4ff
 
 
 
dcdb4b1
 
 
 
8d5b4ff
bb2efc2
8d5b4ff
 
 
 
 
 
 
dcdb4b1
bb2efc2
8d5b4ff
dcdb4b1
8d5b4ff
 
dcdb4b1
 
 
 
 
 
 
 
 
 
 
bb2efc2
 
dcdb4b1
bb2efc2
dcdb4b1
8d5b4ff
 
 
 
 
 
bb2efc2
8d5b4ff
 
 
 
 
 
 
 
 
bb2efc2
8d5b4ff
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import os
from dotenv import load_dotenv
import gradio as gr
from langchain_huggingface import HuggingFaceEndpoint
from together import Together

# Load environment variables
load_dotenv()
HF_TOKEN = os.getenv("HF_TOKEN")
API_KEY = os.getenv("API_KEY")

# Fail fast with an actionable message: without this check a missing HF_TOKEN
# surfaces further down as "'NoneType' object has no attribute 'strip'".
if not HF_TOKEN or not HF_TOKEN.strip():
    raise RuntimeError(
        "HF_TOKEN is not set; define it in the environment or in a .env file."
    )

# Initialize the Together client for guardrail functionality
client = Together(api_key=API_KEY)

# Initialize the Hugging Face endpoint for text generation (Mistral model)
llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",  # Replace with your model repo
    # Secrets pasted into .env files often carry trailing whitespace/newlines.
    huggingfacehub_api_token=HF_TOKEN.strip(),
    temperature=0.7,
    max_new_tokens=100
)

# Function to handle chatbot response with TogetherAI's guardrails
def chatbot_response_with_guardrails(message):
    """Generate a reply with the Mistral endpoint and screen it with Llama Guard.

    Args:
        message: The user's prompt text.

    Returns:
        The generated reply when the guardrail model does not flag it;
        "Content not suitable." when it does; "Please enter a message." when
        ``message`` is empty or whitespace-only; or an "Error: ..." string if
        generation or the guardrail call raises.
    """
    # Nothing to generate from: skip both remote calls for a blank submission.
    if not message or not str(message).strip():
        return "Please enter a message."

    try:
        # Step 1: Generate raw response using Mistral model.
        # `invoke` is the supported entry point; calling the LLM object
        # directly (`llm(message)`) is deprecated in LangChain.
        raw_response = llm.invoke(message)

        # Step 2: Use TogetherAI's guardrail model to check the response
        response = client.completions.create(
            model="Meta-Llama/LlamaGuard-2-8b",  # TogetherAI guardrail model
            prompt=raw_response
        )

        # Extract the verdict text from TogetherAI's guardrail model
        guardrail_check = response.choices[0].text.strip().lower()

        # Step 3: Llama Guard replies "safe" or "unsafe" (followed by category
        # codes); it does not emit the word "toxic", so checking only for
        # "toxic" let every unsafe reply through. "toxic" is still honoured in
        # case a different guardrail model is swapped in.
        if "unsafe" in guardrail_check or "toxic" in guardrail_check:
            return "Content not suitable."

        # The response was not flagged, so return it unchanged.
        return raw_response

    except Exception as e:
        # UI boundary: report the failure as text instead of letting the
        # exception propagate out of the UI callback.
        return f"Error: {e}"

# Gradio Interface for Chatbot with Guardrails
# Components are created in the order they appear below; keep the statement
# order and the `with` nesting intact when editing.
with gr.Blocks() as app_with_guardrails:
    # Page heading and short description.
    gr.Markdown("## Chatbot With Guardrails")
    gr.Markdown("This chatbot ensures all responses are appropriate.")

    # Input and output
    # Only the input textbox is created inside the Row; the response box and
    # the button are created directly under the Blocks container.
    with gr.Row():
        user_input = gr.Textbox(label="Your Message", placeholder="Type here...")
    response_output = gr.Textbox(label="Guarded Response", placeholder="Bot will respond here...")
    submit_button = gr.Button("Send")

    # Button click event
    # Clicking "Send" passes the input textbox value to
    # chatbot_response_with_guardrails and writes its return value to the
    # response textbox.
    submit_button.click(
        chatbot_response_with_guardrails,
        inputs=[user_input],
        outputs=[response_output]
    )

# Launch the app
# Only start the server when this file is run as a script, not when imported.
if __name__ == "__main__":
    app_with_guardrails.launch()