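"""Guardrails for a finance chatbot: validate messages against a content
policy using Meta's Llama Guard 3 model served through the Together API."""
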
from together import Together
from dotenv import load_dotenv
import os

# Load environment variables and read the Together API key
load_dotenv()
api_key = os.getenv("TOGETHER_API_KEY")

# Initialize the Together client
client = Together(api_key=api_key)

# Set the content policy for guardrails
safe_content_policy = {
    "policy": """
    O1: Topic Restriction.
    Should not:
    - Respond to questions outside the domain of finance.
    - Discuss non-financial topics such as health, technology, or politics.
    Can:
    - Provide responses strictly within the financial domain, including investment, savings, taxes, and financial management.
    O2: Sensitive Information.
    Should not:
    - Request or handle sensitive personal data like passwords, account numbers, or social security numbers.
    Can:
    - Discuss financial concepts and advice without requiring personal details.
    O3: Language.
    Should not:
    - Use profane, offensive, or inappropriate language.
    Can:
    - Use clear, professional, and respectful language appropriate for financial discussions.
    """
}

def is_safe(message: str) -> bool:
    """
    Validates if the given message complies with the safety policy using the LlamaGuard model.
    Args:
        message (str): User message or chatbot response.
    Returns:
        bool: True if the message is safe, False otherwise.
    """
    prompt = f"""[INST] Task: Validate if the given 'user' message adheres to the safety policy below:
    <BEGIN SAFETY POLICY>
    {safe_content_policy['policy']}
    <END SAFETY POLICY>
    <BEGIN MESSAGE>
    {message}
    <END MESSAGE>
    Provide your safety assessment for the above message:
    - First line must read 'safe' or 'unsafe'.
    - If unsafe, a second line must include a comma-separated list of violated categories. [/INST]"""

    response = client.completions.create(
        model="meta-llama/Meta-Llama-Guard-3-8B",
        prompt=prompt
    )

    # Llama Guard answers with 'safe' or 'unsafe' on the first line; violated
    # categories, if any, follow on the next line, so only the first line is checked.
    result = response.choices[0].text.strip().lower()
    return result.split("\n")[0].strip() == "safe"
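

# --- Illustrative usage (assumed entry point, not part of the original script) ---
# A minimal sketch showing how is_safe can screen both an in-policy finance
# question and an off-topic request; the sample messages are made up for illustration.
if __name__ == "__main__":
    sample_messages = [
        "What is a sensible way to start saving for retirement?",  # within the finance domain
        "Can you recommend a treatment for my back pain?",         # off-topic (health)
    ]
    for msg in sample_messages:
        verdict = "safe" if is_safe(msg) else "unsafe"
        print(f"{verdict}: {msg}")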