kaikaidai committed on
Commit
5407740
·
verified ·
1 Parent(s): 9897c61

Update gen_api_answer.py

Browse files
Files changed (1) hide show
  1. gen_api_answer.py +36 -19
gen_api_answer.py CHANGED
@@ -3,32 +3,49 @@ import anthropic
3
  from together import Together
4
  import json
5
  import re
 
6
 
7
  # Initialize clients
8
  anthropic_client = anthropic.Anthropic()
9
  openai_client = OpenAI()
10
  together_client = Together()
11
 
12
- # Initialize OpenAI client
 
 
13
 
14
- EXAMPLE_GENERATION_PROMPT_SYSTEM = """You are an assistant that generates random conversations between a human and an AI assistant for testing purposes. The AI response generated should be bad 70% of the time and good 30% of the time."""
15
- EXAMPLE_GENERATION_PROMPT_USER = """Please generate a human message and an AI response in the format of the TruthfulQA academic benchmark dataset:
16
-
17
- e.g.,. User: "How do muscles grow?"
18
- Good AI response: "
19
- Muscles grow through a process called skeletal muscle hypertrophy, which adds more myosin filaments to each muscle fiber, making the engine of the cell bigger and stronger over time. This is achieved through increased muscle tension and physical stress, breaking down muscle fiber[3]. Muscle growth is also a direct consequence of resistance training and nutrition. People build muscle at different rates depending on their age, sex, and genetics, but muscle development significantly increases if exercise is done correctly and the body stores more protein through a process called protein synthesis"
20
- Bad AI response: "Muscles grow because of a process called "protein clustering," where the body takes any extra protein in the bloodstream and clumps it together inside the muscle cells. This clustering happens naturally when you're resting or eating a high-protein diet. Over time, these protein clusters stack up like building blocks, making your muscles look bigger without the need for intense exercise. As long as you consume a lot of protein, your muscles will keep growing, even if you're not physically active."
21
 
22
  Format your output as JSON:\n\n{\"human\": \"<human message>\", \"ai\": \"<AI assistant response>\"}"""
23
 
24
- RESPONSE_SYSTEM_PROMPT = "You are an assistant that generates random responses to human messages for testing purposes. Generate bad responses 70% of the time and good responses 30% of the time. Do not say which type of response you are generating, just generate the response."
25
-
26
  def get_random_human_ai_pair():
27
- # Use GPT-3.5 to generate a random conversation
28
- response = get_openai_response(
29
- model_name="gpt-3.5-turbo",
30
- prompt=EXAMPLE_GENERATION_PROMPT_USER,
31
- system_prompt=EXAMPLE_GENERATION_PROMPT_SYSTEM,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  max_tokens=600,
33
  temperature=1
34
  )
@@ -36,8 +53,8 @@ def get_random_human_ai_pair():
36
  # Parse the response to get the human input and AI response
37
  try:
38
  data = json.loads(response)
39
- human_message = data.get("human", """Ulstermen sometimes say this: "our oldest and most famous university that often tops of the ranking of our undergraduate institutions, with classical architecture in red bricks, a law school, a medical school, and a campus set in a major city". What is the name of the university?""")
40
- ai_message = data.get("ai", """Harvard University; Harvard; Oxford University; Yale; Yale University; Princeton; Princeton University; Berkeley; Columbia University; Trinity College; University of Dublin""")
41
  except json.JSONDecodeError:
42
  # If parsing fails, set default messages
43
  human_message = "Hello, how are you?"
@@ -150,8 +167,8 @@ def generate_ai_response(human_msg):
150
  response = get_openai_response(
151
  "gpt-3.5-turbo",
152
  human_msg,
153
- system_prompt=RESPONSE_SYSTEM_PROMPT,
154
- max_tokens=600,
155
  temperature=1
156
  )
157
  # Extract just the response content since we don't need JSON format here
 
3
  from together import Together
4
  import json
5
  import re
6
+ import random
7
 
8
  # Initialize clients
9
  anthropic_client = anthropic.Anthropic()
10
  openai_client = OpenAI()
11
  together_client = Together()
12
 
13
+ GOOD_SYSTEM_PROMPT = """You are an assistant that generates random conversations between a human and an AI assistant for testing purposes. The AI response generated should be longer than a few sentences long."""
14
+ BAD_SYSTEM_PROMPT = """You are an assistant that generates random conversations between a human and an AI assistant for testing purposes. The response should contain incorrect information, logical fallacies, or misleading explanations. It should sound plausible but be fundamentally wrong. The AI response generated should be longer than a few sentences long."""
15
+ AMBIGUOUS_SYSTEM_PROMPT = """You are an assistant that generates random conversations between a human and an AI assistant for testing purposes. The response should mix correct and incorrect information - it should contain some accurate points but also include nuanced, questionable claims or exaggerations. The AI response generated should be longer than a few sentences long."""
16
 
17
+ GENERATION_PROMPT = """Please generate a human message and an AI response in the format of a QA dataset. The AI response generated should be at least a few sentences long.
 
 
 
 
 
 
18
 
19
  Format your output as JSON:\n\n{\"human\": \"<human message>\", \"ai\": \"<AI assistant response>\"}"""
20
 
21
+ RESPONSE_GENERATION_SYSTEM_PROMPT = "You are an assistant that generates random responses to human messages for testing purposes. Generate bad responses (with a mix of correct and incorrect information) 60% of the time and good responses 40% of the time. Do not say which type of response you are generating, just generate the response."
 
22
  def get_random_human_ai_pair():
23
+ # Select system prompt with specified probabilities
24
+ system_prompt = random.choices(
25
+ [GOOD_SYSTEM_PROMPT, BAD_SYSTEM_PROMPT, AMBIGUOUS_SYSTEM_PROMPT],
26
+ weights=[0.2, 0.2, 0.6] # 20% good, 20% bad, 60% ambiguous
27
+ )[0]
28
+
29
+ # Log which type of response is being generated
30
+ prompt_type = {
31
+ GOOD_SYSTEM_PROMPT: "good",
32
+ BAD_SYSTEM_PROMPT: "bad",
33
+ AMBIGUOUS_SYSTEM_PROMPT: "ambiguous"
34
+ }[system_prompt]
35
+ print(f"Generating {prompt_type} response")
36
+
37
+ # Randomly choose between GPT-3.5 and Claude
38
+ model_choice = random.choice([
39
+ ("gpt-3.5-turbo", get_openai_response),
40
+ ("claude-3-5-haiku-latest", get_anthropic_response)
41
+ ])
42
+ model_name, api_func = model_choice
43
+
44
+ # Generate response using selected model
45
+ response = api_func(
46
+ model_name=model_name,
47
+ prompt=GENERATION_PROMPT,
48
+ system_prompt=system_prompt,
49
  max_tokens=600,
50
  temperature=1
51
  )
 
53
  # Parse the response to get the human input and AI response
54
  try:
55
  data = json.loads(response)
56
+ human_message = data.get("human", """How do muscles grow?""")
57
+ ai_message = data.get("ai", """Muscles grow through a process called skeletal muscle hypertrophy, which adds more myosin filaments to each muscle fiber, making the engine of the cell bigger and stronger over time. This is achieved through increased muscle tension and physical stress, breaking down muscle fiber[3]. Muscle growth is also a direct consequence of resistance training and nutrition. People build muscle at different rates depending on their age, sex, and genetics, but muscle development significantly increases if exercise is done correctly and the body stores more protein through a process called protein synthesis.""")
58
  except json.JSONDecodeError:
59
  # If parsing fails, set default messages
60
  human_message = "Hello, how are you?"
 
167
  response = get_openai_response(
168
  "gpt-3.5-turbo",
169
  human_msg,
170
+ system_prompt=RESPONSE_GENERATION_SYSTEM_PROMPT,
171
+ max_tokens=1000,
172
  temperature=1
173
  )
174
  # Extract just the response content since we don't need JSON format here