ambrosfitz committed on
Commit
0d1b0b3
·
verified ·
1 Parent(s): 77b39ea

Update question_generator.py

Browse files
Files changed (1) hide show
  1. question_generator.py +43 -61
question_generator.py CHANGED
@@ -3,6 +3,7 @@ import csv
3
  import os
4
  import logging
5
  import hashlib
 
6
  from typing import List, Dict
7
  from datetime import datetime
8
  from mistralai.client import MistralClient
@@ -22,48 +23,7 @@ model = "mistral-large-latest"
22
  # Initialize Mistral client
23
  client = MistralClient(api_key=api_key)
24
 
25
- def load_csv_data(file_path: str) -> List[Dict[str, str]]:
26
- """Load data from a CSV file."""
27
- logging.info(f"Loading data from {file_path}...")
28
- try:
29
- with open(file_path, 'r', encoding='utf-8') as csvfile:
30
- reader = csv.DictReader(csvfile)
31
- data = list(reader)
32
- logging.info(f"Loaded {len(data)} rows from {file_path}")
33
- return data
34
- except FileNotFoundError:
35
- logging.error(f"File not found: {file_path}")
36
- raise
37
- except csv.Error as e:
38
- logging.error(f"Error reading CSV file {file_path}: {e}")
39
- raise
40
-
41
- # Load data from both CSV files
42
- try:
43
- detailed_cases = load_csv_data('processed_medical_history.csv')
44
- infectious_diseases = load_csv_data('infectious_diseases.csv')
45
- except Exception as e:
46
- logging.error(f"Failed to load CSV data: {e}")
47
- raise
48
-
49
- def hash_question(question: str) -> str:
50
- """Generate a hash for a question to check for duplicates."""
51
- return hashlib.md5(question.encode()).hexdigest()
52
-
53
- def load_generated_questions() -> set:
54
- """Load previously generated question hashes from a file."""
55
- try:
56
- with open('generated_questions.txt', 'r') as f:
57
- return set(line.strip() for line in f)
58
- except FileNotFoundError:
59
- return set()
60
-
61
- def save_generated_question(question_hash: str):
62
- """Save a newly generated question hash to the file."""
63
- with open('generated_questions.txt', 'a') as f:
64
- f.write(question_hash + '\n')
65
-
66
- generated_questions = load_generated_questions()
67
 
68
  def generate_microbiology_question() -> Dict[str, str]:
69
  """Generate a microbiology question."""
@@ -151,26 +111,48 @@ def generate_microbiology_question() -> Dict[str, str]:
151
  }}
152
  """
153
 
154
- chat_response = client.chat(
155
- model=model,
156
- messages=[
157
- ChatMessage(role="system", content="You are a medical educator creating unique microbiology questions for the NBME exam. Ensure each question is distinct from previously generated ones and follows the specified template."),
158
- ChatMessage(role="user", content=prompt)
159
- ]
160
- )
161
-
162
- response_content = chat_response.choices[0].message.content
163
- # Parse the JSON response
164
- import json
165
- question_data = json.loads(response_content)
166
-
167
- # Save the question hash
168
- question_hash = hash_question(question_data['question'])
169
- if question_hash not in generated_questions:
170
- generated_questions.add(question_hash)
171
- save_generated_question(question_hash)
 
 
 
 
 
 
 
 
 
 
 
 
172
 
173
- return question_data
 
 
 
 
 
 
 
 
 
 
174
 
175
  # Example usage
176
  if __name__ == "__main__":
 
3
  import os
4
  import logging
5
  import hashlib
6
+ import json
7
  from typing import List, Dict
8
  from datetime import datetime
9
  from mistralai.client import MistralClient
 
23
  # Initialize Mistral client
24
  client = MistralClient(api_key=api_key)
25
 
26
+ # ... (previous functions remain the same)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  def generate_microbiology_question() -> Dict[str, str]:
29
  """Generate a microbiology question."""
 
111
  }}
112
  """
113
 
114
+ try:
115
+ chat_response = client.chat(
116
+ model=model,
117
+ messages=[
118
+ ChatMessage(role="system", content="You are a medical educator creating unique microbiology questions for the NBME exam. Ensure each question is distinct from previously generated ones and follows the specified template."),
119
+ ChatMessage(role="user", content=prompt)
120
+ ]
121
+ )
122
+
123
+ response_content = chat_response.choices[0].message.content
124
+ logging.info(f"Received response from Mistral API: {response_content[:100]}...") # Log first 100 characters
125
+
126
+ # Parse the JSON response
127
+ question_data = json.loads(response_content)
128
+
129
+ # Validate the structure of the parsed JSON
130
+ required_keys = ["question", "options", "correct_answer", "explanation", "medical_reasoning"]
131
+ if not all(key in question_data for key in required_keys):
132
+ raise ValueError("Response is missing required keys")
133
+
134
+ if not all(key in question_data["options"] for key in ["A", "B", "C", "D", "E"]):
135
+ raise ValueError("Response is missing required option keys")
136
+
137
+ # Save the question hash
138
+ question_hash = hash_question(question_data['question'])
139
+ if question_hash not in generated_questions:
140
+ generated_questions.add(question_hash)
141
+ save_generated_question(question_hash)
142
+
143
+ return question_data
144
 
145
+ except json.JSONDecodeError as e:
146
+ logging.error(f"Failed to parse JSON response: {e}")
147
+ logging.error(f"Response content: {response_content}")
148
+ raise
149
+ except ValueError as e:
150
+ logging.error(f"Invalid response structure: {e}")
151
+ logging.error(f"Response content: {response_content}")
152
+ raise
153
+ except Exception as e:
154
+ logging.error(f"An unexpected error occurred: {e}")
155
+ raise
156
 
157
  # Example usage
158
  if __name__ == "__main__":