Spaces:

ambrosfitz
/

md-qa-test

Sleeping

App Files Community

ambrosfitz committed on Oct 13, 2024

Commit

0d1b0b3

verified ·

1 Parent(s): 77b39ea

Update question_generator.py

Browse files

Files changed (1) hide show

question_generator.py +43 -61

question_generator.py CHANGED Viewed

@@ -3,6 +3,7 @@ import csv
 import os
 import logging
 import hashlib
 from typing import List, Dict
 from datetime import datetime
 from mistralai.client import MistralClient
@@ -22,48 +23,7 @@ model = "mistral-large-latest"
 # Initialize Mistral client
 client = MistralClient(api_key=api_key)
-def load_csv_data(file_path: str) -> List[Dict[str, str]]:
-    """Load data from a CSV file."""
-    logging.info(f"Loading data from {file_path}...")
-    try:
-        with open(file_path, 'r', encoding='utf-8') as csvfile:
-            reader = csv.DictReader(csvfile)
-            data = list(reader)
-        logging.info(f"Loaded {len(data)} rows from {file_path}")
-        return data
-    except FileNotFoundError:
-        logging.error(f"File not found: {file_path}")
-        raise
-    except csv.Error as e:
-        logging.error(f"Error reading CSV file {file_path}: {e}")
-        raise
-# Load data from both CSV files
-try:
-    detailed_cases = load_csv_data('processed_medical_history.csv')
-    infectious_diseases = load_csv_data('infectious_diseases.csv')
-except Exception as e:
-    logging.error(f"Failed to load CSV data: {e}")
-    raise
-def hash_question(question: str) -> str:
-    """Generate a hash for a question to check for duplicates."""
-    return hashlib.md5(question.encode()).hexdigest()
-def load_generated_questions() -> set:
-    """Load previously generated question hashes from a file."""
-    try:
-        with open('generated_questions.txt', 'r') as f:
-            return set(line.strip() for line in f)
-    except FileNotFoundError:
-        return set()
-def save_generated_question(question_hash: str):
-    """Save a newly generated question hash to the file."""
-    with open('generated_questions.txt', 'a') as f:
-        f.write(question_hash + '\n')
-generated_questions = load_generated_questions()
 def generate_microbiology_question() -> Dict[str, str]:
     """Generate a microbiology question."""
@@ -151,26 +111,48 @@ def generate_microbiology_question() -> Dict[str, str]:
     }}
     """
-    chat_response = client.chat(
-        model=model,
-        messages=[
-            ChatMessage(role="system", content="You are a medical educator creating unique microbiology questions for the NBME exam. Ensure each question is distinct from previously generated ones and follows the specified template."),
-            ChatMessage(role="user", content=prompt)
-        ]
-    )
-    response_content = chat_response.choices[0].message.content
-    # Parse the JSON response
-    import json
-    question_data = json.loads(response_content)
-    # Save the question hash
-    question_hash = hash_question(question_data['question'])
-    if question_hash not in generated_questions:
-        generated_questions.add(question_hash)
-        save_generated_question(question_hash)
-    return question_data
 # Example usage
 if __name__ == "__main__":

 import os
 import logging
 import hashlib
+import json
 from typing import List, Dict
 from datetime import datetime
 from mistralai.client import MistralClient
 # Initialize Mistral client
 client = MistralClient(api_key=api_key)
+# ... (previous functions remain the same)
 def generate_microbiology_question() -> Dict[str, str]:
     """Generate a microbiology question."""
     }}
     """
+    try:
+        chat_response = client.chat(
+            model=model,
+            messages=[
+                ChatMessage(role="system", content="You are a medical educator creating unique microbiology questions for the NBME exam. Ensure each question is distinct from previously generated ones and follows the specified template."),
+                ChatMessage(role="user", content=prompt)
+            ]
+        )
+        response_content = chat_response.choices[0].message.content
+        logging.info(f"Received response from Mistral API: {response_content[:100]}...")  # Log first 100 characters
+        # Parse the JSON response
+        question_data = json.loads(response_content)
+        # Validate the structure of the parsed JSON
+        required_keys = ["question", "options", "correct_answer", "explanation", "medical_reasoning"]
+        if not all(key in question_data for key in required_keys):
+            raise ValueError("Response is missing required keys")
+        if not all(key in question_data["options"] for key in ["A", "B", "C", "D", "E"]):
+            raise ValueError("Response is missing required option keys")
+        # Save the question hash
+        question_hash = hash_question(question_data['question'])
+        if question_hash not in generated_questions:
+            generated_questions.add(question_hash)
+            save_generated_question(question_hash)
+        return question_data
+    except json.JSONDecodeError as e:
+        logging.error(f"Failed to parse JSON response: {e}")
+        logging.error(f"Response content: {response_content}")
+        raise
+    except ValueError as e:
+        logging.error(f"Invalid response structure: {e}")
+        logging.error(f"Response content: {response_content}")
+        raise
+    except Exception as e:
+        logging.error(f"An unexpected error occurred: {e}")
+        raise
 # Example usage
 if __name__ == "__main__":