Spaces:

ambrosfitz
/

md-qa-test

Sleeping

App Files Files Community

ambrosfitz committed on Oct 13, 2024

Commit

b9549f1

verified ·

1 Parent(s): 6d85b30

Create question_generator.py

Browse files

Files changed (1) hide show

question_generator.py +183 -0

question_generator.py ADDED Viewed

	@@ -0,0 +1,183 @@

+import random
+import csv
+import os
+import logging
+import hashlib
+from typing import List, Dict
+from datetime import datetime
+from mistralai import Mistral
+# Set up logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+# Get the Mistral API key from environment variables
+api_key = os.environ.get("MISTRAL_API_KEY")
+if not api_key:
+    logging.error("MISTRAL_API_KEY environment variable is not set.")
+    raise ValueError("MISTRAL_API_KEY environment variable is not set.")
+model = "mistral-large-latest"
+# Initialize Mistral client
+client = Mistral(api_key=api_key)
+def load_csv_data(file_path: str) -> List[Dict[str, str]]:
+    """Load data from a CSV file."""
+    logging.info(f"Loading data from {file_path}...")
+    try:
+        with open(file_path, 'r', encoding='utf-8') as csvfile:
+            reader = csv.DictReader(csvfile)
+            data = list(reader)
+        logging.info(f"Loaded {len(data)} rows from {file_path}")
+        return data
+    except FileNotFoundError:
+        logging.error(f"File not found: {file_path}")
+        raise
+    except csv.Error as e:
+        logging.error(f"Error reading CSV file {file_path}: {e}")
+        raise
+# Load data from both CSV files
+try:
+    detailed_cases = load_csv_data('processed_medical_history.csv')
+    infectious_diseases = load_csv_data('infectious_diseases.csv')
+except Exception as e:
+    logging.error(f"Failed to load CSV data: {e}")
+    raise
+def hash_question(question: str) -> str:
+    """Generate a hash for a question to check for duplicates."""
+    return hashlib.md5(question.encode()).hexdigest()
+def load_generated_questions() -> set:
+    """Load previously generated question hashes from a file."""
+    try:
+        with open('generated_questions.txt', 'r') as f:
+            return set(line.strip() for line in f)
+    except FileNotFoundError:
+        return set()
+def save_generated_question(question_hash: str):
+    """Save a newly generated question hash to the file."""
+    with open('generated_questions.txt', 'a') as f:
+        f.write(question_hash + '\n')
+# Hashes of previously generated questions, loaded once when the module is executed
+generated_questions = load_generated_questions()
+def generate_microbiology_question() -> Dict[str, str]:
+    """Generate a microbiology question."""
+    question_types = [
+        "clinical_vignette",
+        "mechanism_of_pathogenesis",
+        "laboratory_diagnosis",
+        "antimicrobial_resistance",
+        "vaccine_preventable_disease",
+        "microbial_physiology_genetics",
+        "epidemiology_transmission"
+    ]
+    question_type = random.choice(question_types)
+    logging.info(f"Generating {question_type} question...")
+    if question_type == "clinical_vignette":
+        case = random.choice(detailed_cases)
+        context = f"""
+        Pathogen: {case['Pathogen_Name']} ({case['Pathogen_Type']})
+        Key Symptoms: {case['Key_Symptoms']}
+        Physical Findings: {case['Physical_Findings']}
+        Lab Results: {case['Lab_Results']}
+        Patient Demographics: {case['Patient_Demographics']}
+        """
+    else:
+        disease = random.choice(infectious_diseases)
+        context = f"""
+        Infectious Agent: {disease['infectious_agent']}
+        Diagnosis: {disease['diagnosis']}
+        Treatment: {disease['treatment']}
+        """
+    prompt = f"""
+    Create a microbiology question that could appear on the NBME exam. This should be a {question_type} question.
+    Use the following information as inspiration, but feel free to expand or modify:
+    {context}
+    Generate a question based on the following template, depending on the question type:
+    1. Clinical Vignette with Pathogen Identification:
+    A [age]-year-old [gender] presents with [symptoms and clinical findings]. [Additional relevant information]. Which of the following is the most likely causal organism?
+    2. Mechanism of Pathogenesis:
+    [Description of a pathogen or clinical scenario]
+    Which of the following best describes the mechanism by which this organism causes disease?
+    3. Laboratory Diagnosis:
+    A patient presents with [symptoms]. [Description of laboratory findings or test results].
+    Which of the following is the most likely diagnosis based on these laboratory findings?
+    4. Antimicrobial Mechanism and Resistance:
+    A patient is diagnosed with [infection]. The causative organism is found to be resistant to [antibiotic]. Which of the following mechanisms is most likely responsible for this resistance?
+    5. Vaccine-Preventable Disease:
+    A [age]-year-old [gender] presents with [symptoms of a vaccine-preventable disease]. Which of the following vaccines would have been most likely to prevent this condition?
+    6. Microbial Physiology and Genetics:
+    An investigator observes [description of microbial behavior or genetic phenomenon]. Which of the following best explains this observation?
+    7. Epidemiology and Transmission:
+    A cluster of [disease] cases is reported in [location]. [Description of affected population and circumstances]. Which of the following is the most likely mode of transmission?
+    Include:
+    1. The question based on the selected template
+    2. Five possible answer options (A through E)
+    3. The correct answer
+    4. A brief explanation of why the correct answer is right and why the other options are incorrect
+    5. Detailed medical reasoning for the correct answer, including relevant pathophysiology, microbiology concepts, and clinical implications.
+    Format the response as a JSON object with the following keys:
+    {
+        "question": "The question text",
+        "options": {
+            "A": "Option A text",
+            "B": "Option B text",
+            "C": "Option C text",
+            "D": "Option D text",
+            "E": "Option E text"
+        },
+        "correct_answer": "The letter of the correct answer (A, B, C, D, or E)",
+        "explanation": "The explanation text",
+        "medical_reasoning": "The detailed medical reasoning text"
+    }
+    """
+    chat_response = client.chat.complete(
+        model=model,
+        messages=[
+            {
+                "role": "system",
+                "content": "You are a medical educator creating unique microbiology questions for the NBME exam. Ensure each question is distinct from previously generated ones and follows the specified template."
+            },
+            {
+                "role": "user",
+                "content": prompt
+            }
+        ]
+    )
+    response_content = chat_response.choices[0].message.content
+    # Parse the JSON response
+    import json
+    question_data = json.loads(response_content)
+    # Save the question hash
+    question_hash = hash_question(question_data['question'])
+    if question_hash not in generated_questions:
+        generated_questions.add(question_hash)
+        save_generated_question(question_hash)
+    return question_data
+# Example usage
+if __name__ == "__main__":
+    question = generate_microbiology_question()
+    print(json.dumps(question, indent=2))