Spaces:

PearlIsa
/

pearly_med_triage_chatbot_kagglex

Runtime error

File size: 28,484 Bytes

# Standard Libraries
import os
import json
import time
import asyncio
import logging
import gc
import re
import traceback
from pathlib import Path
from datetime import datetime
from typing import List, Dict, Union, Tuple, Optional, Any
from dataclasses import dataclass, field
import zipfile

# Machine Learning and Deep Learning Libraries
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.cuda.amp import autocast
from torch.utils.data import DataLoader
import tensorflow as tf
import keras
import numpy as np

# Hugging Face and Transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from sentence_transformers import SentenceTransformer
from datasets import load_dataset, Dataset, concatenate_datasets
from huggingface_hub import login

# FAISS and PEFT
import faiss
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, TaskType, PeftModel

# LangChain - updated imports as per recent deprecations
from langchain_community.vectorstores import FAISS  # Updated import
from langchain_community.embeddings import HuggingFaceEmbeddings  # Updated import
from langchain_community.document_loaders import TextLoader  # Updated import
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Data Science and Visualization Libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from sklearn.metrics import classification_report, confusion_matrix

# Development and Testing
import pytest
from unittest.mock import Mock, patch

# External Tools and APIs
import wandb
import requests
import gradio as gr
import IPython.display as display  # Required for IPython display functionality
from dotenv import load_dotenv
from tqdm.auto import tqdm

# Suppress Warnings
import warnings
warnings.filterwarnings('ignore')


# Ensure Hugging Face login
# Load .env first so that an HF_TOKEN defined there is visible to os.getenv below.
load_dotenv()
try:
    hf_token = os.getenv("HF_TOKEN")
    if hf_token:
        login(token=hf_token)
        # Only report success when a login was actually performed.
        print("Login successful!")
    else:
        print("HF_TOKEN not set; skipping Hugging Face login.")
except Exception as e:
    # Best-effort: a failed login must not prevent the app from starting.
    print("Hugging Face Login failed:", e)


# Tune the CUDA caching allocator. These must be set before the first CUDA allocation.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:64,garbage_collection_threshold:0.8,expandable_segments:True'
# NOTE(review): CUDA_LAUNCH_BLOCKING=1 is normally a debugging aid and slows GPU work - confirm it is wanted in production.
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'



# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)



class ModelManager:
    """Handles model loading and resource management"""

    @staticmethod
    def verify_and_extract_model(checkpoint_zip_path: str, extracted_model_dir: str) -> str:
        """Make sure the zipped checkpoint is unpacked and return its directory.

        The archive is unpacked only when ``extracted_model_dir`` is missing or is
        an empty directory. Extraction goes to a sibling staging directory that is
        renamed into place at the end, so an interrupted extraction is never
        mistaken for a complete model on the next start.

        Args:
            checkpoint_zip_path: Path to the ``.zip`` archive holding the model.
            extracted_model_dir: Directory the archive should be unpacked into.

        Returns:
            ``extracted_model_dir``, unchanged.

        Raises:
            FileNotFoundError: Extraction is needed but the archive does not exist.
            zipfile.BadZipFile: The archive is not a valid zip file.
        """
        is_empty_dir = os.path.isdir(extracted_model_dir) and not os.listdir(extracted_model_dir)
        if os.path.exists(extracted_model_dir) and not is_empty_dir:
            logger.info(f"Model already extracted: {extracted_model_dir}")
            return extracted_model_dir

        if not os.path.isfile(checkpoint_zip_path):
            raise FileNotFoundError(f"Model archive not found: {checkpoint_zip_path}")

        # Function-scope import keeps this block self-contained.
        import shutil

        staging_dir = os.path.normpath(extracted_model_dir) + ".partial"
        # Drop any leftovers from a previously interrupted extraction.
        shutil.rmtree(staging_dir, ignore_errors=True)
        try:
            with zipfile.ZipFile(checkpoint_zip_path, 'r') as zip_ref:
                zip_ref.extractall(staging_dir)
        except Exception:
            shutil.rmtree(staging_dir, ignore_errors=True)
            raise

        if is_empty_dir:
            # An empty target directory would block the rename on some platforms.
            os.rmdir(extracted_model_dir)
        os.replace(staging_dir, extracted_model_dir)
        logger.info(f"Extracted model to: {extracted_model_dir}")
        return extracted_model_dir

    @staticmethod
    def clear_gpu_memory():
        """Run the garbage collector, then release cached CUDA memory if CUDA is available."""
        # Collect first: blocks still referenced by garbage tensors cannot be
        # released by empty_cache().
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

class PearlyBot:
    def __init__(self, model_zip_path: str = "./checkpoint-500.zip", model_dir: str = "./checkpoint-500"):
        """Unpack the checkpoint if needed, load the model and build the RAG index.

        Args:
            model_zip_path: Path of the zipped model checkpoint.
            model_dir: Directory the checkpoint is (or will be) extracted into.
        """
        extracted_dir = ModelManager.verify_and_extract_model(model_zip_path, model_dir)
        self.model_dir = extracted_dir

        # Heavy initialisation: language model first, then the retrieval index.
        self.setup_model(extracted_dir)
        self.setup_rag()

        # Chat bookkeeping used by generate_response's rate limiter.
        self.conversation_history = list()
        self.last_interaction_time = time.time()
        self.interaction_cooldown = 1.0  # seconds
        
    def setup_model(self, model_path: str):
        """Initialize the tokenizer and causal language model from ``model_path``.

        With CUDA available the model is loaded 8-bit quantized (through
        ``BitsAndBytesConfig``) in float16 with ``device_map="auto"``. Without
        CUDA it is loaded un-quantized in float32 on the CPU, so the app can
        still start on CPU-only hardware, just more slowly.

        Args:
            model_path: Directory containing the tokenizer and model files.

        Raises:
            Exception: Any error raised while loading is logged and re-raised.
        """
        try:
            logger.info("Starting model initialization...")
            ModelManager.clear_gpu_memory()

            # Load tokenizer
            try:
                self.tokenizer = AutoTokenizer.from_pretrained(model_path)
                # Reuse the end-of-sequence token for padding.
                self.tokenizer.pad_token = self.tokenizer.eos_token
                logger.info("Tokenizer loaded successfully")
            except Exception as e:
                logger.error(f"Failed to load tokenizer: {str(e)}")
                raise

            # Load model
            try:
                load_kwargs = {"low_cpu_mem_usage": True}
                if torch.cuda.is_available():
                    # Function-scope import: only needed on the quantized path.
                    from transformers import BitsAndBytesConfig

                    load_kwargs.update(
                        device_map="auto",
                        # Passing load_in_8bit directly to from_pretrained is
                        # deprecated in favour of quantization_config.
                        quantization_config=BitsAndBytesConfig(load_in_8bit=True),
                        torch_dtype=torch.float16,
                    )
                else:
                    logger.warning(
                        "CUDA not available; loading model on CPU without 8-bit quantization"
                    )
                    load_kwargs["torch_dtype"] = torch.float32

                self.model = AutoModelForCausalLM.from_pretrained(model_path, **load_kwargs)
                self.model.eval()
                logger.info("Model loaded successfully")
            except Exception as e:
                logger.error(f"Failed to load model: {str(e)}")
                raise

        except Exception as e:
            logger.error(f"Error in model setup: {str(e)}")
            raise

    def setup_rag(self):
        try:
            logger.info("Setting up RAG system...")
            # Load your knowledge base content
            knowledge_base = {
                "triage_scenarios.txt": """Medical Triage Scenarios and Responses:

EMERGENCY (999) SCENARIOS:
1. Cardiovascular:
- Chest pain/pressure
- Heart attack symptoms
- Irregular heartbeat with dizziness
Response: Immediate 999 call, sit/lie down, chew aspirin if available

2. Respiratory:
- Severe breathing difficulty
- Choking
- Unable to speak full sentences
Response: 999, sitting position, clear airway

3. Neurological:
- Stroke symptoms (FAST)
- Seizures
- Unconsciousness
Response: 999, recovery position if unconscious

4. Trauma:
- Severe bleeding
- Head injuries with confusion
- Major burns
Response: 999, apply direct pressure to bleeding

URGENT CARE (111) SCENARIOS:
1. Moderate Symptoms:
- Persistent fever
- Non-severe infections
- Minor injuries
Response: 111 contact, monitor symptoms

2. Minor Emergencies:
- Small cuts needing stitches
- Sprains and strains
- Mild allergic reactions
Response: 111 or urgent care visit

GP APPOINTMENT SCENARIOS:
1. Routine Care:
- Chronic condition review
- Medication reviews
- Non-urgent symptoms
Response: Book routine GP appointment

2. Preventive Care:
- Vaccinations
- Health screenings
- Regular check-ups
Response: Schedule with GP reception""",
                "emergency_detection.txt": """Enhanced Emergency Detection Criteria:

IMMEDIATE LIFE THREATS:
1. Cardiac Symptoms:
- Chest pain/pressure/tightness
- Pain spreading to arms/jaw/neck
- Sweating with nausea
- Shortness of breath

2. Breathing Problems:
- Severe shortness of breath
- Blue lips or face
- Unable to complete sentences
- Choking/airway blockage

3. Neurological:
- FAST (Face, Arms, Speech, Time)
- Sudden confusion
- Severe headache
- Seizures
- Loss of consciousness

4. Severe Trauma:
- Heavy bleeding
- Deep wounds
- Head injury with confusion
- Severe burns
- Broken bones with deformity

5. Anaphylaxis:
- Sudden swelling
- Difficulty breathing
- Rapid onset rash
- Light-headedness

URGENT BUT NOT IMMEDIATE:
1. Moderate Symptoms:
- Persistent fever
- Dehydration
- Non-severe infections
- Minor injuries

2. Worsening Conditions:
- Increasing pain
- Progressive symptoms
- Medication reactions

RESPONSE PROTOCOLS:
1. For Life Threats:
- Immediate 999 call
- Clear first aid instructions
- Stay on line until help arrives

2. For Urgent Care:
- 111 contact
- Monitor for worsening
- Document symptoms""",
                "gp_booking.txt": """GP Appointment Booking Templates:

APPOINTMENT TYPES:
1. Routine Appointments:
Template: "I need to book a routine appointment for [condition]. My availability is [times/dates]. My GP is Dr. [name] if available."

2. Follow-up Appointments:
Template: "I need a follow-up appointment regarding [condition] discussed on [date]. My previous appointment was with Dr. [name]."

3. Medication Reviews:
Template: "I need a medication review for [medication]. My last review was [date]."

BOOKING INFORMATION NEEDED:
1. Patient Details:
- Full name
- Date of birth
- NHS number (if known)
- Registered GP practice

2. Appointment Details:
- Nature of appointment
- Preferred times/dates
- Urgency level
- Special requirements

3. Contact Information:
- Phone number
- Alternative contact
- Preferred contact method

BOOKING PROCESS:
1. Online Booking:
- NHS app instructions
- Practice website guidance
- System navigation help

2. Phone Booking:
- Best times to call
- Required information
- Queue management tips

3. Special Circumstances:
- Interpreter needs
- Accessibility requirements
- Transport arrangements""",
                "cultural_sensitivity.txt": """Cultural Sensitivity Guidelines:

CULTURAL AWARENESS:
1. Religious Considerations:
- Prayer times
- Religious observations
- Dietary restrictions
- Gender preferences for care
- Religious festivals/fasting periods

2. Language Support:
- Interpreter services
- Multi-language resources
- Clear communication methods
- Family involvement preferences

3. Cultural Beliefs:
- Traditional medicine practices
- Cultural health beliefs
- Family decision-making
- Privacy customs

COMMUNICATION APPROACHES:
1. Respectful Interaction:
- Use preferred names/titles
- Appropriate greetings
- Non-judgmental responses
- Active listening

2. Language Usage:
- Clear, simple terms
- Avoid medical jargon
- Confirm understanding
- Respect silence/pauses

3. Non-verbal Communication:
- Eye contact customs
- Personal space
- Body language awareness
- Gesture sensitivity

SPECIFIC CONSIDERATIONS:
1. South Asian Communities:
- Family involvement
- Gender sensitivity
- Traditional medicine
- Language diversity

2. Middle Eastern Communities:
- Gender-specific care
- Religious observations
- Family hierarchies
- Privacy concerns

3. African/Caribbean Communities:
- Traditional healers
- Community involvement
- Historical medical mistrust
- Cultural specific conditions

4. Eastern European Communities:
- Direct communication
- Family involvement
- Medical documentation
- Language support

INCLUSIVE PRACTICES:
1. Appointment Scheduling:
- Religious holidays
- Prayer times
- Family availability
- Interpreter needs

2. Treatment Planning:
- Cultural preferences
- Traditional practices
- Family involvement
- Dietary requirements

3. Support Services:
- Community resources
- Cultural organizations
- Language services
- Social support""",
                "service_boundaries.txt": """Service Limitations and Professional Boundaries:

CLEAR BOUNDARIES:
1. Medical Advice:
- No diagnoses
- No prescriptions
- No treatment recommendations
- No medical procedures
- No second opinions

2. Emergency Services:
- Clear referral criteria
- Documented responses
- Follow-up protocols
- Handover procedures

3. Information Sharing:
- Confidentiality limits
- Data protection
- Record keeping
- Information governance

PROFESSIONAL CONDUCT:
1. Communication:
- Professional language
- Emotional boundaries
- Personal distance
- Service scope

2. Service Delivery:
- No financial transactions
- No personal relationships
- Clear role definition
- Professional limits"""
            }
            
            os.makedirs("knowledge_base", exist_ok=True)
            
            # Create and process documents
            documents = []
            for filename, content in knowledge_base.items():
                with open(f"knowledge_base/{filename}", "w") as f:
                    f.write(content)
                documents.append(content)
            
            # Setup embeddings and vector store
            self.embeddings = HuggingFaceEmbeddings(
                model_name="sentence-transformers/all-MiniLM-L6-v2"
            )
            
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=300,
                chunk_overlap=100
            )
            
            texts = text_splitter.split_text("\n\n".join(documents))
            self.vector_store = FAISS.from_texts(texts, self.embeddings)
            logger.info("RAG system setup complete")
            
        except Exception as e:
            logger.error(f"Error setting up RAG: {str(e)}")
            raise

    def get_relevant_context(self, query):
        try:
            docs = self.vector_store.similarity_search(query, k=3)
            return "\n".join(doc.page_content for doc in docs)
        except Exception as e:
            logger.error(f"Error retrieving context: {str(e)}")
            return ""

    @torch.inference_mode()
    def generate_response(self, message: str, history: list) -> str:
        """Generate response using both fine-tuned model and RAG"""
        try:
            # Rate limiting
            current_time = time.time()
            if current_time - self.last_interaction_time < self.interaction_cooldown:
                time.sleep(self.interaction_cooldown)
            
            # Clear GPU memory before generation
            ModelManager.clear_gpu_memory()
            
            # Get RAG context
            context = self.get_relevant_context(message)
            
            # Format conversation history
            conv_history = "\n".join([
                f"User: {user}\nAssistant: {assistant}"
                for user, assistant in history[-3:]  # Keep last 3 turns
            ])
            
            # Create prompt
            prompt = f"""<start_of_turn>system
Using these medical guidelines:

{context}

Previous conversation:
{conv_history}

Guidelines:
1. Assess symptoms and severity
2. Ask relevant follow-up questions
3. Direct to appropriate care (999, 111, or GP)
4. Show empathy and cultural sensitivity
5. Never diagnose or recommend treatments
<end_of_turn>
<start_of_turn>user
{message}
<end_of_turn>
<start_of_turn>assistant"""

            # Generate response
            try:
                inputs = self.tokenizer(
                    prompt,
                    return_tensors="pt",
                    truncation=True,
                    max_length=512
                ).to(self.model.device)

                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=256,
                    min_new_tokens=20,
                    do_sample=True,
                    temperature=0.7,
                    top_p=0.9,
                    repetition_penalty=1.2,
                    no_repeat_ngram_size=3
                )
                
                response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
                response = response.split("<start_of_turn>assistant")[-1].strip()
                if "<end_of_turn>" in response:
                    response = response.split("<end_of_turn>")[0].strip()
                
                self.last_interaction_time = time.time()
                return response
                
            except torch.cuda.OutOfMemoryError:
                ModelManager.clear_gpu_memory()
                logger.error("GPU out of memory, cleared cache and retrying...")
                return "I apologize, but I'm experiencing technical difficulties. Please try again."
                
        except Exception as e:
            logger.error(f"Error generating response: {str(e)}")
            return "I apologize, but I encountered an error. Please try again."
            
    def handle_feedback(self, message: str, response: str, feedback: int):
        """Record a user's rating of one bot reply.

        At present the feedback is only written to the log; persisting it
        (database, monitoring, model improvement) is left as future work.

        Args:
            message: The user message that was rated.
            response: The bot reply that was rated.
            feedback: The rating value supplied by the caller.

        Returns:
            True when the feedback was logged, False if an error occurred.
        """
        try:
            received_at = datetime.now().isoformat()
            feedback_data = dict(
                message=message,
                response=response,
                feedback=feedback,
                timestamp=received_at,
            )
            logger.info(f"Feedback received: {feedback_data}")
        except Exception as e:
            logger.error(f"Error handling feedback: {e}")
            return False
        return True

    def __del__(self):
        """Drop the model reference and clear GPU memory; errors are logged, not raised."""
        try:
            # The model may be missing if setup failed part-way through __init__.
            if hasattr(self, 'model'):
                delattr(self, 'model')
            ModelManager.clear_gpu_memory()
        except Exception as e:
            logger.error(f"Error in cleanup: {e}")

    def process_feedback(self, positive: bool, comment: str, history: List[Dict[str, str]]):
        """Forward a thumbs-up/down on the latest exchange to ``handle_feedback``.

        This was previously declared without ``self`` and called an undefined
        global ``bot``, so it could not work as a method; it now uses the
        instance it is called on.

        Args:
            positive: True for a thumbs-up, False for a thumbs-down.
            comment: Free-text comment from the user (currently not stored).
            history: Chat history, either messages-format dicts or
                ``(user, assistant)`` pairs.

        Returns:
            A Gradio update that empties the comment textbox, in every case.
        """
        try:
            if not history:
                return gr.update(value="")

            last_entry = history[-1]
            if isinstance(last_entry, dict):
                # Messages format: the exchange is the final two entries.
                if len(history) < 2:
                    return gr.update(value="")
                previous_entry = history[-2]
                last_user_msg = previous_entry["content"] if isinstance(previous_entry, dict) else previous_entry[0]
                last_bot_msg = last_entry["content"]
            else:
                # Pair format: the final entry already holds both sides of the
                # latest exchange (history[-2] would be the turn before it).
                last_user_msg, last_bot_msg = last_entry[0], last_entry[1]

            self.handle_feedback(
                message=last_user_msg,
                response=last_bot_msg,
                feedback=1 if positive else -1
            )

            return gr.update(value="")

        except Exception as e:
            logger.error(f"Error processing feedback: {e}")
            return gr.update(value="")

def create_demo():
    """Set up Gradio interface for the chatbot with enhanced styling and functionality.

    Creates a ``PearlyBot``, lays out the page (banner, header, feature cards,
    chat area, side panel, examples, NHS guide) and wires the event handlers.
    The handlers are attached only after every component they reference has
    been created. The interface is returned without being launched.

    Returns:
        The configured ``gr.Blocks`` application, with queuing enabled.

    Raises:
        Exception: Any error while building the bot or the UI is logged and re-raised.
    """
    try:
        # Initialize bot
        bot = PearlyBot()

        def chat(message: str, history: list):
            """Handle chat interactions"""
            # Work on a copy so the caller's list is never mutated in place.
            history = list(history or [])
            try:
                if not message or not message.strip():
                    return history

                # Generate response
                response = bot.generate_response(message, history)

                # Update history with proper formatting
                return history + [
                    {"role": "user", "content": message},
                    {"role": "assistant", "content": response},
                ]

            except Exception as e:
                logger.error(f"Chat error: {e}")
                return history + [{
                    "role": "assistant",
                    "content": "I apologize, but I'm experiencing technical difficulties. For emergencies, please call 999."
                }]

        def process_feedback(positive: bool, comment: str, history: list):
            """Forward a rating of the latest exchange to the bot and clear the comment box."""
            try:
                if not history or len(history) < 2:
                    return gr.update(value="")

                # In pair format the latest exchange is entirely in history[-1].
                last_user_msg = history[-2]["content"] if isinstance(history[-2], dict) else history[-1][0]
                last_bot_msg = history[-1]["content"] if isinstance(history[-1], dict) else history[-1][1]

                bot.handle_feedback(
                    message=last_user_msg,
                    response=last_bot_msg,
                    feedback=1 if positive else -1
                )
                if comment and comment.strip():
                    logger.info(f"Feedback comment: {comment.strip()}")

                return gr.update(value="")
            except Exception as e:
                logger.error(f"Error processing feedback: {e}")
                return gr.update(value="")


        # Create enhanced Gradio interface
        with gr.Blocks(theme=gr.themes.Soft(
            primary_hue="blue",
            secondary_hue="indigo",
            neutral_hue="slate",
            font=gr.themes.GoogleFont("Inter")
        )) as demo:
            # Custom CSS for enhanced styling
            gr.HTML("""
                <style>
                    .container { max-width: 900px; margin: auto; }
                    .header { text-align: center; padding: 20px; }
                    .emergency-banner {
                        background-color: #ff4444;
                        color: white;
                        padding: 10px;
                        text-align: center;
                        font-weight: bold;
                        margin-bottom: 20px;
                    }
                    .feature-card {
                        padding: 15px;
                        border-radius: 10px;
                        text-align: center;
                        transition: transform 0.2s;
                        color: white;
                        font-weight: bold;
                    }
                    .feature-card:nth-child(1) { background: linear-gradient(135deg, #2193b0, #6dd5ed); }
                    .feature-card:nth-child(2) { background: linear-gradient(135deg, #834d9b, #d04ed6); }
                    .feature-card:nth-child(3) { background: linear-gradient(135deg, #ff4b1f, #ff9068); }
                    .feature-card:nth-child(4) { background: linear-gradient(135deg, #38ef7d, #11998e); }
                    .feature-card:hover {
                        transform: translateY(-5px);
                        box-shadow: 0 5px 15px rgba(0,0,0,0.2);
                    }
                    .feature-card span.emoji {
                        font-size: 2em;
                        display: block;
                        margin-bottom: 10px;
                    }
                    .message-textbox textarea { resize: none; }
                    #thumb-up, #thumb-down {
                        min-width: 60px;
                        padding: 8px;
                        margin: 5px;
                    }
                    .chatbot-message {
                        padding: 12px;
                        margin: 8px 0;
                        border-radius: 8px;
                    }
                    .user-message { background-color: #e3f2fd; }
                    .assistant-message { background-color: #f5f5f5; }
                    .feedback-section {
                        margin-top: 20px;
                        padding: 15px;
                        border-radius: 8px;
                        background-color: #f8f9fa;
                    }
                </style>
            """)

            # Emergency Banner
            gr.HTML("""
                <div class="emergency-banner">
                    🚨 For medical emergencies, always call 999 immediately 🚨
                </div>
            """)

            # Header Section
            with gr.Row(elem_classes="header"):
                gr.Markdown("""
                    # GP Medical Triage Assistant - Pearly
                    Welcome to your personal medical triage assistant. I'm here to help assess your symptoms and guide you to appropriate care.
                """)

            # Main Features Grid
            gr.HTML("""
                <div class="features-grid">
                    <div class="feature-card">
                        <span class="emoji">🏥</span>
                        <div>GP Appointments</div>
                    </div>
                    <div class="feature-card">
                        <span class="emoji">🔍</span>
                        <div>Symptom Assessment</div>
                    </div>
                    <div class="feature-card">
                        <span class="emoji">⚡</span>
                        <div>Urgent Care Guide</div>
                    </div>
                    <div class="feature-card">
                        <span class="emoji">💊</span>
                        <div>Medical Advice</div>
                    </div>
                </div>
            """)

            # Chat Interface
            with gr.Row():
                with gr.Column(scale=4):
                    chatbot = gr.Chatbot(
                        value=[{
                            "role": "assistant",
                            "content": "Hello! I'm Pearly, your GP medical assistant. How can I help you today?"
                        }],
                        height=500,
                        elem_id="chatbot",
                        type="messages",
                        show_label=False
                    )

                    with gr.Row():
                        # NOTE(review): `submit_on_enter` does not appear to be a
                        # documented gr.Textbox argument - confirm against the
                        # installed Gradio version and remove it if it raises.
                        msg = gr.Textbox(
                            label="Your message",
                            placeholder="Type your message here...",
                            lines=2,
                            scale=4,
                            autofocus=True,
                            submit_on_enter=True
                        )
                        submit = gr.Button("Send", variant="primary", scale=1)

                with gr.Column(scale=1):
                    # Quick Actions Panel
                    # NOTE(review): the three quick-action buttons and the
                    # "Submit Feedback" button have no handlers attached yet.
                    gr.Markdown("### Quick Actions")
                    emergency_btn = gr.Button("🚨 Emergency Info", variant="secondary")
                    nhs_111_btn = gr.Button("📞 NHS 111 Info", variant="secondary")
                    booking_btn = gr.Button("📅 GP Booking", variant="secondary")

                    # Controls and Feedback
                    gr.Markdown("### Controls")
                    clear = gr.Button("🗑️ Clear Chat")

                    gr.Markdown("### Feedback")
                    with gr.Row():
                        feedback_positive = gr.Button("👍", elem_id="thumb-up")
                        feedback_negative = gr.Button("👎", elem_id="thumb-down")

                    feedback_text = gr.Textbox(
                        label="Additional comments",
                        placeholder="Tell us more...",
                        lines=2,
                        visible=True
                    )
                    feedback_submit = gr.Button("Submit Feedback", visible=True)

            # Examples and Information
            with gr.Accordion("Example Messages", open=False):
                gr.Examples([
                    ["I've been having severe headaches for the past week"],
                    ["I need to book a routine checkup"],
                    ["I'm feeling very anxious lately and need help"],
                    ["My child has had a fever for 2 days"],
                    ["I need information about COVID-19 testing"]
                ], inputs=msg)

            with gr.Accordion("NHS Services Guide", open=False):
                gr.Markdown("""
                    ### Emergency Services (999)
                    - Life-threatening emergencies
                    - Severe injuries
                    - Suspected heart attack or stroke
                    
                    ### NHS 111
                    - Urgent but non-emergency situations
                    - Medical advice needed
                    - Unsure where to go
                    
                    ### GP Services
                    - Routine check-ups
                    - Non-urgent medical issues
                    - Prescription renewals
                """)

            # Event Handlers - wired only now, after every component they
            # reference exists (wiring them earlier fails because the names
            # are not yet bound).
            msg.submit(chat, [msg, chatbot], [chatbot]).then(
                lambda: gr.update(value=""), None, [msg]
            )

            submit.click(chat, [msg, chatbot], [chatbot]).then(
                lambda: gr.update(value=""), None, [msg]
            )

            # Feedback handlers: the comment box is passed as an input so the
            # handler receives what the user typed, not the component's
            # initial value.
            feedback_positive.click(
                lambda comment, h: process_feedback(True, comment, h),
                inputs=[feedback_text, chatbot],
                outputs=[feedback_text]
            )

            feedback_negative.click(
                lambda comment, h: process_feedback(False, comment, h),
                inputs=[feedback_text, chatbot],
                outputs=[feedback_text]
            )

            # Clear chat
            clear.click(lambda: None, None, chatbot)

        # Add queue for handling multiple users. `concurrency_count` was removed
        # in Gradio 4; `default_concurrency_limit` is its replacement.
        demo.queue(default_concurrency_limit=1, max_size=10)

        return demo

    except Exception as e:
        logger.error(f"Error creating demo: {e}")
        raise

if __name__ == "__main__":
    # Script entry point: configure, build the UI, then serve it.
    try:
        # Logging and environment variables
        logging.basicConfig(level=logging.INFO)
        load_dotenv()

        # Build the interface and serve it on every network interface, port 7860.
        demo = create_demo()
        launch_options = dict(
            server_name="0.0.0.0",
            server_port=7860,
            show_error=True,
        )
        demo.launch(**launch_options)

    except Exception as e:
        # Log for the operator, then let the failure propagate.
        logger.error(f"Application startup failed: {e}")
        raise