# app.py
"""Pearly: a Gradio chat front-end for a GP medical-triage assistant.

The app combines a fine-tuned causal language model with a small FAISS-backed
retrieval store (RAG) built from the in-file ``KNOWLEDGE_BASE`` documents, and
serves the result through a Gradio Blocks interface.
"""
import asyncio
import gc
import json
import logging
import os
import re
import time
import traceback
import warnings
import zipfile
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union
from unittest.mock import Mock, patch

import faiss
import gradio as gr
import IPython.display as display
import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytest
import requests
import seaborn as sns
import tensorflow as tf
import torch
import torch.nn as nn
import torch.nn.functional as F
import wandb
from datasets import Dataset, concatenate_datasets, load_dataset
from dotenv import load_dotenv
from huggingface_hub import login
from langchain.document_loaders import TextLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from matplotlib.gridspec import GridSpec
from peft import (
    LoraConfig,
    PeftModel,
    TaskType,
    get_peft_model,
    prepare_model_for_kbit_training,
)
from sentence_transformers import SentenceTransformer
from sklearn.metrics import classification_report, confusion_matrix
from torch.cuda.amp import autocast
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
)

warnings.filterwarnings('ignore')

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load .env BEFORE reading HF_TOKEN; otherwise a token that only lives in the
# .env file is never seen by the login step below.
load_dotenv()

# Ensure Hugging Face login
try:
    hf_token = os.getenv("HF_TOKEN")
    if hf_token:
        login(token=hf_token)
        print("Login successful!")
except Exception as e:
    print("Hugging Face Login failed:", e)

# CUDA and Memory Configurations
torch.backends.cuda.matmul.allow_tf32 = False
torch.backends.cudnn.allow_tf32 = False
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = (
    'max_split_size_mb:64,garbage_collection_threshold:0.8,expandable_segments:True'
)
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'


# Documents indexed by the RAG store. Keys are the file names written under
# ``knowledge_base/``; values are the document bodies.
KNOWLEDGE_BASE: Dict[str, str] = {
    "triage_scenarios.txt": """Medical Triage Scenarios and Responses:

EMERGENCY (999) SCENARIOS:
1. Cardiovascular:
- Chest pain/pressure
- Heart attack symptoms
- Irregular heartbeat with dizziness
Response: Immediate 999 call, sit/lie down, chew aspirin if available

2. Respiratory:
- Severe breathing difficulty
- Choking
- Unable to speak full sentences
Response: 999, sitting position, clear airway

3. Neurological:
- Stroke symptoms (FAST)
- Seizures
- Unconsciousness
Response: 999, recovery position if unconscious

4. Trauma:
- Severe bleeding
- Head injuries with confusion
- Major burns
Response: 999, apply direct pressure to bleeding

URGENT CARE (111) SCENARIOS:
1. Moderate Symptoms:
- Persistent fever
- Non-severe infections
- Minor injuries
Response: 111 contact, monitor symptoms

2. Minor Emergencies:
- Small cuts needing stitches
- Sprains and strains
- Mild allergic reactions
Response: 111 or urgent care visit

GP APPOINTMENT SCENARIOS:
1. Routine Care:
- Chronic condition review
- Medication reviews
- Non-urgent symptoms
Response: Book routine GP appointment

2. Preventive Care:
- Vaccinations
- Health screenings
- Regular check-ups
Response: Schedule with GP reception""",
    "emergency_detection.txt": """Enhanced Emergency Detection Criteria:

IMMEDIATE LIFE THREATS:
1. Cardiac Symptoms:
- Chest pain/pressure/tightness
- Pain spreading to arms/jaw/neck
- Sweating with nausea
- Shortness of breath

2. Breathing Problems:
- Severe shortness of breath
- Blue lips or face
- Unable to complete sentences
- Choking/airway blockage

3. Neurological:
- FAST (Face, Arms, Speech, Time)
- Sudden confusion
- Severe headache
- Seizures
- Loss of consciousness

4. Severe Trauma:
- Heavy bleeding
- Deep wounds
- Head injury with confusion
- Severe burns
- Broken bones with deformity

5. Anaphylaxis:
- Sudden swelling
- Difficulty breathing
- Rapid onset rash
- Light-headedness

URGENT BUT NOT IMMEDIATE:
1. Moderate Symptoms:
- Persistent fever
- Dehydration
- Non-severe infections
- Minor injuries

2. Worsening Conditions:
- Increasing pain
- Progressive symptoms
- Medication reactions

RESPONSE PROTOCOLS:
1. For Life Threats:
- Immediate 999 call
- Clear first aid instructions
- Stay on line until help arrives

2. For Urgent Care:
- 111 contact
- Monitor for worsening
- Document symptoms""",
    "gp_booking.txt": """GP Appointment Booking Templates:

APPOINTMENT TYPES:
1. Routine Appointments:
Template: "I need to book a routine appointment for [condition]. My availability is [times/dates]. My GP is Dr. [name] if available."

2. Follow-up Appointments:
Template: "I need a follow-up appointment regarding [condition] discussed on [date]. My previous appointment was with Dr. [name]."

3. Medication Reviews:
Template: "I need a medication review for [medication]. My last review was [date]."

BOOKING INFORMATION NEEDED:
1. Patient Details:
- Full name
- Date of birth
- NHS number (if known)
- Registered GP practice

2. Appointment Details:
- Nature of appointment
- Preferred times/dates
- Urgency level
- Special requirements

3. Contact Information:
- Phone number
- Alternative contact
- Preferred contact method

BOOKING PROCESS:
1. Online Booking:
- NHS app instructions
- Practice website guidance
- System navigation help

2. Phone Booking:
- Best times to call
- Required information
- Queue management tips

3. Special Circumstances:
- Interpreter needs
- Accessibility requirements
- Transport arrangements""",
    "cultural_sensitivity.txt": """Cultural Sensitivity Guidelines:

CULTURAL AWARENESS:
1. Religious Considerations:
- Prayer times
- Religious observations
- Dietary restrictions
- Gender preferences for care
- Religious festivals/fasting periods

2. Language Support:
- Interpreter services
- Multi-language resources
- Clear communication methods
- Family involvement preferences

3. Cultural Beliefs:
- Traditional medicine practices
- Cultural health beliefs
- Family decision-making
- Privacy customs

COMMUNICATION APPROACHES:
1. Respectful Interaction:
- Use preferred names/titles
- Appropriate greetings
- Non-judgmental responses
- Active listening

2. Language Usage:
- Clear, simple terms
- Avoid medical jargon
- Confirm understanding
- Respect silence/pauses

3. Non-verbal Communication:
- Eye contact customs
- Personal space
- Body language awareness
- Gesture sensitivity

SPECIFIC CONSIDERATIONS:
1. South Asian Communities:
- Family involvement
- Gender sensitivity
- Traditional medicine
- Language diversity

2. Middle Eastern Communities:
- Gender-specific care
- Religious observations
- Family hierarchies
- Privacy concerns

3. African/Caribbean Communities:
- Traditional healers
- Community involvement
- Historical medical mistrust
- Cultural specific conditions

4. Eastern European Communities:
- Direct communication
- Family involvement
- Medical documentation
- Language support

INCLUSIVE PRACTICES:
1. Appointment Scheduling:
- Religious holidays
- Prayer times
- Family availability
- Interpreter needs

2. Treatment Planning:
- Cultural preferences
- Traditional practices
- Family involvement
- Dietary requirements

3. Support Services:
- Community resources
- Cultural organizations
- Language services
- Social support""",
    "service_boundaries.txt": """Service Limitations and Professional Boundaries:

CLEAR BOUNDARIES:
1. Medical Advice:
- No diagnoses
- No prescriptions
- No treatment recommendations
- No medical procedures
- No second opinions

2. Emergency Services:
- Clear referral criteria
- Documented responses
- Follow-up protocols
- Handover procedures

3. Information Sharing:
- Confidentiality limits
- Data protection
- Record keeping
- Information governance

PROFESSIONAL CONDUCT:
1. Communication:
- Professional language
- Emotional boundaries
- Personal distance
- Service scope

2. Service Delivery:
- No financial transactions
- No personal relationships
- Clear role definition
- Professional limits""",
}

# Styling for the banner and feature cards rendered by create_demo().
CUSTOM_CSS = """
.emergency-banner { background: #fee2e2; color: #991b1b; padding: 12px;
                    border-radius: 8px; text-align: center; font-weight: 600; }
.features-grid { display: grid; grid-template-columns: repeat(4, 1fr);
                 gap: 12px; margin: 12px 0; }
.feature-card { padding: 12px; border-radius: 8px; text-align: center;
                background: #f1f5f9; }
.feature-icon { font-size: 1.5rem; }
"""


class ModelManager:
    """Handles model loading and resource management"""

    @staticmethod
    def verify_model_path(checkpoint_path: str) -> str:
        """Return the first existing checkpoint location.

        Args:
            checkpoint_path: Preferred location of the checkpoint.

        Returns:
            ``checkpoint_path`` if it exists, otherwise the first existing
            fallback location.

        Raises:
            FileNotFoundError: If none of the candidate locations exists.
        """
        if os.path.exists(checkpoint_path):
            return checkpoint_path

        alternate_paths = [
            f"{os.getcwd()}/checkpoint-500.zip",
            "./checkpoint-500.zip",
            "../checkpoint-500.zip"
        ]
        for path in alternate_paths:
            if os.path.exists(path):
                return path

        raise FileNotFoundError(
            f"Model checkpoint not found in any of these locations: "
            f"{[checkpoint_path] + alternate_paths}"
        )

    @staticmethod
    def extract_if_archive(checkpoint_path: str) -> str:
        """Return a directory that ``from_pretrained`` can load.

        ``from_pretrained`` expects a directory (or hub id), not a zip file,
        so a zipped checkpoint is unpacked next to the archive first. Paths
        that are not zip archives are returned unchanged.

        Args:
            checkpoint_path: Existing path to a checkpoint directory or zip.

        Returns:
            Path of the directory holding the checkpoint files.
        """
        if not (os.path.isfile(checkpoint_path)
                and zipfile.is_zipfile(checkpoint_path)):
            return checkpoint_path

        target = Path(checkpoint_path).with_suffix("")
        if not target.is_dir():
            with zipfile.ZipFile(checkpoint_path) as archive:
                archive.extractall(target)

        # Archives made from a folder nest everything under one top-level
        # directory; descend into it so the model files are found.
        entries = [
            entry for entry in target.iterdir()
            if not entry.name.startswith((".", "__MACOSX"))
        ]
        if len(entries) == 1 and entries[0].is_dir():
            return str(entries[0])
        return str(target)

    @staticmethod
    def clear_gpu_memory():
        """Run the garbage collector, then release cached CUDA memory."""
        # Collect first so tensors that are only kept alive by reference
        # cycles are freed before the CUDA cache is emptied.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()


class PearlyBot:
    """Triage chatbot combining a fine-tuned causal LM with FAISS retrieval."""

    # Text markers after which generated output is discarded.
    STOP_MARKERS = ("<end_of_turn>", "<|im_end|>")

    def __init__(self, model_path: str = "./checkpoint-500.zip"):
        """Load the model and build the retrieval store.

        Args:
            model_path: Checkpoint directory or zip archive to load.
        """
        # Plain state first, so the instance is consistent even if one of the
        # heavy setup steps below raises.
        self.conversation_history = []
        self.last_interaction_time = time.time()
        self.interaction_cooldown = 1.0  # seconds
        self.setup_model(model_path)
        self.setup_rag()

    def setup_model(self, model_path: str):
        """Initialize the tokenizer and model.

        Args:
            model_path: Checkpoint directory or zip archive to load.

        Raises:
            FileNotFoundError: If no checkpoint can be located.
            Exception: Any loading error is logged and re-raised.
        """
        logger.info("Starting model initialization...")
        ModelManager.clear_gpu_memory()

        try:
            # Verify model path
            verified_path = ModelManager.verify_model_path(model_path)
            verified_path = ModelManager.extract_if_archive(verified_path)
            logger.info(f"Using model checkpoint from: {verified_path}")
        except Exception as e:
            logger.error(f"Error in model setup: {str(e)}")
            raise

        # Base model configuration
        base_model_id = "google/gemma-2b"
        logger.info(f"Loading base model: {base_model_id}")

        # Load tokenizer
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(base_model_id)
            self.tokenizer.pad_token = self.tokenizer.eos_token
            # Truncate over-long prompts from the left so the user's message
            # and the trailing "assistant" cue are never cut off.
            self.tokenizer.truncation_side = "left"
            logger.info("Tokenizer loaded successfully")
        except Exception as e:
            logger.error(f"Failed to load tokenizer: {str(e)}")
            raise

        # Load model
        try:
            self.model = AutoModelForCausalLM.from_pretrained(
                verified_path,
                device_map="auto",
                load_in_8bit=True,
                torch_dtype=torch.float16,
                low_cpu_mem_usage=True
            )
            self.model.eval()
            logger.info("Model loaded successfully")
        except Exception as e:
            logger.error(f"Failed to load model: {str(e)}")
            raise

    def setup_rag(self):
        """Write the knowledge base to disk and build the FAISS vector store.

        Raises:
            Exception: Any failure is logged and re-raised.
        """
        try:
            logger.info("Setting up RAG system...")

            kb_dir = Path("knowledge_base")
            kb_dir.mkdir(exist_ok=True)

            # Create and process documents
            documents = []
            for filename, content in KNOWLEDGE_BASE.items():
                # Explicit encoding: the platform default may not be UTF-8.
                (kb_dir / filename).write_text(content, encoding="utf-8")
                documents.append(content)

            # Setup embeddings and vector store
            self.embeddings = HuggingFaceEmbeddings(
                model_name="sentence-transformers/all-MiniLM-L6-v2"
            )
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=300,
                chunk_overlap=100
            )
            texts = text_splitter.split_text("\n\n".join(documents))
            self.vector_store = FAISS.from_texts(texts, self.embeddings)
            logger.info("RAG system setup complete")
        except Exception as e:
            logger.error(f"Error setting up RAG: {str(e)}")
            raise

    def get_relevant_context(self, query):
        """Return the three most similar knowledge-base chunks, newline-joined.

        Returns an empty string if retrieval fails, so generation can still
        proceed without context.
        """
        try:
            docs = self.vector_store.similarity_search(query, k=3)
            return "\n".join(doc.page_content for doc in docs)
        except Exception as e:
            logger.error(f"Error retrieving context: {str(e)}")
            return ""

    @staticmethod
    def _format_history(history: Optional[list], max_turns: int = 3) -> str:
        """Render the last ``max_turns`` exchanges as ``User:/Assistant:`` text.

        Accepts both Gradio history formats: "messages" (dicts with ``role``
        and ``content``) and legacy ``(user, assistant)`` pairs. Assistant
        messages without a preceding user message are skipped.
        """
        turns = []
        pending_user = None
        for item in history or []:
            if isinstance(item, dict):
                role = item.get("role")
                content = item.get("content") or ""
                if role == "user":
                    pending_user = content
                elif role == "assistant" and pending_user is not None:
                    turns.append((pending_user, content))
                    pending_user = None
            else:
                user, assistant = item
                turns.append((user or "", assistant or ""))
        return "\n".join(
            f"User: {user}\nAssistant: {assistant}"
            for user, assistant in turns[-max_turns:]
        )

    @staticmethod
    def _build_prompt(context: str, conv_history: str, message: str) -> str:
        """Assemble the generation prompt from context, history and message."""
        # NOTE(review): the role lines are bare words ("system", "user",
        # "assistant"); confirm they match the template used for fine-tuning.
        return (
            "system\n"
            "Using these medical guidelines:\n"
            f"{context}\n"
            "\n"
            "Previous conversation:\n"
            f"{conv_history}\n"
            "\n"
            "Guidelines:\n"
            "1. Assess symptoms and severity\n"
            "2. Ask relevant follow-up questions\n"
            "3. Direct to appropriate care (999, 111, or GP)\n"
            "4. Show empathy and cultural sensitivity\n"
            "5. Never diagnose or recommend treatments\n"
            "user\n"
            f"{message}\n"
            "assistant\n"
        )

    @torch.inference_mode()
    def generate_response(self, message: str, history: list) -> str:
        """Generate response using both fine-tuned model and RAG

        Args:
            message: The user's latest message.
            history: Prior conversation in either Gradio history format.

        Returns:
            The model's reply, or an apology string if generation fails.
        """
        try:
            # Rate limiting: wait only for the remainder of the cooldown.
            elapsed = time.time() - self.last_interaction_time
            if elapsed < self.interaction_cooldown:
                time.sleep(self.interaction_cooldown - elapsed)

            # Clear GPU memory before generation
            ModelManager.clear_gpu_memory()

            context = self.get_relevant_context(message)
            conv_history = self._format_history(history)
            prompt = self._build_prompt(context, conv_history, message)

            # Generate response
            try:
                inputs = self.tokenizer(
                    prompt,
                    return_tensors="pt",
                    truncation=True,
                    max_length=512
                ).to(self.model.device)

                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=256,
                    min_new_tokens=20,
                    do_sample=True,
                    temperature=0.7,
                    top_p=0.9,
                    repetition_penalty=1.2,
                    no_repeat_ngram_size=3,
                    pad_token_id=self.tokenizer.pad_token_id
                )

                # Decode only the newly generated tokens. Splitting the full
                # text on "assistant" breaks whenever the reply itself
                # contains that word.
                prompt_length = inputs["input_ids"].shape[1]
                response = self.tokenizer.decode(
                    outputs[0][prompt_length:],
                    skip_special_tokens=True
                ).strip()
                for marker in self.STOP_MARKERS:
                    response = response.split(marker)[0].strip()

                self.last_interaction_time = time.time()
                return response

            except torch.cuda.OutOfMemoryError:
                ModelManager.clear_gpu_memory()
                logger.error("GPU out of memory, cleared cache")
                return "I apologize, but I'm experiencing technical difficulties. Please try again."

        except Exception as e:
            logger.error(f"Error generating response: {str(e)}")
            return "I apologize, but I encountered an error. Please try again."

    def handle_feedback(self, message: str, response: str, feedback: int,
                        comment: str = ""):
        """Handle user feedback for responses

        Args:
            message: The user message the feedback refers to.
            response: The assistant reply the feedback refers to.
            feedback: Rating: 1 positive, -1 negative, 0 comment only.
            comment: Optional free-text comment.

        Returns:
            True if the feedback was logged, False on error.
        """
        try:
            feedback_data = {
                "message": message,
                "response": response,
                "feedback": feedback,
                "comment": comment,
                "timestamp": datetime.now().isoformat()
            }

            # Log feedback
            logger.info(f"Feedback received: {feedback_data}")

            # Here you could:
            # 1. Store feedback in a database
            # 2. Send to monitoring system
            # 3. Use for model improvements
            return True
        except Exception as e:
            logger.error(f"Error handling feedback: {e}")
            return False

    def __del__(self):
        """Cleanup resources"""
        try:
            if hasattr(self, 'model'):
                del self.model
            ModelManager.clear_gpu_memory()
        except Exception as e:
            logger.error(f"Error in cleanup: {e}")


def _last_exchange(history: Optional[list]) -> Optional[Tuple[str, str]]:
    """Return the latest ``(user_message, bot_message)`` pair, or None.

    Handles both the "messages" history format (dicts with ``role`` and
    ``content``) and legacy ``(user, assistant)`` pairs.
    """
    if not history:
        return None

    last = history[-1]
    if isinstance(last, dict):
        if len(history) < 2 or not isinstance(history[-2], dict):
            return None
        previous = history[-2]
        if previous.get("role") != "user" or last.get("role") != "assistant":
            return None
        return previous.get("content", ""), last.get("content", "")

    # Legacy format: one entry already holds the whole exchange.
    return last[0], last[1]


def process_feedback(positive: bool, comment: str,
                     history: List[Dict[str, str]],
                     bot: Optional[PearlyBot] = None):
    """Record thumbs-up/down feedback for the latest exchange.

    Args:
        positive: True for a thumbs-up, False for a thumbs-down.
        comment: Optional free-text comment typed by the user.
        history: Chat history in either Gradio format.
        bot: Bot that stores the feedback. When omitted, a module-level
            ``bot`` is used if one exists.

    Returns:
        A Gradio update that clears the feedback text box.
    """
    try:
        target = bot if bot is not None else globals().get("bot")
        exchange = _last_exchange(history)
        if target is None:
            logger.warning("Feedback received but no bot is available")
        elif exchange is not None:
            last_user_msg, last_bot_msg = exchange
            target.handle_feedback(
                message=last_user_msg,
                response=last_bot_msg,
                feedback=1 if positive else -1,
                comment=comment or ""
            )
    except Exception as e:
        logger.error(f"Error processing feedback: {e}")
    return gr.update(value="")


def create_demo():
    """Set up Gradio interface for the chatbot with enhanced styling and functionality.

    Returns:
        The configured ``gr.Blocks`` application.

    Raises:
        Exception: Any setup failure is logged and re-raised.
    """
    try:
        # Initialize bot
        bot = PearlyBot()

        def chat(message: str, history: list):
            """Handle chat interactions"""
            # History is None after "Clear Chat"; copy so the caller's list
            # is never mutated.
            history = list(history or [])
            try:
                if not message or not message.strip():
                    return history

                response = bot.generate_response(message, history)
                history.append({"role": "user", "content": message})
                history.append({"role": "assistant", "content": response})
                return history
            except Exception as e:
                logger.error(f"Chat error: {e}")
                return history + [{
                    "role": "assistant",
                    "content": "I apologize, but I'm experiencing technical difficulties. For emergencies, please call 999."
                }]

        def submit_comment(comment: str, history: list):
            """Record a comment-only (neutral) feedback entry."""
            try:
                exchange = _last_exchange(history)
                if comment and comment.strip() and exchange is not None:
                    bot.handle_feedback(
                        message=exchange[0],
                        response=exchange[1],
                        feedback=0,
                        comment=comment
                    )
            except Exception as e:
                logger.error(f"Error processing feedback: {e}")
            return gr.update(value="")

        # Create enhanced Gradio interface
        with gr.Blocks(
            theme=gr.themes.Soft(
                primary_hue="blue",
                secondary_hue="indigo",
                neutral_hue="slate",
                font=gr.themes.GoogleFont("Inter")
            ),
            css=CUSTOM_CSS
        ) as demo:
            # Emergency Banner
            gr.HTML("""
                <div class="emergency-banner">
                    🚨 For medical emergencies, always call 999 immediately 🚨
                </div>
            """)

            # Header Section
            with gr.Row(elem_classes="header"):
                gr.Markdown("""
                    # GP Medical Triage Assistant - Pearly
                    Welcome to your personal medical triage assistant. I'm here to help assess your symptoms and guide you to appropriate care.
                """)

            # Main Features Grid
            gr.HTML("""
                <div class="features-grid">
                    <div class="feature-card">
                        <div class="feature-icon">🏥</div>
                        <div>GP Appointments</div>
                    </div>
                    <div class="feature-card">
                        <div class="feature-icon">🔍</div>
                        <div>Symptom Assessment</div>
                    </div>
                    <div class="feature-card">
                        <div class="feature-icon">⚡</div>
                        <div>Urgent Care Guide</div>
                    </div>
                    <div class="feature-card">
                        <div class="feature-icon">💊</div>
                        <div>Medical Advice</div>
                    </div>
                </div>
            """)

            # Chat Interface
            with gr.Row():
                with gr.Column(scale=4):
                    chatbot = gr.Chatbot(
                        value=[{
                            "role": "assistant",
                            "content": "Hello! I'm Pearly, your GP medical assistant. How can I help you today?"
                        }],
                        height=500,
                        elem_id="chatbot",
                        type="messages",
                        show_label=False
                    )
                    with gr.Row():
                        # With lines=2, Shift+Enter submits; the Send button
                        # is the primary way to submit.
                        msg = gr.Textbox(
                            label="Your message",
                            placeholder="Type your message here...",
                            lines=2,
                            scale=4,
                            autofocus=True
                        )
                        submit = gr.Button("Send", variant="primary", scale=1)

                with gr.Column(scale=1):
                    # Quick Actions Panel
                    # NOTE(review): these three buttons have no click
                    # handlers yet.
                    gr.Markdown("### Quick Actions")
                    emergency_btn = gr.Button("🚨 Emergency Info", variant="secondary")
                    nhs_111_btn = gr.Button("📞 NHS 111 Info", variant="secondary")
                    booking_btn = gr.Button("📅 GP Booking", variant="secondary")

                    # Controls and Feedback
                    gr.Markdown("### Controls")
                    clear = gr.Button("🗑️ Clear Chat")

                    gr.Markdown("### Feedback")
                    with gr.Row():
                        feedback_positive = gr.Button("👍", elem_id="thumb-up")
                        feedback_negative = gr.Button("👎", elem_id="thumb-down")
                    feedback_text = gr.Textbox(
                        label="Additional comments",
                        placeholder="Tell us more...",
                        lines=2,
                        visible=True
                    )
                    feedback_submit = gr.Button("Submit Feedback", visible=True)

            # Examples and Information
            with gr.Accordion("Example Messages", open=False):
                gr.Examples([
                    ["I've been having severe headaches for the past week"],
                    ["I need to book a routine checkup"],
                    ["I'm feeling very anxious lately and need help"],
                    ["My child has had a fever for 2 days"],
                    ["I need information about COVID-19 testing"]
                ], inputs=msg)

            with gr.Accordion("NHS Services Guide", open=False):
                gr.Markdown("""
                    ### Emergency Services (999)
                    - Life-threatening emergencies
                    - Severe injuries
                    - Suspected heart attack or stroke

                    ### NHS 111
                    - Urgent but non-emergency situations
                    - Medical advice needed
                    - Unsure where to go

                    ### GP Services
                    - Routine check-ups
                    - Non-urgent medical issues
                    - Prescription renewals
                """)

            # Event handlers: wired only after every component they
            # reference has been created above.
            msg.submit(chat, [msg, chatbot], [chatbot]).then(
                lambda: gr.update(value=""), None, [msg]
            )
            submit.click(chat, [msg, chatbot], [chatbot]).then(
                lambda: gr.update(value=""), None, [msg]
            )

            # Feedback handlers. The comment box is passed as an input so the
            # handler receives what the user typed; ``component.value`` only
            # holds the initial value.
            feedback_positive.click(
                lambda comment, h: process_feedback(True, comment, h, bot=bot),
                inputs=[feedback_text, chatbot],
                outputs=[feedback_text]
            )
            feedback_negative.click(
                lambda comment, h: process_feedback(False, comment, h, bot=bot),
                inputs=[feedback_text, chatbot],
                outputs=[feedback_text]
            )
            feedback_submit.click(
                submit_comment,
                inputs=[feedback_text, chatbot],
                outputs=[feedback_text]
            )

            # Clear chat
            clear.click(lambda: None, None, chatbot)

        # Add queue for handling multiple users. ``concurrency_count`` was
        # removed in Gradio 4; ``default_concurrency_limit`` replaces it.
        demo.queue(default_concurrency_limit=1, max_size=10)

        return demo

    except Exception as e:
        logger.error(f"Error creating demo: {e}")
        raise


if __name__ == "__main__":
    try:
        # Create and launch demo
        demo = create_demo()
        demo.launch(
            server_name="0.0.0.0",
            server_port=7860,
            show_error=True
        )
    except Exception as e:
        logger.error(f"Application startup failed: {e}")
        raise