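# Hugging Face file-viewer header captured together with the file (not part of app.py):
#   PearlIsa's picture
#   Update app.py
#   f8fc8c8 verified
#   raw
#   history blame
#   28.6 kB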
# app.py
import os
import json
import keras
from datasets import load_dataset
import tensorflow as tf
from huggingface_hub import login
import torch
from transformers import ( AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer)
from sentence_transformers import SentenceTransformer
from typing import List, Dict, Union, Tuple
import faiss
import numpy as np
from datasets import Dataset
import torch.nn.functional as F
from torch.cuda.amp import autocast
import gc
from peft import ( LoraConfig, get_peft_model, prepare_model_for_kbit_training, TaskType, PeftModel)
from tqdm.auto import tqdm
from torch.utils.data import DataLoader
import logging
import wandb
from pathlib import Path
from typing import List, Dict, Union, Optional, Any
import torch.nn as nn
from dataclasses import dataclass, field
import time
import asyncio
import pytest
from unittest.mock import Mock, patch
from sklearn.metrics import classification_report, confusion_matrix
import gradio as gr
import matplotlib.pyplot as plt
from datetime import datetime
import requests
import pandas as pd
import seaborn as sns
import traceback
from matplotlib.gridspec import GridSpec
from datasets import load_dataset, concatenate_datasets
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import TextLoader
import IPython.display as display
from peft import get_peft_model, LoraConfig, prepare_model_for_kbit_training
import warnings
import re
from dotenv import load_dotenv
warnings.filterwarnings('ignore')
# Load variables from a local .env file before anything reads the environment.
# The __main__ guard also calls load_dotenv(), but only after this module-level
# code has already run, so without this call an HF_TOKEN that lives only in
# .env would never reach the login attempt below. Existing environment
# variables are not overridden by default.
load_dotenv()

# Ensure Hugging Face login (best effort: a failed login is reported on stdout
# and start-up continues).
try:
    hf_token = os.getenv("HF_TOKEN")
    if hf_token:
        login(token=hf_token)
        print("Login successful!")
except Exception as e:
    print("Hugging Face Login failed:", e)

# CUDA and Memory Configurations
# TF32 is switched off for both matmul and cuDNN kernels.
torch.backends.cuda.matmul.allow_tf32 = False
torch.backends.cudnn.allow_tf32 = False
# Allocator tuning string consumed by PyTorch's CUDA caching allocator.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:64,garbage_collection_threshold:0.8,expandable_segments:True'
# NOTE(review): CUDA_LAUNCH_BLOCKING=1 is normally a debugging aid (synchronous
# kernel launches) — confirm it is wanted in production.
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class ModelManager:
    """Handles model loading and resource management."""

    @staticmethod
    def verify_model_path(checkpoint_path: str) -> str:
        """Return the first existing location of the model checkpoint.

        Args:
            checkpoint_path: Preferred checkpoint location.

        Returns:
            ``checkpoint_path`` if it exists, otherwise the first existing
            fallback location of ``checkpoint-500.zip`` (current working
            directory, ``./`` or ``../``).

        Raises:
            FileNotFoundError: If none of the candidate locations exists.
        """
        if os.path.exists(checkpoint_path):
            return checkpoint_path

        alternate_paths = [
            f"{os.getcwd()}/checkpoint-500.zip",
            "./checkpoint-500.zip",
            "../checkpoint-500.zip"
        ]
        for path in alternate_paths:
            if os.path.exists(path):
                return path

        raise FileNotFoundError(
            f"Model checkpoint not found in any of these locations: "
            f"{[checkpoint_path] + alternate_paths}"
        )

    @staticmethod
    def clear_gpu_memory():
        """Release unreferenced Python objects, then clear the GPU memory cache."""
        # Collect first: tensors that are only kept alive by reference cycles
        # must be freed before empty_cache() can hand their blocks back.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
class PearlyBot:
    """Medical triage chatbot combining a fine-tuned causal LM with FAISS retrieval.

    Construction loads the tokenizer and model (``setup_model``) and builds
    the retrieval index from the built-in knowledge base (``setup_rag``).
    """

    def __init__(self, model_path: str = "./checkpoint-500.zip"):
        """Load the model and RAG index and initialise the rate-limit state.

        Args:
            model_path: Location of the fine-tuned checkpoint (directory or
                zip archive).
        """
        self.setup_model(model_path)
        self.setup_rag()
        self.conversation_history = []
        self.last_interaction_time = time.time()
        self.interaction_cooldown = 1.0  # seconds

    @staticmethod
    def _resolve_checkpoint_dir(path: str) -> str:
        """Return a directory loadable by ``from_pretrained`` for ``path``.

        Directories and non-zip files are returned unchanged. A zip archive
        is unpacked next to itself (``<name>_extracted``) because
        ``from_pretrained`` expects a directory, not an archive.
        """
        import zipfile  # local import: only needed for archived checkpoints

        if os.path.isdir(path) or not zipfile.is_zipfile(path):
            return path

        target = os.path.splitext(path)[0] + "_extracted"
        if not os.path.isdir(target):
            with zipfile.ZipFile(path) as archive:
                archive.extractall(target)

        # Archives commonly wrap the checkpoint in a single top-level folder.
        entries = [os.path.join(target, name) for name in os.listdir(target)]
        if len(entries) == 1 and os.path.isdir(entries[0]):
            return entries[0]
        return target

    def setup_model(self, model_path: str):
        """Initialize the tokenizer and model with proper error handling.

        Args:
            model_path: Preferred checkpoint location; resolved through
                ``ModelManager.verify_model_path``.

        Raises:
            FileNotFoundError: If no checkpoint can be located.
            Exception: Any tokenizer/model loading error is logged and re-raised.
        """
        try:
            logger.info("Starting model initialization...")
            ModelManager.clear_gpu_memory()

            # Verify model path
            verified_path = ModelManager.verify_model_path(model_path)
            logger.info(f"Using model checkpoint from: {verified_path}")
            checkpoint_dir = self._resolve_checkpoint_dir(verified_path)

            # Base model configuration (the tokenizer comes from the base model)
            base_model_id = "google/gemma-2b"
            logger.info(f"Loading base model: {base_model_id}")

            # Load tokenizer
            try:
                self.tokenizer = AutoTokenizer.from_pretrained(base_model_id)
                self.tokenizer.pad_token = self.tokenizer.eos_token
                # Prompts are capped at 512 tokens in generate_response. Drop
                # the oldest text on overflow so the user's message and the
                # trailing assistant turn marker are never cut off.
                self.tokenizer.truncation_side = "left"
                logger.info("Tokenizer loaded successfully")
            except Exception as e:
                logger.error(f"Failed to load tokenizer: {str(e)}")
                raise

            # Load model
            try:
                self.model = AutoModelForCausalLM.from_pretrained(
                    checkpoint_dir,
                    device_map="auto",
                    load_in_8bit=True,
                    torch_dtype=torch.float16,
                    low_cpu_mem_usage=True
                )
                self.model.eval()
                logger.info("Model loaded successfully")
            except Exception as e:
                logger.error(f"Failed to load model: {str(e)}")
                raise
        except Exception as e:
            logger.error(f"Error in model setup: {str(e)}")
            raise

    @staticmethod
    def _knowledge_base_documents() -> dict:
        """Return the built-in knowledge base as ``{filename: document text}``."""
        return {
            "triage_scenarios.txt": """Medical Triage Scenarios and Responses:
EMERGENCY (999) SCENARIOS:
1. Cardiovascular:
- Chest pain/pressure
- Heart attack symptoms
- Irregular heartbeat with dizziness
Response: Immediate 999 call, sit/lie down, chew aspirin if available
2. Respiratory:
- Severe breathing difficulty
- Choking
- Unable to speak full sentences
Response: 999, sitting position, clear airway
3. Neurological:
- Stroke symptoms (FAST)
- Seizures
- Unconsciousness
Response: 999, recovery position if unconscious
4. Trauma:
- Severe bleeding
- Head injuries with confusion
- Major burns
Response: 999, apply direct pressure to bleeding
URGENT CARE (111) SCENARIOS:
1. Moderate Symptoms:
- Persistent fever
- Non-severe infections
- Minor injuries
Response: 111 contact, monitor symptoms
2. Minor Emergencies:
- Small cuts needing stitches
- Sprains and strains
- Mild allergic reactions
Response: 111 or urgent care visit
GP APPOINTMENT SCENARIOS:
1. Routine Care:
- Chronic condition review
- Medication reviews
- Non-urgent symptoms
Response: Book routine GP appointment
2. Preventive Care:
- Vaccinations
- Health screenings
- Regular check-ups
Response: Schedule with GP reception""",
            "emergency_detection.txt": """Enhanced Emergency Detection Criteria:
IMMEDIATE LIFE THREATS:
1. Cardiac Symptoms:
- Chest pain/pressure/tightness
- Pain spreading to arms/jaw/neck
- Sweating with nausea
- Shortness of breath
2. Breathing Problems:
- Severe shortness of breath
- Blue lips or face
- Unable to complete sentences
- Choking/airway blockage
3. Neurological:
- FAST (Face, Arms, Speech, Time)
- Sudden confusion
- Severe headache
- Seizures
- Loss of consciousness
4. Severe Trauma:
- Heavy bleeding
- Deep wounds
- Head injury with confusion
- Severe burns
- Broken bones with deformity
5. Anaphylaxis:
- Sudden swelling
- Difficulty breathing
- Rapid onset rash
- Light-headedness
URGENT BUT NOT IMMEDIATE:
1. Moderate Symptoms:
- Persistent fever
- Dehydration
- Non-severe infections
- Minor injuries
2. Worsening Conditions:
- Increasing pain
- Progressive symptoms
- Medication reactions
RESPONSE PROTOCOLS:
1. For Life Threats:
- Immediate 999 call
- Clear first aid instructions
- Stay on line until help arrives
2. For Urgent Care:
- 111 contact
- Monitor for worsening
- Document symptoms""",
            "gp_booking.txt": """GP Appointment Booking Templates:
APPOINTMENT TYPES:
1. Routine Appointments:
Template: "I need to book a routine appointment for [condition]. My availability is [times/dates]. My GP is Dr. [name] if available."
2. Follow-up Appointments:
Template: "I need a follow-up appointment regarding [condition] discussed on [date]. My previous appointment was with Dr. [name]."
3. Medication Reviews:
Template: "I need a medication review for [medication]. My last review was [date]."
BOOKING INFORMATION NEEDED:
1. Patient Details:
- Full name
- Date of birth
- NHS number (if known)
- Registered GP practice
2. Appointment Details:
- Nature of appointment
- Preferred times/dates
- Urgency level
- Special requirements
3. Contact Information:
- Phone number
- Alternative contact
- Preferred contact method
BOOKING PROCESS:
1. Online Booking:
- NHS app instructions
- Practice website guidance
- System navigation help
2. Phone Booking:
- Best times to call
- Required information
- Queue management tips
3. Special Circumstances:
- Interpreter needs
- Accessibility requirements
- Transport arrangements""",
            "cultural_sensitivity.txt": """Cultural Sensitivity Guidelines:
CULTURAL AWARENESS:
1. Religious Considerations:
- Prayer times
- Religious observations
- Dietary restrictions
- Gender preferences for care
- Religious festivals/fasting periods
2. Language Support:
- Interpreter services
- Multi-language resources
- Clear communication methods
- Family involvement preferences
3. Cultural Beliefs:
- Traditional medicine practices
- Cultural health beliefs
- Family decision-making
- Privacy customs
COMMUNICATION APPROACHES:
1. Respectful Interaction:
- Use preferred names/titles
- Appropriate greetings
- Non-judgmental responses
- Active listening
2. Language Usage:
- Clear, simple terms
- Avoid medical jargon
- Confirm understanding
- Respect silence/pauses
3. Non-verbal Communication:
- Eye contact customs
- Personal space
- Body language awareness
- Gesture sensitivity
SPECIFIC CONSIDERATIONS:
1. South Asian Communities:
- Family involvement
- Gender sensitivity
- Traditional medicine
- Language diversity
2. Middle Eastern Communities:
- Gender-specific care
- Religious observations
- Family hierarchies
- Privacy concerns
3. African/Caribbean Communities:
- Traditional healers
- Community involvement
- Historical medical mistrust
- Cultural specific conditions
4. Eastern European Communities:
- Direct communication
- Family involvement
- Medical documentation
- Language support
INCLUSIVE PRACTICES:
1. Appointment Scheduling:
- Religious holidays
- Prayer times
- Family availability
- Interpreter needs
2. Treatment Planning:
- Cultural preferences
- Traditional practices
- Family involvement
- Dietary requirements
3. Support Services:
- Community resources
- Cultural organizations
- Language services
- Social support""",
            "service_boundaries.txt": """Service Limitations and Professional Boundaries:
CLEAR BOUNDARIES:
1. Medical Advice:
- No diagnoses
- No prescriptions
- No treatment recommendations
- No medical procedures
- No second opinions
2. Emergency Services:
- Clear referral criteria
- Documented responses
- Follow-up protocols
- Handover procedures
3. Information Sharing:
- Confidentiality limits
- Data protection
- Record keeping
- Information governance
PROFESSIONAL CONDUCT:
1. Communication:
- Professional language
- Emotional boundaries
- Personal distance
- Service scope
2. Service Delivery:
- No financial transactions
- No personal relationships
- Clear role definition
- Professional limits"""
        }

    def setup_rag(self):
        """Write the knowledge base to disk and build the FAISS retrieval index.

        Each document is written to ``knowledge_base/<filename>``; all
        documents are then joined, split into overlapping chunks and embedded
        into ``self.vector_store``.

        Raises:
            Exception: Any failure is logged and re-raised.
        """
        try:
            logger.info("Setting up RAG system...")

            # Load your knowledge base content
            knowledge_base = self._knowledge_base_documents()
            os.makedirs("knowledge_base", exist_ok=True)

            # Create and process documents
            documents = []
            for filename, content in knowledge_base.items():
                # Explicit encoding keeps the files identical across platforms.
                with open(f"knowledge_base/{filename}", "w", encoding="utf-8") as f:
                    f.write(content)
                documents.append(content)

            # Setup embeddings and vector store
            self.embeddings = HuggingFaceEmbeddings(
                model_name="sentence-transformers/all-MiniLM-L6-v2"
            )
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=300,
                chunk_overlap=100
            )
            texts = text_splitter.split_text("\n\n".join(documents))
            self.vector_store = FAISS.from_texts(texts, self.embeddings)
            logger.info("RAG system setup complete")
        except Exception as e:
            logger.error(f"Error setting up RAG: {str(e)}")
            raise

    def get_relevant_context(self, query):
        """Return the three most similar knowledge-base chunks joined by newlines.

        Retrieval errors are logged and yield an empty string so generation
        can continue without context.
        """
        try:
            docs = self.vector_store.similarity_search(query, k=3)
            return "\n".join(doc.page_content for doc in docs)
        except Exception as e:
            logger.error(f"Error retrieving context: {str(e)}")
            return ""

    @staticmethod
    def _format_history(history, max_turns: int = 3) -> str:
        """Render recent chat history as ``User: ...`` / ``Assistant: ...`` lines.

        Accepts both messages-format entries (``{"role": ..., "content": ...}``)
        and legacy ``(user, assistant)`` pairs. Only the last ``max_turns``
        exchanges (two lines each) are kept.
        """
        lines = []
        for entry in history or []:
            if isinstance(entry, dict):
                content = entry.get("content")
                # Skip empty or non-text payloads.
                if not isinstance(content, str) or not content:
                    continue
                speaker = "User" if entry.get("role") == "user" else "Assistant"
                lines.append(f"{speaker}: {content}")
            elif isinstance(entry, (list, tuple)) and len(entry) == 2:
                user, assistant = entry
                if user:
                    lines.append(f"User: {user}")
                if assistant:
                    lines.append(f"Assistant: {assistant}")
        return "\n".join(lines[-2 * max_turns:])

    @staticmethod
    def _build_prompt(context: str, conv_history: str, message: str) -> str:
        """Assemble the turn-structured prompt sent to the model."""
        return (
            "<start_of_turn>system\n"
            "Using these medical guidelines:\n"
            f"{context}\n"
            "Previous conversation:\n"
            f"{conv_history}\n"
            "Guidelines:\n"
            "1. Assess symptoms and severity\n"
            "2. Ask relevant follow-up questions\n"
            "3. Direct to appropriate care (999, 111, or GP)\n"
            "4. Show empathy and cultural sensitivity\n"
            "5. Never diagnose or recommend treatments\n"
            "<end_of_turn>\n"
            "<start_of_turn>user\n"
            f"{message}\n"
            "<end_of_turn>\n"
            "<start_of_turn>assistant"
        )

    @torch.inference_mode()
    def generate_response(self, message: str, history: list) -> str:
        """Generate response using both fine-tuned model and RAG.

        Args:
            message: The user's latest message.
            history: Prior conversation, either messages-format dicts or
                ``(user, assistant)`` pairs.

        Returns:
            The model's reply, or an apology string if generation fails.
        """
        try:
            # Rate limiting: wait only for the part of the cooldown still left.
            elapsed = time.time() - self.last_interaction_time
            if elapsed < self.interaction_cooldown:
                time.sleep(self.interaction_cooldown - elapsed)

            # Clear GPU memory before generation
            ModelManager.clear_gpu_memory()

            # Get RAG context
            context = self.get_relevant_context(message)

            # Format conversation history (last 3 turns)
            conv_history = self._format_history(history, max_turns=3)

            # Create prompt
            prompt = self._build_prompt(context, conv_history, message)

            # Generate response
            try:
                inputs = self.tokenizer(
                    prompt,
                    return_tensors="pt",
                    truncation=True,
                    max_length=512
                ).to(self.model.device)
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=256,
                    min_new_tokens=20,
                    do_sample=True,
                    temperature=0.7,
                    top_p=0.9,
                    repetition_penalty=1.2,
                    no_repeat_ngram_size=3
                )
                # Decode only the newly generated tokens. The turn markers are
                # removed by skip_special_tokens, so splitting the full text on
                # "<start_of_turn>assistant" would return the prompt as well.
                prompt_length = inputs["input_ids"].shape[-1]
                response = self.tokenizer.decode(
                    outputs[0][prompt_length:],
                    skip_special_tokens=True
                ).strip()
                # Defensive clean-up in case the markers survive as plain text.
                response = response.split("<start_of_turn>assistant")[-1].strip()
                if "<end_of_turn>" in response:
                    response = response.split("<end_of_turn>")[0].strip()
                self.last_interaction_time = time.time()
                return response
            except torch.cuda.OutOfMemoryError:
                ModelManager.clear_gpu_memory()
                logger.error("GPU out of memory, cleared cache and retrying...")
                return "I apologize, but I'm experiencing technical difficulties. Please try again."
        except Exception as e:
            logger.error(f"Error generating response: {str(e)}")
            return "I apologize, but I encountered an error. Please try again."

    def handle_feedback(self, message: str, response: str, feedback: int):
        """Handle user feedback for responses.

        Args:
            message: The user message the feedback refers to.
            response: The assistant reply the feedback refers to.
            feedback: Rating value supplied by the caller.

        Returns:
            True if the feedback was logged, False if logging failed.
        """
        try:
            timestamp = datetime.now().isoformat()
            feedback_data = {
                "message": message,
                "response": response,
                "feedback": feedback,
                "timestamp": timestamp
            }
            # Log feedback
            logger.info(f"Feedback received: {feedback_data}")
            # Here you could:
            # 1. Store feedback in a database
            # 2. Send to monitoring system
            # 3. Use for model improvements
            return True
        except Exception as e:
            logger.error(f"Error handling feedback: {e}")
            return False

    def __del__(self):
        """Cleanup resources: drop the model reference and clear GPU memory."""
        try:
            # The model may be missing if setup_model failed part-way.
            if hasattr(self, 'model'):
                del self.model
            ModelManager.clear_gpu_memory()
        except Exception as e:
            logger.error(f"Error in cleanup: {e}")
def process_feedback(positive: bool, comment: str, history: List[Dict[str, str]], bot=None):
    """Forward a thumbs-up/down rating for the latest exchange to the bot.

    Args:
        positive: True for a positive rating, False for a negative one.
        comment: Free-text comment from the feedback box (currently not
            forwarded to the bot).
        history: Chat history, either messages-format dicts
            (``{"role": ..., "content": ...}``) or ``(user, assistant)`` pairs.
        bot: Object exposing ``handle_feedback(message, response, feedback)``.
            When omitted, a module-level ``bot`` is used if one exists.

    Returns:
        A Gradio update that clears the feedback textbox. Errors are logged
        and never raised.
    """
    try:
        if not history:
            return gr.update(value="")

        latest = history[-1]
        if isinstance(latest, dict):
            # Messages format: the user turn precedes the assistant turn.
            if len(history) < 2:
                return gr.update(value="")
            last_user_msg = history[-2]["content"]
            last_bot_msg = latest["content"]
        else:
            # Pair format: one entry already holds both sides of the exchange.
            last_user_msg, last_bot_msg = latest[0], latest[1]

        # No module-level `bot` is defined in this file, so fall back to one
        # only if the host application has provided it.
        target = bot if bot is not None else globals().get("bot")
        if target is None:
            logger.error("Error processing feedback: no bot instance available")
            return gr.update(value="")

        target.handle_feedback(
            message=last_user_msg,
            response=last_bot_msg,
            feedback=1 if positive else -1
        )
        return gr.update(value="")
    except Exception as e:
        logger.error(f"Error processing feedback: {e}")
        return gr.update(value="")
def create_demo():
    """Set up Gradio interface for the chatbot with enhanced styling and functionality.

    Creates a ``PearlyBot``, lays out the page (banner, header, feature cards,
    chat panel, quick actions, feedback controls, help accordions) and wires
    the event handlers once every component exists.

    Returns:
        The configured ``gr.Blocks`` application with its queue enabled.

    Raises:
        Exception: Any construction error is logged and re-raised.
    """
    try:
        # Initialize bot
        bot = PearlyBot()

        def chat(message: str, history: list):
            """Handle chat interactions and return the updated history."""
            # The chatbot value can be None right after it has been cleared.
            history = list(history or [])
            try:
                if not message or not message.strip():
                    return history
                # Generate response
                response = bot.generate_response(message, history)
                # Update history with proper formatting
                history.append({
                    "role": "user",
                    "content": message
                })
                history.append({
                    "role": "assistant",
                    "content": response
                })
                return history
            except Exception as e:
                logger.error(f"Chat error: {e}")
                return history + [{
                    "role": "assistant",
                    "content": "I apologize, but I'm experiencing technical difficulties. For emergencies, please call 999."
                }]

        def process_feedback(positive: bool, comment: str, history: list):
            """Record a rating for the latest exchange and clear the comment box."""
            try:
                if not history:
                    return gr.update(value="")
                latest = history[-1]
                if isinstance(latest, dict):
                    # Messages format: the user turn precedes the assistant turn.
                    if len(history) < 2:
                        return gr.update(value="")
                    last_user_msg = history[-2]["content"]
                    last_bot_msg = latest["content"]
                else:
                    # Pair format: one entry holds both sides of the exchange.
                    last_user_msg, last_bot_msg = latest[0], latest[1]
                if comment and comment.strip():
                    logger.info(f"Feedback comment: {comment.strip()}")
                bot.handle_feedback(
                    message=last_user_msg,
                    response=last_bot_msg,
                    feedback=1 if positive else -1
                )
                return gr.update(value="")
            except Exception as e:
                logger.error(f"Error processing feedback: {e}")
                return gr.update(value="")

        # Create enhanced Gradio interface
        with gr.Blocks(theme=gr.themes.Soft(
            primary_hue="blue",
            secondary_hue="indigo",
            neutral_hue="slate",
            font=gr.themes.GoogleFont("Inter")
        )) as demo:
            # Custom CSS for enhanced styling
            gr.HTML("""
<style>
.container { max-width: 900px; margin: auto; }
.header { text-align: center; padding: 20px; }
.emergency-banner {
background-color: #ff4444;
color: white;
padding: 10px;
text-align: center;
font-weight: bold;
margin-bottom: 20px;
}
.feature-card {
padding: 15px;
border-radius: 10px;
text-align: center;
transition: transform 0.2s;
color: white;
font-weight: bold;
}
.feature-card:nth-child(1) { background: linear-gradient(135deg, #2193b0, #6dd5ed); }
.feature-card:nth-child(2) { background: linear-gradient(135deg, #834d9b, #d04ed6); }
.feature-card:nth-child(3) { background: linear-gradient(135deg, #ff4b1f, #ff9068); }
.feature-card:nth-child(4) { background: linear-gradient(135deg, #38ef7d, #11998e); }
.feature-card:hover {
transform: translateY(-5px);
box-shadow: 0 5px 15px rgba(0,0,0,0.2);
}
.feature-card span.emoji {
font-size: 2em;
display: block;
margin-bottom: 10px;
}
.message-textbox textarea { resize: none; }
#thumb-up, #thumb-down {
min-width: 60px;
padding: 8px;
margin: 5px;
}
.chatbot-message {
padding: 12px;
margin: 8px 0;
border-radius: 8px;
}
.user-message { background-color: #e3f2fd; }
.assistant-message { background-color: #f5f5f5; }
.feedback-section {
margin-top: 20px;
padding: 15px;
border-radius: 8px;
background-color: #f8f9fa;
}
</style>
""")

            # Emergency Banner
            gr.HTML("""
<div class="emergency-banner">
🚨 For medical emergencies, always call 999 immediately 🚨
</div>
""")

            # Header Section
            with gr.Row(elem_classes="header"):
                gr.Markdown("""
# GP Medical Triage Assistant - Pearly
Welcome to your personal medical triage assistant. I'm here to help assess your symptoms and guide you to appropriate care.
""")

            # Main Features Grid (emoji restored from mis-encoded byte sequences)
            gr.HTML("""
<div class="features-grid">
<div class="feature-card">
<span class="emoji">🏥</span>
<div>GP Appointments</div>
</div>
<div class="feature-card">
<span class="emoji">🔍</span>
<div>Symptom Assessment</div>
</div>
<div class="feature-card">
<span class="emoji">⚡</span>
<div>Urgent Care Guide</div>
</div>
<div class="feature-card">
<span class="emoji">💊</span>
<div>Medical Advice</div>
</div>
</div>
""")

            # Chat Interface
            with gr.Row():
                with gr.Column(scale=4):
                    chatbot = gr.Chatbot(
                        value=[{
                            "role": "assistant",
                            "content": "Hello! I'm Pearly, your GP medical assistant. How can I help you today?"
                        }],
                        height=500,
                        elem_id="chatbot",
                        type="messages",
                        show_label=False
                    )
                    with gr.Row():
                        # `submit_on_enter` is not a gr.Textbox parameter and
                        # would raise TypeError on Gradio versions that
                        # support type="messages"; the `submit` event below
                        # covers keyboard submission.
                        msg = gr.Textbox(
                            label="Your message",
                            placeholder="Type your message here...",
                            lines=2,
                            scale=4,
                            autofocus=True
                        )
                        submit = gr.Button("Send", variant="primary", scale=1)

                with gr.Column(scale=1):
                    # Quick Actions Panel
                    # TODO: these three buttons have no handlers yet.
                    gr.Markdown("### Quick Actions")
                    emergency_btn = gr.Button("🚨 Emergency Info", variant="secondary")
                    nhs_111_btn = gr.Button("📞 NHS 111 Info", variant="secondary")
                    booking_btn = gr.Button("📅 GP Booking", variant="secondary")

                    # Controls and Feedback
                    gr.Markdown("### Controls")
                    clear = gr.Button("🗑️ Clear Chat")
                    gr.Markdown("### Feedback")
                    with gr.Row():
                        feedback_positive = gr.Button("👍", elem_id="thumb-up")
                        feedback_negative = gr.Button("👎", elem_id="thumb-down")
                    feedback_text = gr.Textbox(
                        label="Additional comments",
                        placeholder="Tell us more...",
                        lines=2,
                        visible=True
                    )
                    # TODO: no handler is attached to this button yet.
                    feedback_submit = gr.Button("Submit Feedback", visible=True)

            # Examples and Information
            with gr.Accordion("Example Messages", open=False):
                gr.Examples([
                    ["I've been having severe headaches for the past week"],
                    ["I need to book a routine checkup"],
                    ["I'm feeling very anxious lately and need help"],
                    ["My child has had a fever for 2 days"],
                    ["I need information about COVID-19 testing"]
                ], inputs=msg)

            with gr.Accordion("NHS Services Guide", open=False):
                gr.Markdown("""
### Emergency Services (999)
- Life-threatening emergencies
- Severe injuries
- Suspected heart attack or stroke
### NHS 111
- Urgent but non-emergency situations
- Medical advice needed
- Unsure where to go
### GP Services
- Routine check-ups
- Non-urgent medical issues
- Prescription renewals
""")

            # Event handlers: wired only after every component above exists,
            # otherwise the names are referenced before assignment.
            msg.submit(chat, [msg, chatbot], [chatbot]).then(
                lambda: gr.update(value=""), None, [msg]
            )
            submit.click(chat, [msg, chatbot], [chatbot]).then(
                lambda: gr.update(value=""), None, [msg]
            )

            # Feedback handlers: the comment box is passed as an input so the
            # handler sees what the user typed, not the component's initial value.
            feedback_positive.click(
                lambda comment, h: process_feedback(True, comment, h),
                inputs=[feedback_text, chatbot],
                outputs=[feedback_text]
            )
            feedback_negative.click(
                lambda comment, h: process_feedback(False, comment, h),
                inputs=[feedback_text, chatbot],
                outputs=[feedback_text]
            )

            # Clear chat (an empty list keeps the messages format valid)
            clear.click(lambda: [], None, chatbot)

        # Add queue for handling multiple users. `concurrency_count` was
        # removed in Gradio 4; `default_concurrency_limit` replaces it.
        demo.queue(default_concurrency_limit=1, max_size=10)
        return demo
    except Exception as e:
        logger.error(f"Error creating demo: {e}")
        raise
if __name__ == "__main__":
    # Script entry point: configure logging, read .env, then build and serve
    # the Gradio app on all interfaces at port 7860.
    launch_settings = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    try:
        logging.basicConfig(level=logging.INFO)
        load_dotenv()
        demo = create_demo()
        demo.launch(**launch_settings)
    except Exception as startup_error:
        # Log for the record, then let the failure terminate the process.
        logger.error(f"Application startup failed: {startup_error}")
        raise