PearlIsa committed
Commit e49dc64
1 Parent(s): faf3edb

Update app.py

Files changed (1):
  1. app.py +385 -379

app.py CHANGED
@@ -63,369 +63,376 @@ os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:64,garbage_collection
 os.environ['CUDA_LAUNCH_BLOCKING'] = '1'


-def prepare_initial_datasets(batch_size=8):
-    print("Loading datasets with memory-optimized batch processing...")
-
-    def process_medqa_batch(examples):
-        results = []
-        inputs = examples['input']
-        instructions = examples['instruction']
-        outputs = examples['output']
-
-        for inp, inst, out in zip(inputs, instructions, outputs):
-            results.append({
-                "input": f"{inp} {inst}",
-                "output": out
-            })
-        return results
-
-    def process_meddia_batch(examples):
-        results = []
-        inputs = examples['input']
-        outputs = examples['output']
-
-        for inp, out in zip(inputs, outputs):
-            results.append({
-                "input": inp,
-                "output": out
-            })
-        return results
-
-    def process_persona_batch(examples):
-        results = []
-        personalities = examples['personality']
-        utterances = examples['utterances']
-
-        for pers, utts in zip(personalities, utterances):
-            try:
-                # Process personality list
-                personality = ' '.join([
-                    p for p in pers
-                    if isinstance(p, str)
-                ])
-
-                # Process utterances
-                if utts and len(utts) > 0:
-                    utterance = utts[0]
-                    history = []
-
-                    # Process history
-                    if 'history' in utterance and utterance['history']:
-                        history = [
-                            h for h in utterance['history']
-                            if isinstance(h, str)
-                        ]
-
-                    history_text = ' '.join(history)
-
-                    # Get candidate response
-                    candidate = utterance.get('candidates', [''])[0] if utterance.get('candidates') else ''
-
-                    if personality or history_text:
-                        results.append({
-                            "input": f"{personality} {history_text}".strip(),
-                            "output": candidate
-                        })
-            except Exception as e:
-                print(f"Error processing persona batch item: {e}")
-                continue
-
-        return results
-
-    # Load and process each dataset separately
-    print("Processing MedQA dataset...")
-    medqa = load_dataset("medalpaca/medical_meadow_medqa", split="train[:500]")
-    medqa_processed = []
-
-    for i in tqdm(range(0, len(medqa), batch_size), desc="Processing MedQA"):
-        batch = medqa[i:i + batch_size]
-        medqa_processed.extend(process_medqa_batch(batch))
-        if i % (batch_size * 5) == 0:
-            torch.cuda.empty_cache()
-
-    print("Processing MedDiagnosis dataset...")
-    meddia = load_dataset("wasiqnauman/medical-diagnosis-synthetic", split="train[:500]")
-    meddia_processed = []
-
-    for i in tqdm(range(0, len(meddia), batch_size), desc="Processing MedDiagnosis"):
-        batch = meddia[i:i + batch_size]
-        meddia_processed.extend(process_meddia_batch(batch))
-        if i % (batch_size * 5) == 0:
-            torch.cuda.empty_cache()
-
-    print("Processing Persona-Chat dataset...")
-    persona = load_dataset("AlekseyKorshuk/persona-chat", split="train[:500]")
-    persona_processed = []
-
-    for i in tqdm(range(0, len(persona), batch_size), desc="Processing Persona-Chat"):
-        batch = persona[i:i + batch_size]
-        persona_processed.extend(process_persona_batch(batch))
-        if i % (batch_size * 5) == 0:
-            torch.cuda.empty_cache()
-
-    torch.cuda.empty_cache()
-
-    print("Creating final dataset...")
-    all_processed = persona_processed + medqa_processed + meddia_processed
-
-    valid_data = {
-        "input": [],
-        "output": []
-    }
-
-    for item in all_processed:
-        if item["input"].strip() and item["output"].strip():
-            valid_data["input"].append(item["input"])
-            valid_data["output"].append(item["output"])
-
-    final_dataset = Dataset.from_dict(valid_data)
-
-    print(f"Final dataset size: {len(final_dataset)}")
-    return final_dataset
-
-def prepare_dataset(dataset, tokenizer, max_length=256, batch_size=4):
-    def tokenize_batch(examples):
-        formatted_texts = []
-
-        for i in range(0, len(examples['input']), batch_size):
-            sub_batch_inputs = examples['input'][i:i + batch_size]
-            sub_batch_outputs = examples['output'][i:i + batch_size]
-
-            for input_text, output_text in zip(sub_batch_inputs, sub_batch_outputs):
-                try:
-                    formatted_text = f"""<start_of_turn>user
-{input_text}
-<end_of_turn>
-<start_of_turn>assistant
-{output_text}
-<end_of_turn>"""
-                    formatted_texts.append(formatted_text)
-                except Exception as e:
-                    print(f"Error formatting text: {e}")
-                    continue
-
-        tokenized = tokenizer(
-            formatted_texts,
-            padding="max_length",
-            truncation=True,
-            max_length=max_length,
-            return_tensors=None
-        )
-
-        tokenized["labels"] = tokenized["input_ids"].copy()
-        return tokenized
-
-    print(f"Tokenizing dataset in small batches (size={batch_size})...")
-    tokenized_dataset = dataset.map(
-        tokenize_batch,
-        batched=True,
-        batch_size=batch_size,
-        remove_columns=dataset.column_names,
-        desc="Tokenizing dataset",
-        load_from_cache_file=False
-    )
-
-    return tokenized_dataset
-
-def setup_model_and_tokenizer(model_name="google/gemma-2b"):
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    tokenizer.pad_token = tokenizer.eos_token
-
-    from transformers import BitsAndBytesConfig
-
-    bnb_config = BitsAndBytesConfig(
-        load_in_8bit=True,
-        bnb_8bit_compute_dtype=torch.float16,
-        llm_int8_enable_fp32_cpu_offload=True
-    )
-
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        device_map="auto",
-        quantization_config=bnb_config,
-        torch_dtype=torch.float16,
-        low_cpu_mem_usage=True
-    )
-
-    model = prepare_model_for_kbit_training(model)
-
-    lora_config = LoraConfig(
-        r=4,
-        lora_alpha=16,
-        target_modules=["q_proj", "v_proj"],
-        lora_dropout=0.05,
-        bias="none",
-        task_type="CAUSAL_LM"
-    )
-
-    model = get_peft_model(model, lora_config)
-    model.print_trainable_parameters()
-
-    return model, tokenizer
-
-def setup_training_arguments(output_dir="./pearly_fine_tuned"):
-    return TrainingArguments(
-        output_dir=output_dir,
-        num_train_epochs=1,
-        per_device_train_batch_size=1,
-        gradient_accumulation_steps=16,
-        warmup_steps=50,
-        logging_steps=10,
-        save_steps=200,
-        learning_rate=2e-4,
-        fp16=True,
-        gradient_checkpointing=True,
-        gradient_checkpointing_kwargs={"use_reentrant": False},
-        optim="adamw_8bit",
-        max_grad_norm=0.3,
-        weight_decay=0.001,
-        logging_dir="./logs",
-        save_total_limit=2,
-        remove_unused_columns=False,
-        dataloader_pin_memory=False,
-        max_steps=500,
-        report_to=["none"],
-    )
-
-def main():
-    torch.backends.cuda.matmul.allow_tf32 = False
-    torch.backends.cudnn.allow_tf32 = False
-
-    torch.cuda.empty_cache()
-    if torch.cuda.is_available():
-        torch.cuda.reset_peak_memory_stats()
-
-    print("Preparing initial datasets...")
-    combined_dataset = prepare_initial_datasets(batch_size=4)
-
-    print(f"\nDataset size: {len(combined_dataset)}")
-    print(f"Column names: {combined_dataset.column_names}")
-
-    if len(combined_dataset) > 0:
-        print("\nSample input-output pair:")
-        print(f"Input: {combined_dataset[0]['input'][:100]}...")
-        print(f"Output: {combined_dataset[0]['output'][:100]}...")
-
-    print("\nSetting up model and tokenizer...")
-    model, tokenizer = setup_model_and_tokenizer()
-
-    print("\nPreparing dataset for training...")
-    processed_dataset = prepare_dataset(
-        combined_dataset,
-        tokenizer,
-        max_length=256,
-        batch_size=2
-    )
-
-    torch.cuda.empty_cache()
-
-    training_args = setup_training_arguments()
-
-    trainer = Trainer(
-        model=model,
-        args=training_args,
-        train_dataset=processed_dataset,
-        tokenizer=tokenizer,
-    )
-
-    print("\nStarting training...")
-    try:
-        trainer.train()
-    except Exception as e:
-        print(f"Training error: {e}")
-        torch.cuda.empty_cache()
-        raise e
-    finally:
-        torch.cuda.empty_cache()
-
-    print("\nSaving model...")
-    trainer.save_model()
-    print("Training completed!")
-
-DISCLAIMER = """
-IMPORTANT MEDICAL DISCLAIMER:
-Pearly is an AI medical triage assistant designed to help direct you to appropriate medical services.
-Pearly DOES NOT:
-- Make medical diagnoses
-- Prescribe medications
-- Provide specific treatment recommendations
-- Replace professional medical advice
-
-Always consult qualified healthcare professionals for medical advice and treatment.
-In case of emergency, call 999 immediately.
-"""
+# Setup logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)

 class PearlyBot:
-    def __init__(self, model_path="./pearly_fine_tuned", embedding_model="sentence-transformers/all-MiniLM-L6-v2"):
-        print("Loading saved model...")
-        print(DISCLAIMER)
-
-        # Clean memory
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-
-        # Load tokenizer and model directly from saved path
-        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
-        self.model = AutoModelForCausalLM.from_pretrained(
-            model_path,
-            torch_dtype=torch.float16,
-            low_cpu_mem_usage=True,
-            device_map="auto"
-        )
-
-        self.model.eval() # Set to evaluation mode
-
-        # Initialize RAG components
-        self.embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
-        self.vector_store = None
+    def __init__(self):
+        self.setup_model()
+        self.setup_rag()
         self.conversation_history = []
-
-    def initialize_rag(self, documents_path="./knowledge_base"):
-        """Initialize RAG system"""
-        print("Loading knowledge base...")
-
-        text_splitter = RecursiveCharacterTextSplitter(
-            chunk_size=300,
-            chunk_overlap=100,
-            separators=["\n\n", "\n", ".", "!", "?", ":"]
-        )

-        documents = []
-        for filename in os.listdir(documents_path):
-            if filename.endswith('.txt'):
-                loader = TextLoader(os.path.join(documents_path, filename))
-                documents.extend(loader.load())
-
-        texts = text_splitter.split_documents(documents)
-        self.vector_store = FAISS.from_documents(texts, self.embeddings)
-        self.retriever = self.vector_store.as_retriever(
-            search_type="similarity",
-            search_kwargs={"k": 5}
-        )
-        print("Knowledge base loaded successfully!")
-
-    def get_relevant_context(self, user_input):
-        if not self.retriever:
+    def setup_model(self):
+        try:
+            logger.info("Loading local checkpoint...")
+            # Load from the local checkpoint in your Space
+            checkpoint_path = "checkpoint-500.zip" # Path to your uploaded checkpoint
+            base_model_id = "google/gemma-2b" # Your base model
+
+            # Load tokenizer from base model
+            self.tokenizer = AutoTokenizer.from_pretrained(base_model_id)
+
+            # Load model with checkpoint
+            self.model = AutoModelForCausalLM.from_pretrained(
+                checkpoint_path,
+                device_map="auto",
+                load_in_8bit=True,
+                torch_dtype=torch.float16,
+                low_cpu_mem_usage=True
+            )
+            self.model.eval()
+            logger.info("Model loaded successfully")
+        except Exception as e:
+            logger.error(f"Error loading model: {str(e)}")
+            raise
+
+    def setup_rag(self):
+        try:
+            logger.info("Setting up RAG system...")
+            # Load your knowledge base content
+            knowledge_base = {
+                "triage_scenarios.txt": """Medical Triage Scenarios and Responses:
+
+EMERGENCY (999) SCENARIOS:
+1. Cardiovascular:
+- Chest pain/pressure
+- Heart attack symptoms
+- Irregular heartbeat with dizziness
+Response: Immediate 999 call, sit/lie down, chew aspirin if available
+
+2. Respiratory:
+- Severe breathing difficulty
+- Choking
+- Unable to speak full sentences
+Response: 999, sitting position, clear airway
+
+3. Neurological:
+- Stroke symptoms (FAST)
+- Seizures
+- Unconsciousness
+Response: 999, recovery position if unconscious
+
+4. Trauma:
+- Severe bleeding
+- Head injuries with confusion
+- Major burns
+Response: 999, apply direct pressure to bleeding
+
+URGENT CARE (111) SCENARIOS:
+1. Moderate Symptoms:
+- Persistent fever
+- Non-severe infections
+- Minor injuries
+Response: 111 contact, monitor symptoms
+
+2. Minor Emergencies:
+- Small cuts needing stitches
+- Sprains and strains
+- Mild allergic reactions
+Response: 111 or urgent care visit
+
+GP APPOINTMENT SCENARIOS:
+1. Routine Care:
+- Chronic condition review
+- Medication reviews
+- Non-urgent symptoms
+Response: Book routine GP appointment
+
+2. Preventive Care:
+- Vaccinations
+- Health screenings
+- Regular check-ups
+Response: Schedule with GP reception""",
+                "emergency_detection.txt": """Enhanced Emergency Detection Criteria:
+
+IMMEDIATE LIFE THREATS:
+1. Cardiac Symptoms:
+- Chest pain/pressure/tightness
+- Pain spreading to arms/jaw/neck
+- Sweating with nausea
+- Shortness of breath
+
+2. Breathing Problems:
+- Severe shortness of breath
+- Blue lips or face
+- Unable to complete sentences
+- Choking/airway blockage
+
+3. Neurological:
+- FAST (Face, Arms, Speech, Time)
+- Sudden confusion
+- Severe headache
+- Seizures
+- Loss of consciousness
+
+4. Severe Trauma:
+- Heavy bleeding
+- Deep wounds
+- Head injury with confusion
+- Severe burns
+- Broken bones with deformity
+
+5. Anaphylaxis:
+- Sudden swelling
+- Difficulty breathing
+- Rapid onset rash
+- Light-headedness
+
+URGENT BUT NOT IMMEDIATE:
+1. Moderate Symptoms:
+- Persistent fever
+- Dehydration
+- Non-severe infections
+- Minor injuries
+
+2. Worsening Conditions:
+- Increasing pain
+- Progressive symptoms
+- Medication reactions
+
+RESPONSE PROTOCOLS:
+1. For Life Threats:
+- Immediate 999 call
+- Clear first aid instructions
+- Stay on line until help arrives
+
+2. For Urgent Care:
+- 111 contact
+- Monitor for worsening
+- Document symptoms""",
+                "gp_booking.txt": """GP Appointment Booking Templates:
+
+APPOINTMENT TYPES:
+1. Routine Appointments:
+Template: "I need to book a routine appointment for [condition]. My availability is [times/dates]. My GP is Dr. [name] if available."
+
+2. Follow-up Appointments:
+Template: "I need a follow-up appointment regarding [condition] discussed on [date]. My previous appointment was with Dr. [name]."
+
+3. Medication Reviews:
+Template: "I need a medication review for [medication]. My last review was [date]."
+
+BOOKING INFORMATION NEEDED:
+1. Patient Details:
+- Full name
+- Date of birth
+- NHS number (if known)
+- Registered GP practice
+
+2. Appointment Details:
+- Nature of appointment
+- Preferred times/dates
+- Urgency level
+- Special requirements
+
+3. Contact Information:
+- Phone number
+- Alternative contact
+- Preferred contact method
+
+BOOKING PROCESS:
+1. Online Booking:
+- NHS app instructions
+- Practice website guidance
+- System navigation help
+
+2. Phone Booking:
+- Best times to call
+- Required information
+- Queue management tips
+
+3. Special Circumstances:
+- Interpreter needs
+- Accessibility requirements
+- Transport arrangements""",
+                "cultural_sensitivity.txt": """Cultural Sensitivity Guidelines:
+
+CULTURAL AWARENESS:
+1. Religious Considerations:
+- Prayer times
+- Religious observations
+- Dietary restrictions
+- Gender preferences for care
+- Religious festivals/fasting periods
+
+2. Language Support:
+- Interpreter services
+- Multi-language resources
+- Clear communication methods
+- Family involvement preferences
+
+3. Cultural Beliefs:
+- Traditional medicine practices
+- Cultural health beliefs
+- Family decision-making
+- Privacy customs
+
+COMMUNICATION APPROACHES:
+1. Respectful Interaction:
+- Use preferred names/titles
+- Appropriate greetings
+- Non-judgmental responses
+- Active listening
+
+2. Language Usage:
+- Clear, simple terms
+- Avoid medical jargon
+- Confirm understanding
+- Respect silence/pauses
+
+3. Non-verbal Communication:
+- Eye contact customs
+- Personal space
+- Body language awareness
+- Gesture sensitivity
+
+SPECIFIC CONSIDERATIONS:
+1. South Asian Communities:
+- Family involvement
+- Gender sensitivity
+- Traditional medicine
+- Language diversity
+
+2. Middle Eastern Communities:
+- Gender-specific care
+- Religious observations
+- Family hierarchies
+- Privacy concerns
+
+3. African/Caribbean Communities:
+- Traditional healers
+- Community involvement
+- Historical medical mistrust
+- Cultural specific conditions
+
+4. Eastern European Communities:
+- Direct communication
+- Family involvement
+- Medical documentation
+- Language support
+
+INCLUSIVE PRACTICES:
+1. Appointment Scheduling:
+- Religious holidays
+- Prayer times
+- Family availability
+- Interpreter needs
+
+2. Treatment Planning:
+- Cultural preferences
+- Traditional practices
+- Family involvement
+- Dietary requirements
+
+3. Support Services:
+- Community resources
+- Cultural organizations
+- Language services
+- Social support""",
+                "service_boundaries.txt": """Service Limitations and Professional Boundaries:
+
+CLEAR BOUNDARIES:
+1. Medical Advice:
+- No diagnoses
+- No prescriptions
+- No treatment recommendations
+- No medical procedures
+- No second opinions
+
+2. Emergency Services:
+- Clear referral criteria
+- Documented responses
+- Follow-up protocols
+- Handover procedures
+
+3. Information Sharing:
+- Confidentiality limits
+- Data protection
+- Record keeping
+- Information governance
+
+PROFESSIONAL CONDUCT:
+1. Communication:
+- Professional language
+- Emotional boundaries
+- Personal distance
+- Service scope
+
+2. Service Delivery:
+- No financial transactions
+- No personal relationships
+- Clear role definition
+- Professional limits"""
+            }
+
+            os.makedirs("knowledge_base", exist_ok=True)
+
+            # Create and process documents
+            documents = []
+            for filename, content in knowledge_base.items():
+                with open(f"knowledge_base/{filename}", "w") as f:
+                    f.write(content)
+                documents.append(content)
+
+            # Setup embeddings and vector store
+            self.embeddings = HuggingFaceEmbeddings(
+                model_name="sentence-transformers/all-MiniLM-L6-v2"
+            )
+
+            text_splitter = RecursiveCharacterTextSplitter(
+                chunk_size=300,
+                chunk_overlap=100
+            )
+
+            texts = text_splitter.split_text("\n\n".join(documents))
+            self.vector_store = FAISS.from_texts(texts, self.embeddings)
+            logger.info("RAG system setup complete")
+
+        except Exception as e:
+            logger.error(f"Error setting up RAG: {str(e)}")
+            raise
+
+    def get_relevant_context(self, query):
+        try:
+            docs = self.vector_store.similarity_search(query, k=3)
+            return "\n".join(doc.page_content for doc in docs)
+        except Exception as e:
+            logger.error(f"Error retrieving context: {str(e)}")
             return ""
-        docs = self.retriever.get_relevant_documents(user_input)
-        return "\n\n".join([doc.page_content for doc in docs])
-
-    def generate_response(self, user_input):
-        context = self.get_relevant_context(user_input)
-        history = "\n".join([
-            f"User: {turn['user']}\nAssistant: {turn['assistant']}\n"
-            for turn in self.conversation_history[-3:]
-        ])
-
-        prompt = f"""<start_of_turn>system
-As Pearly, I use the following medical guidelines to help triage patients:
+
+    @torch.inference_mode()
+    def generate_response(self, message: str, history: list) -> str:
+        try:
+            # Get RAG context
+            context = self.get_relevant_context(message)
+
+            # Format conversation history
+            conv_history = "\n".join([
+                f"User: {user}\nAssistant: {assistant}"
+                for user, assistant in history[-3:] # Keep last 3 turns
+            ])
+
+            # Create prompt
+            prompt = f"""<start_of_turn>system
+Using these medical guidelines:

 {context}

-Previous Conversation:
-{history}
+Previous conversation:
+{conv_history}

-Based on these guidelines, I will:
+Guidelines:
 1. Assess symptoms and severity
 2. Ask relevant follow-up questions
 3. Direct to appropriate care (999, 111, or GP)
@@ -433,18 +440,18 @@ Based on these guidelines, I will:
 5. Never diagnose or recommend treatments
 <end_of_turn>
 <start_of_turn>user
-{user_input}
+{message}
 <end_of_turn>
 <start_of_turn>assistant"""

-        inputs = self.tokenizer(
-            prompt,
-            return_tensors="pt",
-            truncation=True,
-            max_length=512
-        ).to(self.model.device)
-
-        with torch.no_grad():
+            # Generate response
+            inputs = self.tokenizer(
+                prompt,
+                return_tensors="pt",
+                truncation=True,
+                max_length=512
+            ).to(self.model.device)
+
             outputs = self.model.generate(
                 **inputs,
                 max_new_tokens=256,
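
Note: with truncation=True and max_length=512, the tokenizer trims tokens from the right by default, so an over-long retrieved context can push the user turn and the closing <start_of_turn>assistant cue out of the prompt entirely. A small sketch of left-sided truncation, which keeps the newest tokens instead (the long prompt here is a stand-in):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b")
tokenizer.truncation_side = "left"  # drop the oldest tokens first
long_prompt = "symptom history " * 500  # stand-in for an over-long triage prompt
inputs = tokenizer(long_prompt, return_tensors="pt", truncation=True, max_length=512)
print(inputs["input_ids"].shape)  # torch.Size([1, 512])
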
@@ -452,21 +459,20 @@ Based on these guidelines, I will:
                 do_sample=True,
                 temperature=0.7,
                 top_p=0.9,
-                repetition_penalty=1.2,
-                pad_token_id=self.tokenizer.pad_token_id
+                repetition_penalty=1.2
             )
-
-        response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-        response = response.split("<start_of_turn>assistant")[-1].strip()
-        if "<end_of_turn>" in response:
-            response = response.split("<end_of_turn>")[0].strip()
-
-        self.conversation_history.append({
-            "user": user_input,
-            "assistant": response
-        })
-
-        return response
+
+            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+            response = response.split("<start_of_turn>assistant")[-1].strip()
+            if "<end_of_turn>" in response:
+                response = response.split("<end_of_turn>")[0].strip()
+
+            return response
+
+        except Exception as e:
+            logger.error(f"Error generating response: {str(e)}")
+            return "I apologize, but I encountered an error. Please try again."
+

 def create_demo():
     """Set up Gradio interface for the chatbot with enhanced styling and functionality."""
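
Note: decode returns the whole sequence (prompt plus completion), so the split on "<start_of_turn>assistant" relies on the prompt being echoed back. The extraction logic itself can be sanity-checked offline on a canned string:

# Quick offline check of the turn-extraction logic used above
raw = (
    "<start_of_turn>user\nI have a headache\n<end_of_turn>\n"
    "<start_of_turn>assistant\nHow long has it lasted?\n<end_of_turn>"
)
response = raw.split("<start_of_turn>assistant")[-1].strip()
if "<end_of_turn>" in response:
    response = response.split("<end_of_turn>")[0].strip()
assert response == "How long has it lasted?"
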
@@ -475,9 +481,9 @@ def create_demo():
     @gr.routes.get("/health")
     def health_check():
         return {"status": "healthy"}
-    bot = AdaptiveMedicalBot()
+    bot = PearlyBot() # ✅

-    def chat(message: str, history: List[Dict[str, str]]):
+    def chat(message: str, history: list): # ✅
         try:
             if not message.strip():
                 return history
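
Note: generate_response unpacks each history item as a (user, assistant) pair, which matches Gradio's tuple-style chat history. The rest of the chat handler is cut off in this diff; a hypothetical wiring consistent with that shape:

def chat(message: str, history: list):
    # history arrives as [(user_msg, assistant_msg), ...]
    if not message.strip():
        return history
    reply = bot.generate_response(message, history)
    history.append((message, reply))
    return history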