lukiod committed
Commit 54c24d5 · verified · 1 Parent(s): 6ed2a87

Update app.py

Files changed (1)
  1. app.py +57 -59
app.py CHANGED
@@ -2,28 +2,29 @@ import gradio as gr
 import pandas as pd
 from datetime import datetime
 import torch
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+from transformers import T5Tokenizer, T5ForConditionalGeneration
 import gc
-from typing import List, Dict, Optional
+from typing import List, Dict
 import os
 
 class ModelHandler:
     def __init__(self):
-        self.model_name = "google/flan-t5-large"
+        self.model_name = "google/flan-t5-base"
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
-        self.tokenizer = None
-        self.model = None
+        print(f"Using device: {self.device}")
         self.initialize_model()
-
+
     def initialize_model(self):
         try:
-            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
-            self.model = AutoModelForSeq2SeqLM.from_pretrained(
+            print(f"Loading model: {self.model_name}")
+            self.tokenizer = T5Tokenizer.from_pretrained(self.model_name)
+            self.model = T5ForConditionalGeneration.from_pretrained(
                 self.model_name,
                 torch_dtype=torch.float32,
                 low_cpu_mem_usage=True
             )
             self.model.to(self.device)
+            print("Model loaded successfully")
             return True
         except Exception as e:
             print(f"Error initializing model: {str(e)}")
@@ -31,42 +32,39 @@ class ModelHandler:
 
     def generate_response(self, prompt: str, max_length: int = 512) -> str:
         try:
-            gc.collect()
-            if torch.cuda.is_available():
-                torch.cuda.empty_cache()
-
-            inputs = self.tokenizer(
-                prompt,
+            # Format prompt for T5
+            formatted_prompt = f"Answer the health question: {prompt}"
+
+            # Generate response
+            input_ids = self.tokenizer(
+                formatted_prompt,
                 return_tensors="pt",
                 truncation=True,
                 max_length=512
-            ).to(self.device)
+            ).input_ids.to(self.device)
 
-            with torch.no_grad():
-                outputs = self.model.generate(
-                    inputs.input_ids,
-                    max_length=max_length,
-                    num_beams=2,
-                    temperature=0.7,
-                    no_repeat_ngram_size=3,
-                    length_penalty=1.0
-                )
+            outputs = self.model.generate(
+                input_ids,
+                max_length=max_length,
+                min_length=20,
+                num_beams=2,
+                temperature=0.7,
+                do_sample=True
+            )
 
             response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-            del outputs, inputs
+            # Memory cleanup
+            del outputs, input_ids
             gc.collect()
             if torch.cuda.is_available():
                 torch.cuda.empty_cache()
 
             return response
-        except Exception as e:
-            return f"Error generating response: {str(e)}"
 
-    def clear_memory(self):
-        gc.collect()
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
+        except Exception as e:
+            print(f"Error in generate_response: {str(e)}")
+            return "I apologize, but I encountered an error processing your request."
 
 class HealthData:
     def __init__(self):
@@ -116,36 +114,36 @@ class HealthAssistant:
         self.data = HealthData()
         self.request_count = 0
 
-    def _create_prompt(self, message: str, history: List = None) -> str:
-        prompt_parts = ["You are a helpful healthcare assistant."]
-
-        # Add health context
-        health_context = self.data.get_health_context()
-        if health_context != "No health data available.":
-            prompt_parts.append(f"Current health information:\n{health_context}")
-
-        # Add conversation history
-        if history:
-            prompt_parts.append("Previous conversation:")
-            for user_msg, bot_msg in history[-3:]:
-                prompt_parts.append(f"User: {user_msg}")
-                prompt_parts.append(f"Assistant: {bot_msg}")
-
-        # Add current question
-        prompt_parts.append(f"User: {message}")
-        prompt_parts.append("Assistant:")
-
-        return "\n\n".join(prompt_parts)
-
     def get_response(self, message: str, history: List = None) -> str:
-        self.request_count += 1
-        prompt = self._create_prompt(message, history)
-        response = self.model.generate_response(prompt)
-
-        if self.request_count % 5 == 0:
-            self.model.clear_memory()
+        try:
+            # Prepare context
+            context = self.data.get_health_context()
+
+            # Format prompt with context and history
+            prompt = "Given the following context:\n"
+            prompt += f"{context}\n\n"
 
-        return response
+            if history:
+                prompt += "Previous conversation:\n"
+                for user_msg, bot_msg in history[-3:]:  # Last 3 exchanges
+                    prompt += f"User: {user_msg}\nAssistant: {bot_msg}\n"
+
+            prompt += f"Current question: {message}"
+
+            # Get response
+            response = self.model.generate_response(prompt)
+
+            # Memory management
+            if self.request_count % 5 == 0:
+                gc.collect()
+                if torch.cuda.is_available():
+                    torch.cuda.empty_cache()
+
+            return response
+
+        except Exception as e:
+            print(f"Error in get_response: {str(e)}")
+            return "I apologize, but I encountered an error. Please try again."
 
 class HealthAssistantUI:
     def __init__(self):
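
For context, here is a minimal, self-contained sketch of the generation path this commit switches to: google/flan-t5-base loaded through T5Tokenizer and T5ForConditionalGeneration, using the same prompt wrapper and decoding settings as the new generate_response(). It assumes torch, transformers, and sentencepiece (required by T5Tokenizer) are installed; the sample question is made up for illustration.

import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

device = "cuda" if torch.cuda.is_available() else "cpu"

# The commit downsizes from flan-t5-large to flan-t5-base and keeps
# float32 weights, matching ModelHandler.initialize_model().
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
model = T5ForConditionalGeneration.from_pretrained(
    "google/flan-t5-base",
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
).to(device)

# Same prompt wrapper as the new generate_response(); the question
# itself is a hypothetical example.
prompt = "Answer the health question: What are common causes of headaches?"
input_ids = tokenizer(
    prompt, return_tensors="pt", truncation=True, max_length=512
).input_ids.to(device)

# num_beams=2 with do_sample=True selects beam-search multinomial
# sampling; min_length=20 keeps answers from stopping after a few tokens.
outputs = model.generate(
    input_ids,
    max_length=512,
    min_length=20,
    num_beams=2,
    temperature=0.7,
    do_sample=True,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Two details of the committed version worth noting: get_response() keeps the request_count % 5 == 0 cleanup check, but the commit removes the self.request_count += 1 line, so the counter stays at 0 and the cache cleanup now runs on every call; and dropping the torch.no_grad() context around model.generate() is harmless, since generate() already runs without gradient tracking internally.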