lukiod committed (verified)
Commit 70448af · 1 Parent(s): 25b6c4d

Update app.py

Files changed (1): app.py (+119, -97)
app.py CHANGED
@@ -1,135 +1,150 @@
 import gradio as gr
 import torch
-from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
-from qwen_vl_utils import process_vision_info
+import transformers
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import logging
 from typing import List, Dict
 import gc
 import os
 
-# Setup logging
-logging.basicConfig(level=logging.INFO)
+# Setup logging with more detail
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
 logger = logging.getLogger(__name__)
 
+# Set environment variables for better stability
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+transformers.logging.set_verbosity_info()
+
 class HealthAssistant:
-    def __init__(self):
-        self.model_name = "Qwen/Qwen2-VL-7B-Instruct"
+    def __init__(self, use_smaller_model=True):
+        # Use a smaller model for testing/CPU
+        if use_smaller_model:
+            self.model_name = "facebook/opt-125m"  # Much smaller model for testing
+        else:
+            self.model_name = "Qwen/Qwen2-VL-7B-Instruct"
+
         self.model = None
         self.tokenizer = None
-        self.processor = None
        self.metrics = []
         self.medications = []
         self.initialize_model()
 
     def initialize_model(self):
         try:
-            logger.info("Loading Qwen2-VL model...")
-            # Initialize model with default settings
-            self.model = Qwen2VLForConditionalGeneration.from_pretrained(
+            logger.info(f"Starting model initialization: {self.model_name}")
+
+            # First try loading tokenizer
+            logger.info("Loading tokenizer...")
+            self.tokenizer = AutoTokenizer.from_pretrained(
                 self.model_name,
-                torch_dtype="auto",
-                device_map="auto",
                 trust_remote_code=True
             )
+            if self.tokenizer is None:
+                raise ValueError("Failed to load tokenizer")
+            logger.info("Tokenizer loaded successfully")
+
+            # Then load the model
+            logger.info("Loading model...")
+            self.model = AutoModelForCausalLM.from_pretrained(
+                self.model_name,
+                torch_dtype=torch.float32,  # Use float32 for CPU
+                low_cpu_mem_usage=True,
+                trust_remote_code=True
+            )
+            if self.model is None:
+                raise ValueError("Failed to load model")
+
+            # Move model to CPU explicitly
+            self.model = self.model.to("cpu")
+            logger.info("Model loaded successfully and moved to CPU")
 
-            # Initialize processor
-            self.processor = AutoProcessor.from_pretrained(self.model_name)
-            logger.info("Model loaded successfully")
+            # Set padding token if needed
+            if self.tokenizer.pad_token is None:
+                self.tokenizer.pad_token = self.tokenizer.eos_token
+                logger.info("Set padding token")
+
+            return True
+
         except Exception as e:
-            logger.error(f"Error initializing model: {e}")
-            raise
+            logger.error(f"Error in model initialization: {str(e)}")
+            raise RuntimeError(f"Model initialization failed: {str(e)}")
+
+    def is_initialized(self):
+        """Check if model is properly initialized"""
+        return (self.model is not None and
+                self.tokenizer is not None and
+                hasattr(self.model, 'generate') and
+                hasattr(self.tokenizer, 'encode'))
 
     def generate_response(self, message: str, history: List = None) -> str:
         try:
-            # Format messages for Qwen2-VL
-            messages = self._format_messages(message, history)
-
-            # Prepare for inference using qwen_vl_utils
-            text = self.processor.apply_chat_template(
-                messages,
-                tokenize=False,
-                add_generation_prompt=True
-            )
-
-            # Process vision info (empty for text-only)
-            image_inputs, video_inputs = process_vision_info(messages)
+            if not self.is_initialized():
+                raise RuntimeError("Model not properly initialized")
+
+            # Prepare prompt
+            prompt = self._prepare_prompt(message, history)
 
-            # Prepare inputs
-            inputs = self.processor(
-                text=[text],
-                images=image_inputs,
-                videos=video_inputs,
+            # Tokenize
+            inputs = self.tokenizer(
+                prompt,
+                return_tensors="pt",
                 padding=True,
-                return_tensors="pt"
-            )
-
-            # Move to appropriate device
-            inputs = inputs.to(self.model.device)
+                truncation=True,
+                max_length=512
+            ).to("cpu")  # Ensure CPU tensor
 
-            # Generate response
-            generated_ids = self.model.generate(
-                **inputs,
-                max_new_tokens=128,
-                do_sample=True,
-                temperature=0.7,
-                top_p=0.9
-            )
+            # Generate
+            with torch.no_grad():
+                outputs = self.model.generate(
+                    inputs["input_ids"],
+                    max_new_tokens=128,
+                    num_beams=1,
+                    do_sample=True,
+                    temperature=0.7,
+                    top_p=0.9,
+                    pad_token_id=self.tokenizer.pad_token_id,
+                    eos_token_id=self.tokenizer.eos_token_id
+                )
 
-            # Trim and decode response
-            generated_ids_trimmed = [
-                out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
-            ]
-
-            output_text = self.processor.batch_decode(
-                generated_ids_trimmed,
-                skip_special_tokens=True,
-                clean_up_tokenization_spaces=False
-            )[0]
+            # Decode
+            response = self.tokenizer.decode(
+                outputs[0][inputs["input_ids"].shape[1]:],
+                skip_special_tokens=True
+            )
 
             # Cleanup
-            del inputs, generated_ids, generated_ids_trimmed
+            del outputs, inputs
             gc.collect()
-            torch.cuda.empty_cache() if torch.cuda.is_available() else None
 
-            return output_text.strip()
+            return response.strip()
 
         except Exception as e:
-            logger.error(f"Error generating response: {e}")
+            logger.error(f"Error generating response: {str(e)}")
             return "I apologize, but I encountered an error. Please try again."
 
-    def _format_messages(self, message: str, history: List = None) -> List[Dict]:
-        """Format messages for Qwen2-VL"""
-        messages = []
-
-        # Add health context as system message
-        health_context = self._get_health_context()
-        if health_context:
-            messages.append({
-                "role": "system",
-                "content": [{"type": "text", "text": f"Health Context:\n{health_context}"}]
-            })
-
-        # Add conversation history
+    def _prepare_prompt(self, message: str, history: List = None) -> str:
+        parts = [
+            "You are a helpful healthcare assistant. Provide accurate and helpful information.",
+            self._get_health_context() or "No health data available yet."
+        ]
+
         if history:
-            for user_msg, assistant_msg in history[-3:]:
-                messages.extend([
-                    {
-                        "role": "user",
-                        "content": [{"type": "text", "text": user_msg}]
-                    },
-                    {
-                        "role": "assistant",
-                        "content": [{"type": "text", "text": assistant_msg}]
-                    }
+            parts.append("Previous conversation:")
+            for user_msg, bot_msg in history[-3:]:
+                parts.extend([
+                    f"User: {user_msg}",
+                    f"Assistant: {bot_msg}"
                 ])
-
-        # Add current message
-        messages.append({
-            "role": "user",
-            "content": [{"type": "text", "text": message}]
-        })
-
-        return messages
+
+        parts.extend([
+            f"User: {message}",
+            "Assistant:"
+        ])
+
+        return "\n\n".join(parts)
 
     def _get_health_context(self) -> str:
         context_parts = []
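
Side note on the new prompt format: _prepare_prompt replaces the Qwen2-VL message dicts with a flat transcript whose parts are joined by blank lines ("\n\n".join(parts)). A sketch of the string it should produce, using a hypothetical prior turn for illustration:

assistant = HealthAssistant(use_smaller_model=True)
prompt = assistant._prepare_prompt(
    "Is 120/80 a normal blood pressure?",                   # current message
    history=[("Hi", "Hello! How can I help you today?")],   # hypothetical prior turn
)
print(prompt)
# You are a helpful healthcare assistant. Provide accurate and helpful information.
#
# No health data available yet.
#
# Previous conversation:
#
# User: Hi
#
# Assistant: Hello! How can I help you today?
#
# User: Is 120/80 a normal blood pressure?
#
# Assistant: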
@@ -180,7 +195,15 @@ class HealthAssistant:
 
 class GradioInterface:
     def __init__(self):
-        self.assistant = HealthAssistant()
+        try:
+            logger.info("Initializing Health Assistant...")
+            self.assistant = HealthAssistant(use_smaller_model=True)  # Use smaller model for testing
+            if not self.assistant.is_initialized():
+                raise RuntimeError("Health Assistant failed to initialize properly")
+            logger.info("Health Assistant initialized successfully")
+        except Exception as e:
+            logger.error(f"Failed to initialize Health Assistant: {e}")
+            raise
 
     def chat_response(self, message: str, history: List) -> tuple:
         if not message.strip():
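
Because initialization now re-raises, a bad checkpoint fails at construction time instead of at the first chat message. A minimal sketch of how a caller might surface that, assuming this file is saved and importable as app.py:

from app import GradioInterface  # assumes the diff above is saved as app.py

try:
    interface = GradioInterface()  # loads the model eagerly; raises on failure
except RuntimeError as err:
    print(f"Startup aborted: {err}")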
@@ -211,7 +234,6 @@ class GradioInterface:
             gr.Markdown("# 🏥 AI Health Assistant")
 
             with gr.Tabs():
-                # Chat Interface
                 with gr.Tab("💬 Health Chat"):
                     chatbot = gr.Chatbot(
                         height=450,
@@ -227,7 +249,6 @@ class GradioInterface:
                         send_btn = gr.Button("Send", scale=1)
                     clear_btn = gr.Button("Clear Chat")
 
-                # Health Metrics
                 with gr.Tab("📊 Health Metrics"):
                     with gr.Row():
                         weight_input = gr.Number(label="Weight (kg)")
@@ -236,7 +257,6 @@ class GradioInterface:
                     metrics_btn = gr.Button("Save Metrics")
                     metrics_status = gr.Markdown()
 
-                # Medication Manager
                 with gr.Tab("💊 Medication Manager"):
                     with gr.Row():
                         med_name = gr.Textbox(label="Medication Name")
@@ -246,7 +266,6 @@ class GradioInterface:
                     med_btn = gr.Button("Add Medication")
                     med_status = gr.Markdown()
 
-            # Event handlers
             msg.submit(self.chat_response, [msg, chatbot], [msg, chatbot])
             send_btn.click(self.chat_response, [msg, chatbot], [msg, chatbot])
             clear_btn.click(lambda: [], None, chatbot)
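
The event wiring passes [msg, chatbot] as both inputs and outputs, so chat_response must return a (textbox_value, history) tuple. The full method is outside this diff, but a minimal compatible stub would look like this (tuple-style Chatbot history assumed):

def chat_response(self, message: str, history: list) -> tuple:
    if not message.strip():
        return "", history or []
    reply = self.assistant.generate_response(message, history)
    history = (history or []) + [(message, reply)]
    return "", history  # clear the textbox, append the new exchange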
@@ -267,8 +286,10 @@ class GradioInterface:
 
 def main():
     try:
+        logger.info("Starting application...")
         interface = GradioInterface()
         demo = interface.create_interface()
+        logger.info("Launching Gradio interface...")
         demo.launch(
             share=False,
             server_name="0.0.0.0",
@@ -277,6 +298,7 @@ def main():
         )
     except Exception as e:
         logger.error(f"Error starting application: {e}")
+        raise
 
 if __name__ == "__main__":
     main()
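
One more note on the generation change: decoding outputs[0] directly would echo the prompt, which is why the new code slices from inputs["input_ids"].shape[1]. The same pattern in isolation, using the opt-125m checkpoint from the diff:

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

tok = AutoTokenizer.from_pretrained("facebook/opt-125m")
model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m", torch_dtype=torch.float32)

inputs = tok("User: Hello\n\nAssistant:", return_tensors="pt")
with torch.no_grad():
    out = model.generate(inputs["input_ids"], max_new_tokens=32,
                         pad_token_id=tok.eos_token_id)

prompt_len = inputs["input_ids"].shape[1]
print(tok.decode(out[0][prompt_len:], skip_special_tokens=True))  # new tokens only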
 