Spaces: Runtime error
Update app.py
app.py
CHANGED
@@ -3,16 +3,25 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 import torch
 import spaces
 
+IS_SPACES_ZERO = os.environ.get("SPACES_ZERO_GPU", "0") == "1"
+IS_SPACE = os.environ.get("SPACE_ID", None) is not None
+
+#device = "cuda" if torch.cuda.is_available() else "cpu"
 #device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+#dtype = torch.float16
+LOW_MEMORY = os.getenv("LOW_MEMORY", "0") == "1"
 #print(f"Using device: {device}")
-
+#print(f"Using dtype: {dtype}")
+print(f"low memory: {LOW_MEMORY}")
+
+device = "cuda"
 model_name = "ruslanmv/Medical-Llama3-8B"
-
-model = AutoModelForCausalLM.from_pretrained(model_name)
-tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+# Move model and tokenizer to the CUDA device
+model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True).to(device)
 tokenizer.pad_token = tokenizer.eos_token
 
-@spaces.GPU
+@spaces.GPU
 def askme(symptoms, question):
     sys_message = '''\
 You are an AI Medical Assistant trained on a vast dataset of health information. Please be thorough and
@@ -21,7 +30,7 @@ def askme(symptoms, question):
     content = symptoms + " " + question
     messages = [{"role": "system", "content": sys_message}, {"role": "user", "content": content}]
     prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    inputs = tokenizer(prompt, return_tensors="pt").to(device)
+    inputs = tokenizer(prompt, return_tensors="pt").to(device)  # Ensure inputs are on CUDA device
     outputs = model.generate(**inputs, max_new_tokens=200, use_cache=True)
     response_text = tokenizer.batch_decode(outputs)[0].strip()
     answer = response_text.split('<|im_start|>assistant')[-1].strip()
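
The traceback for the runtime error is not shown, but one likely culprit is the new AutoTokenizer.from_pretrained(model_name, trust_remote_code=True).to(device) line: Hugging Face tokenizers are plain Python objects rather than torch.nn.Module instances, so they have no .to() method and this call raises an AttributeError at import time. Only the model and the tokenized input tensors need to be moved to the GPU. (If os is not imported above the hunk shown here, the added os.environ / os.getenv lines would also fail with a NameError.) A minimal corrected sketch of the loading block, keeping the names used in the diff; the float16 dtype mirrors the commented-out #dtype = torch.float16 line and is an assumption, not part of the commit:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "ruslanmv/Medical-Llama3-8B"
device = "cuda" if torch.cuda.is_available() else "cpu"

# The model is an nn.Module, so moving it to the GPU is fine.
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16).to(device)

# The tokenizer is not an nn.Module; it stays on the CPU and must not get a .to() call.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

# Inside the @spaces.GPU-decorated askme(), only the tokenized tensors are moved:
#   inputs = tokenizer(prompt, return_tensors="pt").to(device)

The SPACES_ZERO_GPU check added in the same commit suggests the Space may target ZeroGPU hardware, where the @spaces.GPU decorator is what grants askme access to the GPU at call time.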