not-lain committed on
Commit
4e81072
1 Parent(s): 1141e21

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -10
app.py CHANGED
@@ -6,22 +6,21 @@ import numpy as np
6
  from torch.nn import functional as F
7
  import os
8
  from threading import Thread
9
- token = os.environ["HF_TOKEN"]
10
 
11
- model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it", torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,token=token)
 
 
 
 
12
  tok = AutoTokenizer.from_pretrained("google/gemma-2b-it",token=token)
13
  # using CUDA for an optimal experience
14
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
15
  model = model.to(device)
16
 
17
 
18
- start_message = ""
19
-
20
- def user(message, history):
21
- # Append the user's message to the conversation history
22
- return "", history + [[message, ""]]
23
-
24
-
25
  def chat(message, history):
26
  chat = []
27
  for item in history:
 
6
  from torch.nn import functional as F
7
  import os
8
  from threading import Thread
9
+ import spaces
10
 
11
+ token = os.environ["HF_TOKEN"]
12
+ model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it",
13
+ # torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
14
+ torch_dtype=torch.float16,
15
+ token=token)
16
  tok = AutoTokenizer.from_pretrained("google/gemma-2b-it",token=token)
17
  # using CUDA for an optimal experience
18
+ # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
19
+ device = torch.device('cuda')
20
  model = model.to(device)
21
 
22
 
23
+ @spaces.GPU
 
 
 
 
 
 
24
  def chat(message, history):
25
  chat = []
26
  for item in history: