Spaces: Running on Zero
Update app.py
app.py
CHANGED
@@ -6,11 +6,11 @@ from threading import Thread
 import spaces
 
 token = os.environ["HF_TOKEN"]
-model = AutoModelForCausalLM.from_pretrained("google/gemma-
+model = AutoModelForCausalLM.from_pretrained("google/gemma-7b-it",
     # torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
     torch_dtype=torch.float16,
     token=token)
-tok = AutoTokenizer.from_pretrained("google/gemma-
+tok = AutoTokenizer.from_pretrained("google/gemma-7b-it",token=token)
 # using CUDA for an optimal experience
 # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 device = torch.device('cuda')
@@ -57,5 +57,5 @@ demo = gr.ChatInterface(fn=chat,
     chatbot=gr.Chatbot(show_label=True, show_share_button=True, show_copy_button=True, likeable=True, layout="bubble", bubble_full_width=False),
     theme="Soft",
     examples=[["Write me a poem about Machine Learning."]],
-    title="
+    title="Text Streaming")
-demo.launch()
+demo.launch()
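The diff wires a chat callback into gr.ChatInterface (fn=chat) and the first hunk header shows "from threading import Thread", but the callback body itself is outside the changed range. Below is a minimal sketch of what such a streaming callback typically looks like on a ZeroGPU Space, assuming transformers' TextIteratorStreamer and the model, tok, and device objects loaded above; the max_new_tokens value and the message formatting are illustrative assumptions, not taken from this Space's code.

import spaces
from threading import Thread
from transformers import TextIteratorStreamer

@spaces.GPU  # ZeroGPU: a GPU is attached only while this function runs
def chat(message, history):
    # Rebuild the conversation in the role/content format the chat template expects
    messages = []
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    input_ids = tok.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(device)

    # Run generation in a background thread so tokens can be yielded as they arrive
    streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)
    Thread(target=model.generate,
           kwargs=dict(input_ids=input_ids, streamer=streamer,
                       max_new_tokens=512)).start()  # max_new_tokens is illustrative

    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial  # gr.ChatInterface re-renders the partial reply on each yield

Because chat is a generator, gr.ChatInterface streams each yielded string into the Chatbot widget, which is what the new "Text Streaming" title refers to.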