Update space
- .build_config +6 -0
- app.py +7 -0
.build_config
ADDED
@@ -0,0 +1,6 @@
+# .build_config
+build:
+  cuda: "11.8"
+  python_version: "3.10"
+  system_packages:
+    - "nvidia-cuda-toolkit"
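This config presumably pins the Space's build environment to CUDA 11.8 and Python 3.10 and installs nvidia-cuda-toolkit as a system package (the YAML nesting above is assumed, since the diff view flattens indentation). A minimal sanity check that the pinned toolchain actually took effect at runtime, using standard sys/torch calls rather than anything from this commit:

import sys
import torch

# Verify the environment the build config is meant to produce.
print(sys.version)                # expect 3.10.x
print(torch.version.cuda)         # expect "11.8" if the CUDA pin took effect
print(torch.cuda.is_available())  # True when the Space has GPU hardware attached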
app.py
CHANGED
@@ -3,6 +3,13 @@ from huggingface_hub import InferenceClient
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from peft import PeftModel
 
+device = "cuda" if torch.cuda.is_available() else "cpu"
+base_model = AutoModelForCausalLM.from_pretrained(
+    "unsloth/Llama-3.2-3B-Instruct-bnb-4bit",
+    device_map="auto",
+    torch_dtype=torch.float16
+)
+
 # Load model and tokenizer
 base_model = AutoModelForCausalLM.from_pretrained("unsloth/Llama-3.2-3B-Instruct-bnb-4bit")
 model = PeftModel.from_pretrained(base_model, "emeses/lab2_model")
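As committed, the added block references torch without an import visible in the hunk, the device variable goes unused, and the pre-existing base_model = AutoModelForCausalLM.from_pretrained(...) line below it re-loads the base model, discarding the device_map="auto" load before the adapter is attached. A minimal consolidated sketch of how the load section could read, assuming import torch belongs at the top of app.py (the tokenizer line is inferred from the "# Load model and tokenizer" comment and is not shown in the diff):

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# Load the 4-bit base model once; device_map="auto" places it on GPU when present.
base_model = AutoModelForCausalLM.from_pretrained(
    "unsloth/Llama-3.2-3B-Instruct-bnb-4bit",
    device_map="auto",
    torch_dtype=torch.float16,
)
tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-3B-Instruct-bnb-4bit")

# Attach the fine-tuned LoRA adapter on top of the base model.
model = PeftModel.from_pretrained(base_model, "emeses/lab2_model")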