Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -13,20 +13,27 @@ from peft import PeftModel
|
|
13 |
import transformers
|
14 |
|
15 |
from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig
|
|
|
16 |
|
17 |
tokenizer = LlamaTokenizer.from_pretrained("decapoda-research/llama-7b-hf")
|
18 |
|
|
|
|
|
|
|
|
|
19 |
model = LlamaForCausalLM.from_pretrained(
|
20 |
"decapoda-research/llama-7b-hf",
|
21 |
-
load_in_8bit=True,
|
22 |
-
torch_dtype=torch.float16,
|
23 |
-
|
24 |
-
device_map={"":"cpu"},
|
25 |
-
max_memory={"cpu":"
|
|
|
26 |
)
|
27 |
model = PeftModel.from_pretrained(
|
28 |
model, "tloen/alpaca-lora-7b",
|
29 |
-
torch_dtype=torch.float16
|
|
|
30 |
)
|
31 |
|
32 |
device = "cpu"
|
|
|
13 |
import transformers
|
14 |
|
15 |
from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig
|
16 |
+
from transformers import BitsAndBytesConfig
|
17 |
|
18 |
tokenizer = LlamaTokenizer.from_pretrained("decapoda-research/llama-7b-hf")
|
19 |
|
20 |
+
|
21 |
+
|
22 |
+
quantization_config = BitsAndBytesConfig(llm_int8_enable_fp32_cpu_offload=True)
|
23 |
+
|
24 |
model = LlamaForCausalLM.from_pretrained(
|
25 |
"decapoda-research/llama-7b-hf",
|
26 |
+
# load_in_8bit=True,
|
27 |
+
# torch_dtype=torch.float16,
|
28 |
+
device_map="auto",
|
29 |
+
# device_map={"":"cpu"},
|
30 |
+
max_memory={"cpu":"15GiB"},
|
31 |
+
quantization_config=quantization_config
|
32 |
)
|
33 |
model = PeftModel.from_pretrained(
|
34 |
model, "tloen/alpaca-lora-7b",
|
35 |
+
# torch_dtype=torch.float16,
|
36 |
+
device_map={"":"cpu"},
|
37 |
)
|
38 |
|
39 |
device = "cpu"
|