mustapha committed
Commit 6a59529
Parent: b74d347

Update app.py

Files changed (1)
  1. app.py +13 -6
app.py CHANGED
@@ -13,20 +13,27 @@ from peft import PeftModel
 import transformers
 
 from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig
+from transformers import BitsAndBytesConfig
 
 tokenizer = LlamaTokenizer.from_pretrained("decapoda-research/llama-7b-hf")
 
+
+
+quantization_config = BitsAndBytesConfig(llm_int8_enable_fp32_cpu_offload=True)
+
 model = LlamaForCausalLM.from_pretrained(
     "decapoda-research/llama-7b-hf",
-    load_in_8bit=True,
-    torch_dtype=torch.float16,
-    # device_map="auto",
-    device_map={"":"cpu"},
-    max_memory={"cpu":"12GiB"}
+    # load_in_8bit=True,
+    # torch_dtype=torch.float16,
+    device_map="auto",
+    # device_map={"":"cpu"},
+    max_memory={"cpu":"15GiB"},
+    quantization_config=quantization_config,
 )
 model = PeftModel.from_pretrained(
     model, "tloen/alpaca-lora-7b",
-    torch_dtype=torch.float16
+    # torch_dtype=torch.float16,
+    device_map={"":"cpu"},
 )
 
 device = "cpu"
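The commit swaps the explicit load_in_8bit/fp16 CPU load for a BitsAndBytesConfig with llm_int8_enable_fp32_cpu_offload=True, which lets Accelerate's device_map="auto" dispatch keep CPU-resident modules in fp32 (the bitsandbytes int8 kernels run only on GPU). As a quick sanity check of the resulting pipeline, here is a minimal generation sketch; it assumes the model, tokenizer, and device defined above, and the Alpaca-style prompt template and sampling parameters are illustrative assumptions, not part of this commit.

import torch
from transformers import GenerationConfig

# Hypothetical smoke test; `model`, `tokenizer`, and `device` come from the
# app.py code above. The Alpaca prompt template is an assumption, not in this diff.
prompt = (
    "Below is an instruction that describes a task. "
    "Write a response that appropriately completes the request.\n\n"
    "### Instruction:\nName the capital of France.\n\n### Response:\n"
)
inputs = tokenizer(prompt, return_tensors="pt").to(device)
generation_config = GenerationConfig(temperature=0.1, top_p=0.75, do_sample=True)
with torch.no_grad():
    output = model.generate(
        input_ids=inputs["input_ids"],
        generation_config=generation_config,
        max_new_tokens=64,
    )
print(tokenizer.decode(output[0], skip_special_tokens=True))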