minhdang committed on
Commit
12625fd
1 Parent(s): 71bf837

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -3
app.py CHANGED
@@ -12,6 +12,8 @@ key = os.environ.get("key")
12
  from huggingface_hub import login
13
  login(key)
14
  from bitnet import replace_linears_in_hf
 
 
15
  # os.system("pip install flash-attn --no-build-isolation")
16
  nf4_config = BitsAndBytesConfig(
17
  load_in_4bit=True,
@@ -22,12 +24,12 @@ nf4_config = BitsAndBytesConfig(
22
 
23
  model_id = "CohereForAI/c4ai-command-r-v01"
24
  tokenizer = AutoTokenizer.from_pretrained(model_id)
25
- model = AutoModelForCausalLM.from_pretrained(model_id,
26
  # load_in_8bit=True,
27
- quantization_config=nf4_config,
28
  # attn_implementation="flash_attention_2",
29
  # torch_dtype = torch.bfloat16,
30
- device_map="auto"
31
  )
32
 
33
  # replace_linears_in_hf(model)
 
12
  from huggingface_hub import login
13
  login(key)
14
  from bitnet import replace_linears_in_hf
15
+ os.system("mkdir c4ai-command-r-v01-exl2")
16
+ os.system("huggingface-cli download bartowski/c4ai-command-r-v01-exl2 --revision 6_5 --local-dir c4ai-command-r-v01-exl2 --local-dir-use-symlinks False")
17
  # os.system("pip install flash-attn --no-build-isolation")
18
  nf4_config = BitsAndBytesConfig(
19
  load_in_4bit=True,
 
24
 
25
  model_id = "CohereForAI/c4ai-command-r-v01"
26
  tokenizer = AutoTokenizer.from_pretrained(model_id)
27
+ model = AutoModelForCausalLM.from_pretrained("c4ai-command-r-v01-exl2",
28
  # load_in_8bit=True,
29
+ #quantization_config=nf4_config,
30
  # attn_implementation="flash_attention_2",
31
  # torch_dtype = torch.bfloat16,
32
+ #device_map="auto"
33
  )
34
 
35
  # replace_linears_in_hf(model)