minhdang commited on
Commit
51dbac2
1 Parent(s): 97a347f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -4
app.py CHANGED
@@ -4,10 +4,11 @@ import transformers
4
  from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig,AwqConfig
5
  import torch
6
  import os
 
7
  key = os.environ.get("key")
8
  from huggingface_hub import login
9
  login(key)
10
-
11
 
12
  nf4_config = BitsAndBytesConfig(
13
  load_in_4bit=True,
@@ -16,15 +17,17 @@ nf4_config = BitsAndBytesConfig(
16
  bnb_4bit_compute_dtype=torch.bfloat16
17
  )
18
 
19
- model_id = "CohereForAI/c4ai-command-r-v01"
20
  tokenizer = AutoTokenizer.from_pretrained(model_id)
21
  model = AutoModelForCausalLM.from_pretrained(model_id,
22
  # load_in_8bit=True,
23
- quantization_config=nf4_config,
24
- # torch_dtype = torch.bfloat16,
25
  # device_map="auto"
26
  )
27
 
 
 
28
  @spaces.GPU
29
  def generate_response(user_input, max_new_tokens, temperature):
30
  os.system("nvidia-smi")
 
4
  from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig,AwqConfig
5
  import torch
6
  import os
7
+ import bitnet
8
  key = os.environ.get("key")
9
  from huggingface_hub import login
10
  login(key)
11
+ from bitnet import replace_linears_in_hf
12
 
13
  nf4_config = BitsAndBytesConfig(
14
  load_in_4bit=True,
 
17
  bnb_4bit_compute_dtype=torch.bfloat16
18
  )
19
 
20
+ model_id = "Nexusflow/Starling-LM-7B-beta"
21
  tokenizer = AutoTokenizer.from_pretrained(model_id)
22
  model = AutoModelForCausalLM.from_pretrained(model_id,
23
  # load_in_8bit=True,
24
+ # quantization_config=nf4_config,
25
+ torch_dtype = torch.bfloat16,
26
  # device_map="auto"
27
  )
28
 
29
+ replace_linears_in_hf(model)
30
+ model.to('cuda').eval()
31
  @spaces.GPU
32
  def generate_response(user_input, max_new_tokens, temperature):
33
  os.system("nvidia-smi")