vilarin committed on
Commit
1ec2e60
1 Parent(s): a622d4a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -4
app.py CHANGED
@@ -10,11 +10,12 @@ from threading import Thread
10
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
11
  MODEL_ID = "CohereForAI/aya-23-8B"
12
  MODEL_ID2 = "CohereForAI/aya-23-35B"
13
- MODEL_NAME = MODEL_ID2.split("/")[-1]
 
14
 
15
  TITLE = "<h1><center>Aya-23-Chatbox</center></h1>"
16
 
17
- DESCRIPTION = f'<h3><center>MODEL: <a href="https://hf.co/{MODEL_ID}">{MODEL_NAME}</a></center></h3>'
18
 
19
  CSS = """
20
  .duplicate-button {
@@ -49,13 +50,13 @@ if USE_FLASH_ATTENTION:
49
  attn_implementation="flash_attention_2"
50
 
51
  model = AutoModelForCausalLM.from_pretrained(
52
- MODEL_ID2,
53
  quantization_config=quantization_config,
54
  attn_implementation=attn_implementation,
55
  torch_dtype=torch.bfloat16,
56
  device_map="auto",
57
  )
58
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID2)
59
 
60
  @spaces.GPU
61
  def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int):
 
10
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
11
  MODEL_ID = "CohereForAI/aya-23-8B"
12
  MODEL_ID2 = "CohereForAI/aya-23-35B"
13
+ MODELS = os.environ.get("MODELS")
14
+ MODEL_NAME = MODELS.split("/")[-1]
15
 
16
  TITLE = "<h1><center>Aya-23-Chatbox</center></h1>"
17
 
18
+ DESCRIPTION = f'<h3><center>MODEL: <a href="https://hf.co/{MODELS}">{MODEL_NAME}</a></center></h3>'
19
 
20
  CSS = """
21
  .duplicate-button {
 
50
  attn_implementation="flash_attention_2"
51
 
52
  model = AutoModelForCausalLM.from_pretrained(
53
+ MODELS,
54
  quantization_config=quantization_config,
55
  attn_implementation=attn_implementation,
56
  torch_dtype=torch.bfloat16,
57
  device_map="auto",
58
  )
59
+ tokenizer = AutoTokenizer.from_pretrained(MODELS)
60
 
61
  @spaces.GPU
62
  def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int):