BramVanroy committed
Commit 974c121
1 Parent(s): 9a56c6d

Update app.py

Files changed (1): app.py (+2 -2)

app.py CHANGED
@@ -17,7 +17,7 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "8192"))
 
 model_id = "BramVanroy/fietje-2b-chat"
 avatar_url = "https://huggingface.co/spaces/BramVanroy/fietje-2b/resolve/main/img/fietje-2b-avatar.png"
-model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")
+model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_8bit=True, attn_implementation="flash_attention_2")
 model = model.to("cuda" if torch.cuda.is_available() else "cpu")
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 tokenizer.pad_token_id = tokenizer.eos_token_id
@@ -116,7 +116,7 @@ chat_interface = gr.ChatInterface(
             minimum=0,
             maximum=20,
             step=1,
-            value=8,
+            value=0,
         ),
         gr.Checkbox(
            label="Do sample",
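For context, a minimal sketch of what the new loading path amounts to, written with an explicit BitsAndBytesConfig object rather than the load_in_8bit=True shorthand the commit uses (the config-object form is an assumption about equivalent usage, not what the commit itself contains):

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

    model_id = "BramVanroy/fietje-2b-chat"

    # 8-bit quantization via an explicit config object (assumed equivalent to
    # the load_in_8bit=True shorthand used in the commit).
    quant_config = BitsAndBytesConfig(load_in_8bit=True)

    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        quantization_config=quant_config,
        device_map="auto",                        # dispatches weights onto the available GPU(s)
        attn_implementation="flash_attention_2",  # needs the flash-attn package and a supported GPU
    )

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    tokenizer.pad_token_id = tokenizer.eos_token_id

One thing to watch: with device_map="auto" the quantized weights are already placed on the GPU at load time, and recent transformers versions generally refuse to move a bitsandbytes-quantized model again with .to(), so the follow-up model.to("cuda" ...) line in the Space is typically redundant for this loading path.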