AtAndDev committed on
Commit
e7dfb54
1 Parent(s): b2ac935

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -7,8 +7,8 @@ import spaces
7
  import gradio as gr
8
  from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TextIteratorStreamer
9
 
10
- subprocess.run('pip install -U transformers tokenizers', shell=True)
11
- subprocess.run('pip install -U flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
12
 
13
  MODEL_ID = "AtAndDev/marco-qwq-7B"
14
  CHAT_TEMPLATE = "ChatML"
@@ -110,7 +110,7 @@ quantization_config = BitsAndBytesConfig(
110
  load_in_4bit=True,
111
  bnb_4bit_compute_dtype=torch.bfloat16
112
  )
113
- tokenizer = AutoTokenizer.from_pretrained('AtAndDev/marco-qwq-7B')
114
  model = AutoModelForCausalLM.from_pretrained(
115
  MODEL_ID,
116
  device_map="auto",
 
7
  import gradio as gr
8
  from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TextIteratorStreamer
9
 
10
+ subprocess.run('pip install --upgrade transformers tokenizers', shell=True)
11
+ subprocess.run('pip install --upgrade flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
12
 
13
  MODEL_ID = "AtAndDev/marco-qwq-7B"
14
  CHAT_TEMPLATE = "ChatML"
 
110
  load_in_4bit=True,
111
  bnb_4bit_compute_dtype=torch.bfloat16
112
  )
113
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
114
  model = AutoModelForCausalLM.from_pretrained(
115
  MODEL_ID,
116
  device_map="auto",