Heng666 committed on
Commit
95c51fd
·
verified ·
1 Parent(s): 19b814b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -6
app.py CHANGED
@@ -16,8 +16,9 @@ from threading import Thread
16
  # model.generation_config = GenerationConfig.from_pretrained(model_name_or_path)
17
 
18
  model_name_or_path = "scutcyr/BianQue-2"
 
19
  tokenizer = AutoTokenizer.from_pretrained(model_name_or_path,trust_remote_code=True)
20
- model = AutoModel.from_pretrained(model_name_or_path, trust_remote_code=True)
21
 
22
  # using CUDA for an optimal experience
23
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
@@ -43,17 +44,17 @@ def predict(message, history):
43
  messages = "</s>".join(["</s>".join(["\n<|user|>:" + item[0], "\n<|assistant|>:" + item[1]])
44
  for item in history_transformer_format])
45
  model_inputs = tokenizer([messages], return_tensors="pt").to(device)
46
- streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
47
  generate_kwargs = dict(
48
  model_inputs,
49
  streamer=streamer,
50
- max_new_tokens=1024,
51
  do_sample=True,
52
- top_p=0.95,
53
  top_k=50,
54
- temperature=0.7,
55
  num_beams=1,
56
- stopping_criteria=StoppingCriteriaList([stop])
57
  )
58
  t = Thread(target=model.generate, kwargs=generate_kwargs)
59
  t.start() # Starting the generation in a separate thread.
 
16
  # model.generation_config = GenerationConfig.from_pretrained(model_name_or_path)
17
 
18
  model_name_or_path = "scutcyr/BianQue-2"
19
+ model = AutoModel.from_pretrained(model_name_or_path, trust_remote_code=True).half()
20
  tokenizer = AutoTokenizer.from_pretrained(model_name_or_path,trust_remote_code=True)
21
+
22
 
23
  # using CUDA for an optimal experience
24
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
44
  messages = "</s>".join(["</s>".join(["\n<|user|>:" + item[0], "\n<|assistant|>:" + item[1]])
45
  for item in history_transformer_format])
46
  model_inputs = tokenizer([messages], return_tensors="pt").to(device)
47
+ streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
48
  generate_kwargs = dict(
49
  model_inputs,
50
  streamer=streamer,
51
+ max_new_tokens=2048,
52
  do_sample=True,
53
+ top_p=0.75,
54
  top_k=50,
55
+ temperature=0.95,
56
  num_beams=1,
57
+ # stopping_criteria=StoppingCriteriaList([stop]) 暫時拿掉
58
  )
59
  t = Thread(target=model.generate, kwargs=generate_kwargs)
60
  t.start() # Starting the generation in a separate thread.