Azure99 committed on
Commit
41e00a4
·
verified ·
1 Parent(s): d723168

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -29
app.py CHANGED
@@ -5,48 +5,30 @@ import spaces
5
  import torch
6
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
7
 
8
- MAX_INPUT_LIMIT = 3584
9
- MAX_NEW_TOKENS = 1536
10
- MODEL_NAME = "Azure99/blossom-v5.1-9b"
11
 
12
  model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.bfloat16, device_map="auto")
13
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
14
 
15
 
16
  def get_input_ids(inst, history):
17
- prefix = ("A chat between a human and an artificial intelligence bot. "
18
- "The bot gives helpful, detailed, and polite answers to the human's questions.")
19
- patterns = []
20
- for conv in history:
21
- patterns.append(f'\n|Human|: {conv[0]}\n|Bot|: ')
22
- patterns.append(f'{conv[1]}')
23
- patterns.append(f'\n|Human|: {inst}\n|Bot|: ')
24
- patterns[0] = prefix + patterns[0]
25
-
26
- input_ids = []
27
- for i, pattern in enumerate(patterns):
28
- input_ids += tokenizer.encode(pattern, add_special_tokens=(i == 0))
29
- if i % 2 == 1:
30
- input_ids += [tokenizer.eos_token_id]
31
- return input_ids
32
-
33
-
34
- def generate(generation_kwargs):
35
- with torch.no_grad():
36
- Thread(target=model.generate, kwargs=generation_kwargs).start()
37
 
38
 
39
  @spaces.GPU
40
  def chat(inst, history, temperature, top_p, repetition_penalty):
41
  streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
42
  input_ids = get_input_ids(inst, history)
43
- if len(input_ids) > MAX_INPUT_LIMIT:
44
- yield "The input is too long, please clear the history."
45
- return
46
- generation_kwargs = dict(input_ids=torch.tensor([input_ids]).to(model.device),
47
  streamer=streamer, do_sample=True, max_new_tokens=MAX_NEW_TOKENS,
48
  temperature=temperature, top_p=top_p, repetition_penalty=repetition_penalty)
49
- generate(generation_kwargs)
 
50
 
51
  outputs = ""
52
  for new_text in streamer:
@@ -87,7 +69,7 @@ additional_inputs = [
87
  gr.ChatInterface(chat,
88
  chatbot=gr.Chatbot(show_label=False, height=500, show_copy_button=True, render_markdown=True),
89
  textbox=gr.Textbox(placeholder="", container=False, scale=7),
90
- title="Blossom 9B Demo",
91
  description='Hello, I am Blossom, an open source conversational large language model.🌠'
92
  '<a href="https://github.com/Azure99/BlossomLM">GitHub</a>',
93
  theme="soft",
 
5
  import torch
6
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
7
 
8
+ MAX_NEW_TOKENS = 2048
9
+ MODEL_NAME = "Azure99/Blossom-V6-14B"
 
10
 
11
  model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.bfloat16, device_map="auto")
12
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
13
 
14
 
15
  def get_input_ids(inst, history):
16
+ conversation = []
17
+ for user, assistant in history:
18
+ conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
19
+ conversation.append({"role": "user", "content": inst})
20
+ return tokenizer.apply_chat_template(conversation, return_tensors="pt").to(model.device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
 
23
  @spaces.GPU
24
  def chat(inst, history, temperature, top_p, repetition_penalty):
25
  streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
26
  input_ids = get_input_ids(inst, history)
27
+ generation_kwargs = dict(input_ids=input_ids,
 
 
 
28
  streamer=streamer, do_sample=True, max_new_tokens=MAX_NEW_TOKENS,
29
  temperature=temperature, top_p=top_p, repetition_penalty=repetition_penalty)
30
+
31
+ Thread(target=model.generate, kwargs=generation_kwargs).start()
32
 
33
  outputs = ""
34
  for new_text in streamer:
 
69
  gr.ChatInterface(chat,
70
  chatbot=gr.Chatbot(show_label=False, height=500, show_copy_button=True, render_markdown=True),
71
  textbox=gr.Textbox(placeholder="", container=False, scale=7),
72
+ title="Blossom 14B Demo",
73
  description='Hello, I am Blossom, an open source conversational large language model.🌠'
74
  '<a href="https://github.com/Azure99/BlossomLM">GitHub</a>',
75
  theme="soft",