rphrp1985 committed on
Commit 6390b56
1 Parent(s): e2ec341

Update app.py

Files changed (1)
  1. app.py +54 -17
app.py CHANGED
@@ -6,6 +6,29 @@ from huggingface_hub import InferenceClient
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+# pip install 'git+https://github.com/huggingface/transformers.git'
+
+from transformers import AutoTokenizer, AutoModelForCausalLM
+
+model_id = "CohereForAI/c4ai-command-r-plus"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(model_id)
+
+# Format message with the command-r-plus chat template
+messages = [{"role": "user", "content": "Hello, how are you?"}]
+input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt")
+## <BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, how are you?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
+
+gen_tokens = model.generate(
+    input_ids,
+    max_new_tokens=100,
+    do_sample=True,
+    temperature=0.3,
+)
+
+gen_text = tokenizer.decode(gen_tokens[0])
+print(gen_text)
+
 
 @spaces.GPU(duration=120)
 def respond(
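
A note on the loading block added above: `from_pretrained(model_id)` with default arguments materializes the checkpoint on CPU in full fp32 precision, which a Space is unlikely to fit for c4ai-command-r-plus. A minimal sketch of the standard `transformers` loading options that address this, assuming `torch` and `accelerate` are available (none of this is in the commit):

# Sketch only, not part of the commit: half-precision weights plus
# automatic device placement via accelerate.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "CohereForAI/c4ai-command-r-plus"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,  # fp16 halves memory vs. the fp32 default
    device_map="auto",          # shard/offload across available devices
)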
@@ -18,27 +41,41 @@ def respond(
 ):
     messages = [{"role": "system", "content": system_message}]
 
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
+    messages = [{"role": "user", "content": "Hello, how are you?"}]
+    input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt")
+    ## <BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, how are you?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
+
+    gen_tokens = model.generate(
+        input_ids,
+        max_new_tokens=100,
+        do_sample=True,
+        temperature=0.3,
+    )
+
+    gen_text = tokenizer.decode(gen_tokens[0])
+    print(gen_text)
+    yield gen_text
+    # for val in history:
+    #     if val[0]:
+    #         messages.append({"role": "user", "content": val[0]})
+    #     if val[1]:
+    #         messages.append({"role": "assistant", "content": val[1]})
 
-    messages.append({"role": "user", "content": message})
+    # messages.append({"role": "user", "content": message})
 
-    response = ""
+    # response = ""
 
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
+    # for message in client.chat_completion(
+    #     messages,
+    #     max_tokens=max_tokens,
+    #     stream=True,
+    #     temperature=temperature,
+    #     top_p=top_p,
+    # ):
+    #     token = message.choices[0].delta.content
 
-        response += token
-        yield response
+    #     response += token
+    #     yield response
 
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
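
As committed, `respond` ignores its `message`, `history`, and `system_message` arguments and always generates from the hardcoded "Hello, how are you?" prompt, overwriting the `messages` list built one line earlier. A minimal sketch of how the commented-out history handling could be wired into the new local-generation path, assuming the usual ChatInterface signature and the tuple-style `history` the old loop consumed (not part of the commit):

# Sketch only, not part of the commit: combines the commit's generate()
# call with the history handling it commented out.
def respond(message, history, system_message, max_tokens, temperature, top_p):
    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:  # (user, assistant) tuples
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(
        messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)  # keep inputs on the model's device

    gen_tokens = model.generate(
        input_ids,
        max_new_tokens=max_tokens,  # honor the UI-supplied limit
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
    )
    # Decode only the newly generated tokens, not the echoed prompt.
    yield tokenizer.decode(gen_tokens[0][input_ids.shape[-1]:], skip_special_tokens=True)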
 
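
One behavioral difference worth flagging: the removed `client.chat_completion(..., stream=True)` loop yielded the growing response token by token, while the new code yields once after generation completes. If streaming matters for the ChatInterface, `transformers` provides `TextIteratorStreamer`; a sketch under the same `model`/`tokenizer` globals as this commit (the helper name `stream_reply` is hypothetical):

# Sketch only, not part of the commit: restore incremental yields using
# transformers' TextIteratorStreamer.
from threading import Thread
from transformers import TextIteratorStreamer

def stream_reply(input_ids, max_tokens, temperature, top_p):
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        inputs=input_ids,
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        streamer=streamer,
    )
    # generate() blocks until done, so run it on a worker thread and
    # consume decoded text chunks from the streamer on this one.
    Thread(target=model.generate, kwargs=generate_kwargs).start()
    response = ""
    for chunk in streamer:
        response += chunk
        yield response  # yield the partial response, as the old loop did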