ylacombe committed on
Commit
3922171
·
1 Parent(s): 5b0a35e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -16
app.py CHANGED
@@ -14,9 +14,9 @@ import uuid
14
  from TTS.api import TTS
15
  tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1", gpu=True)
16
 
17
- title = "Speak with Llama2 70B"
18
 
19
- DESCRIPTION = """# Speak with Llama2 70B"""
20
  css = """.toast-wrap { display: none !important } """
21
 
22
  from huggingface_hub import HfApi
@@ -26,7 +26,7 @@ api = HfApi(token=HF_TOKEN)
26
 
27
  repo_id = "ylacombe/voice-chat-with-lama"
28
 
29
- system_message = "\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
30
  temperature = 0.9
31
  top_p = 0.6
32
  repetition_penalty = 1.2
@@ -41,9 +41,49 @@ from transformers import pipeline
41
  import numpy as np
42
 
43
  from gradio_client import Client
 
 
44
 
45
  whisper_client = Client("https://sanchit-gandhi-whisper-large-v2.hf.space/")
46
- text_client = Client("https://ysharma-explore-llamav2-with-tgi.hf.space/")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
 
49
  def transcribe(wav_path):
@@ -82,15 +122,7 @@ def bot(history, system_prompt=""):
82
  system_prompt = system_message
83
 
84
  history[-1][1] = ""
85
- for character in text_client.submit(
86
- history,
87
- system_prompt,
88
- temperature,
89
- 4096,
90
- temperature,
91
- repetition_penalty,
92
- api_name="/chat"
93
- ):
94
  history[-1][1] = character
95
  yield history
96
 
@@ -177,12 +209,10 @@ with gr.Blocks(title=title) as demo:
177
  This Space demonstrates how to speak to a chatbot, based solely on open-source models.
178
  It relies on 3 models:
179
  1. [Whisper-large-v2](https://huggingface.co/spaces/sanchit-gandhi/whisper-large-v2) as an ASR model, to transcribe recorded audio to text. It is called through a [gradio client](https://www.gradio.app/docs/client).
180
- 2. [Llama-2-70b-chat-hf](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) as the chat model, the actual chat model. It is also called through a [gradio client](https://www.gradio.app/docs/client).
181
  3. [Coqui's XTTS](https://huggingface.co/spaces/coqui/xtts) as a TTS model, to generate the chatbot answers. This time, the model is hosted locally.
182
 
183
  Note:
184
- - As a derivate work of [Llama-2-70b-chat](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) by Meta,
185
- this demo is governed by the original [license](https://huggingface.co/spaces/ysharma/Explore_llamav2_with_TGI/blob/main/LICENSE.txt) and [acceptable use policy](https://huggingface.co/spaces/ysharma/Explore_llamav2_with_TGI/blob/main/USE_POLICY.md).
186
  - By using this demo you agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml""")
187
  demo.queue()
188
  demo.launch(debug=True)
 
14
  from TTS.api import TTS
15
  tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1", gpu=True)
16
 
17
+ title = "Voice chat with Mistral 7B Instruct"
18
 
19
+ DESCRIPTION = """# Voice chat with Mistral 7B Instruct"""
20
  css = """.toast-wrap { display: none !important } """
21
 
22
  from huggingface_hub import HfApi
 
26
 
27
  repo_id = "ylacombe/voice-chat-with-lama"
28
 
29
+ system_message = "\nYou are a helpful, respectful and honest assistant. Your answers are short, ideally a few words long, if it is possible. Always answer as helpfully as possible, while being safe.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
30
  temperature = 0.9
31
  top_p = 0.6
32
  repetition_penalty = 1.2
 
41
  import numpy as np
42
 
43
  from gradio_client import Client
44
+ from huggingface_hub import InferenceClient
45
+
46
 
47
  whisper_client = Client("https://sanchit-gandhi-whisper-large-v2.hf.space/")
48
+ text_client = InferenceClient(
49
+ "mistralai/Mistral-7B-Instruct-v0.1"
50
+ )
51
+
52
+
53
def format_prompt(message, history):
    """Build the instruction-tagged prompt string sent to the chat model.

    Args:
        message: Text placed in the final ``[INST] ... [/INST]`` segment.
        history: Iterable of ``(user_prompt, bot_response)`` pairs; each pair
            is rendered as one instruction segment followed by the response
            and a closing ``</s>`` marker.

    Returns:
        A single string that starts with ``<s>``, contains one segment per
        history pair in order, and ends with the segment for ``message``.
    """
    pieces = ["<s>"]
    for turn in history:
        user_prompt, bot_response = turn
        # One completed exchange: the instruction, then the reply and its
        # end-of-turn marker (note the trailing space after "</s>").
        pieces.append(f"[INST] {user_prompt} [/INST] {bot_response}</s> ")
    pieces.append(f"[INST] {message} [/INST]")
    return "".join(pieces)
60
+
61
def generate(
    prompt, history, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,
):
    """Stream a reply from the hosted chat model, yielding the text so far.

    The prompt is built with ``format_prompt(prompt, history)`` and sent
    through the module-level ``text_client``.

    Args:
        prompt: Text used as the final instruction of the formatted prompt.
        history: Iterable of ``(user_prompt, bot_response)`` pairs that
            precede ``prompt`` in the formatted prompt.
        temperature: Sampling temperature; values below 0.01 are raised
            to 0.01.
        max_new_tokens: Forwarded unchanged to ``text_generation``.
        top_p: Forwarded to ``text_generation`` after conversion to float.
        repetition_penalty: Forwarded unchanged to ``text_generation``.

    Yields:
        The accumulated reply text after each streamed non-special token.

    Returns:
        The full reply text (available as the generator's ``StopIteration``
        value).
    """
    # Keep the temperature at or above the 0.01 floor.
    temperature = max(float(temperature), 1e-2)
    top_p = float(top_p)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )

    formatted_prompt = format_prompt(prompt, history)

    # Fix: the inference client in this file is bound to `text_client`;
    # the previous reference to `client` raised NameError on the first call.
    stream = text_client.text_generation(
        formatted_prompt,
        **generate_kwargs,
        stream=True,
        details=True,
        return_full_text=False,
    )
    output = ""

    for response in stream:
        token = response.token
        # Special tokens (e.g. the end-of-sequence marker) are control
        # tokens, not reply text, so keep them out of the accumulated answer.
        if getattr(token, "special", False):
            continue
        output += token.text
        yield output
    return output
87
 
88
 
89
  def transcribe(wav_path):
 
122
  system_prompt = system_message
123
 
124
  history[-1][1] = ""
125
+ for character in generate(system_prompt, history):
 
 
 
 
 
 
 
 
126
  history[-1][1] = character
127
  yield history
128
 
 
209
  This Space demonstrates how to speak to a chatbot, based solely on open-source models.
210
  It relies on 3 models:
211
  1. [Whisper-large-v2](https://huggingface.co/spaces/sanchit-gandhi/whisper-large-v2) as an ASR model, to transcribe recorded audio to text. It is called through a [gradio client](https://www.gradio.app/docs/client).
212
+ 2. [Mistral-7b-instruct](https://huggingface.co/spaces/osanseviero/mistral-super-fast) as the chat model, the actual chat model. It is called from [huggingface_hub](https://huggingface.co/docs/huggingface_hub/guides/inference).
213
  3. [Coqui's XTTS](https://huggingface.co/spaces/coqui/xtts) as a TTS model, to generate the chatbot answers. This time, the model is hosted locally.
214
 
215
  Note:
 
 
216
  - By using this demo you agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml""")
217
  demo.queue()
218
  demo.launch(debug=True)