lhzstar committed
Commit 436ce71 • 1 Parent(s): d2b6583

new commits

.gitignore CHANGED
@@ -20,4 +20,6 @@ launch.json
 *.m4a
 *.csv
 input_audios/
-syn_results/
+syn_results/
+falcon-7b-instruct/
+flan-t5-large/
app.py CHANGED
@@ -1,54 +1,19 @@
 from celebbot import CelebBot
 import streamlit as st
-import re
-import spacy
-import json
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModel
+from streamlit_mic_recorder import speech_to_text
 from utils import *
 
 
-@st.cache_resource
-def get_seq2seq_model(model_id):
-    return AutoModelForSeq2SeqLM.from_pretrained(model_id)
-
-@st.cache_resource
-def get_auto_model(model_id):
-    return AutoModel.from_pretrained(model_id)
-
-@st.cache_resource
-def get_tokenizer(model_id):
-    return AutoTokenizer.from_pretrained(model_id)
-
-@st.cache_data
-def get_celeb_data(fpath):
-    with open(fpath) as json_file:
-        return json.load(json_file)
-
-@st.cache_resource
-def preprocess_text(name, gender, text, model_id):
-    lname = name.split(" ")[-1]
-    lname_regex = re.compile(rf'\b({lname})\b')
-    name_regex = re.compile(rf'\b({name})\b')
-    lnames = lname+"’s" if not lname.endswith("s") else lname+"’"
-    lnames_regex = re.compile(rf'\b({lnames})\b')
-    names = name+"’s" if not name.endswith("s") else name+"’"
-    names_regex = re.compile(rf'\b({names})\b')
-    if gender == "M":
-        text = re.sub(he_regex, "I", text)
-        text = re.sub(his_regex, "my", text)
-    elif gender == "F":
-        text = re.sub(she_regex, "I", text)
-        text = re.sub(her_regex, "my", text)
-    text = re.sub(names_regex, "my", text)
-    text = re.sub(lnames_regex, "my", text)
-    text = re.sub(name_regex, "I", text)
-    text = re.sub(lname_regex, "I", text)
-    spacy_model = spacy.load(model_id)
-    texts = [i.text.strip() for i in spacy_model(text).sents]
-    return spacy_model, texts
-
 def main():
+
     hide_footer()
+    model_list = ["flan-t5-large", "flan-t5-xl", "Falcon-7b-instruct"]
+    celeb_data = get_celeb_data(f'data.json')
+
+    st.sidebar.header("CelebChat")
+    expander = st.sidebar.expander('About the app')
+    with expander:
+        st.markdown("This app is a demo of celebrity chatting!")
     if "messages" not in st.session_state:
         st.session_state["messages"] = []
     if "QA_model_path" not in st.session_state:
@@ -57,65 +22,80 @@ def main():
         st.session_state["sentTr_model_path"] = "sentence-transformers/all-mpnet-base-v2"
     if "start_chat" not in st.session_state:
         st.session_state["start_chat"] = False
-
-
-    model_list = ["base", "large", "xl", "xxl"]
-
-    for message in st.session_state["messages"]:
-        with st.chat_message(message["role"]):
-            st.markdown(message["content"])
-
-    celeb_data = get_celeb_data(f'data.json')
-
-    # Create a Form Component on the Sidebar for accepting input data and parameters
-    celeb_name = st.sidebar.selectbox('Choose a celebrity', options=list(celeb_data.keys()))
-    celeb_gender = celeb_data[celeb_name]["gender"]
-    knowledge = celeb_data[celeb_name]["knowledge"]
-    model_choice = st.sidebar.selectbox("Choose Your Flan-T5 model", options=model_list)
-    st.session_state["QA_model_path"] = f"google/flan-t5-{model_choice}"
-
-    # submitted = st.form_submit_button(label="Start Chatting")
-    # if submitted:
-    #     st.session_state["start_chat"] = True
-
-
-    # if st.session_state["start_chat"]:
-
-    celeb_bot = CelebBot(celeb_name,
-                         get_tokenizer(st.session_state["QA_model_path"]),
-                         get_seq2seq_model(st.session_state["QA_model_path"]),
-                         get_tokenizer(st.session_state["sentTr_model_path"]),
-                         get_auto_model(st.session_state["sentTr_model_path"]),
-                         *preprocess_text(celeb_name, celeb_gender, knowledge, "en_core_web_sm")
-                         )
-
-    prompt = st.chat_input("Say something")
-    print(prompt)
-    if prompt:
-        celeb_bot.text = prompt
-        # Display user message in chat message container
-        st.chat_message("user").markdown(prompt)
-        # Add user message to chat history
-        st.session_state["messages"].append({"role": "user", "content": prompt})
-
-        # Add assistant response to chat history
-        response = celeb_bot.question_answer()
+    if "prompt" not in st.session_state:
+        st.session_state["prompt"] = None
+
+    def start_chat(name, model_id):
+        print(name, model_id)
+        if name != '' and model_id != '':
+            st.session_state["start_chat"] = True
+        else:
+            st.session_state["start_chat"] = False
+
+    with st.sidebar.form("my_form"):
+        print("enter form")
+        st.session_state["celeb_name"] = st.selectbox('Choose a celebrity', options=list(celeb_data.keys()))
+        model_id = st.selectbox("Choose Your Flan-T5 model", options=model_list)
+        st.session_state["QA_model_path"] = f"google/{model_id}" if "flan-t5" in model_id else model_id
+
+        st.form_submit_button(label="Start Chatting", on_click=start_chat, args=(st.session_state["celeb_name"], st.session_state["QA_model_path"]))
+
+    if st.session_state["start_chat"]:
+        celeb_gender = celeb_data[st.session_state["celeb_name"]]["gender"]
+        knowledge = celeb_data[st.session_state["celeb_name"]]["knowledge"]
+        st.session_state["celeb_bot"] = CelebBot(st.session_state["celeb_name"],
+                            get_tokenizer(st.session_state["QA_model_path"]),
+                            get_seq2seq_model(st.session_state["QA_model_path"]) if "flan-t5" in st.session_state["QA_model_path"] else get_causal_model(st.session_state["QA_model_path"]),
+                            get_tokenizer(st.session_state["sentTr_model_path"]),
+                            get_auto_model(st.session_state["sentTr_model_path"]),
+                            *preprocess_text(st.session_state["celeb_name"], celeb_gender, knowledge, "en_core_web_sm")
+                            )
+
+        dialogue_container = st.container()
+        with dialogue_container:
+            for message in st.session_state["messages"]:
+                with st.chat_message(message["role"]):
+                    st.markdown(message["content"])
+
+
+        if "_last_audio_id" not in st.session_state:
+            st.session_state["_last_audio_id"] = 0
+        with st.sidebar:
+            prompt_from_audio = speech_to_text(start_prompt="Start Recording", stop_prompt="Stop Recording", language='en', use_container_width=True, just_once=True, key='STT')
+            prompt_from_text = st.text_input('Or write something')
 
-        # disable autoplay to play in HTML
-        b64 = celeb_bot.text_to_speech(autoplay=False)
-        md = f"""
-        <p>{response}</p>
-        <audio controls autoplay style="display:none;">
-        <source src="data:audio/wav;base64,{b64}" type="audio/wav">
-        Your browser does not support the audio element.
-        </audio>
-        """
-        st.chat_message("assistant").markdown(
-            md,
-            unsafe_allow_html=True,
-        )
-        # Display assistant response in chat message container
-        st.session_state["messages"].append({"role": "assistant", "content": response})
+        if prompt_from_audio != None:
+            st.session_state["prompt"] = prompt_from_audio
+        elif prompt_from_text != None:
+            st.session_state["prompt"] = prompt_from_text
+        print(st.session_state["prompt"])
+        if st.session_state["prompt"] != None and st.session_state["prompt"] != '':
+            st.session_state["celeb_bot"].text = st.session_state["prompt"]
+            # Display user message in chat message container
+            with dialogue_container:
+                st.chat_message("user").markdown(st.session_state["prompt"])
+            # Add user message to chat history
+            st.session_state["messages"].append({"role": "user", "content": st.session_state["prompt"]})
+
+            # Add assistant response to chat history
+            response = st.session_state["celeb_bot"].question_answer()
+
+            # disable autoplay to play in HTML
+            b64 = st.session_state["celeb_bot"].text_to_speech(autoplay=False)
+            md = f"""
+            <p>{response}</p>
+            <audio controls autoplay style="display:none;">
+                <source src="data:audio/wav;base64,{b64}" type="audio/wav">
+                Your browser does not support the audio element.
+            </audio>
+            """
+            with dialogue_container:
+                st.chat_message("assistant").markdown(
                    md,
                    unsafe_allow_html=True,
                )
+            # Display assistant response in chat message container
+            st.session_state["messages"].append({"role": "assistant", "content": response})
 
 
 if __name__ == "__main__":
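Note on the new prompt handling (editor's sketch, not part of the commit): speech_to_text from streamlit_mic_recorder appears to return None until a fresh recording has been transcribed, while st.text_input returns an empty string by default, so the elif branch runs on almost every rerun. A minimal truthiness-based selection, assuming the same prompt_from_audio and prompt_from_text variables as above, would be:

    # Sketch only: prefer a fresh transcription, fall back to typed text, ignore empty strings.
    prompt = prompt_from_audio or prompt_from_text
    if prompt:
        st.session_state["prompt"] = prompt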
celebbot.py CHANGED
@@ -102,13 +102,13 @@ class CelebBot():
            self.text = f"Hello I am {self.name} the AI, what can I do for you?"
        ## have a conversation
        else:
-            # if re.search(you_regex, self.text) != None:
-            instruction1 = f'[Instruction] You are a celebrity named {self.name}. You need to answer the question based on knowledge and commonsense.'
+            if re.search(re.compile(rf'\b(you|your|{self.name})\b', flags=re.IGNORECASE), self.text) != None:
+                instruction1 = f'[Instruction] You are a celebrity named {self.name}. You need to answer the question based on knowledge and commonsense.'
 
-            knowledge = self.retrieve_knowledge_assertions()
-            # else:
-            #     instruction1 = f'[Instruction] You need to answer the question based on commonsense.'
-            query = f"{instruction1} [knowledge] {knowledge} [question] {self.text} {self.name}!"
+                knowledge = self.retrieve_knowledge_assertions()
+            else:
+                instruction1 = f'[Instruction] You need to answer the question based on commonsense.'
+            query = f"{instruction1} [knowledge] {knowledge} [question] {self.text}"
            input_ids = self.QA_tokenizer(f"{query}", return_tensors="pt").input_ids
            outputs = self.QA_model.generate(input_ids, max_length=1024)
            self.text = self.QA_tokenizer.decode(outputs[0], skip_special_tokens=True)
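One caveat about the rewritten branch: knowledge is only assigned when the you/your/{name} regex matches, yet the query f-string interpolates it in both branches, so the commonsense-only path would raise a NameError unless knowledge is defined elsewhere in the class (not visible in this hunk). A hedged sketch of a defensive variant (the helper name is illustrative, not from the repo):

    import re

    def build_query(self):
        # Sketch only: default knowledge to "" so the commonsense-only branch still formats.
        knowledge = ""
        mention_regex = re.compile(rf'\b(you|your|{self.name})\b', flags=re.IGNORECASE)
        if mention_regex.search(self.text):
            instruction = (f'[Instruction] You are a celebrity named {self.name}. '
                           f'You need to answer the question based on knowledge and commonsense.')
            knowledge = self.retrieve_knowledge_assertions()
        else:
            instruction = '[Instruction] You need to answer the question based on commonsense.'
        return f"{instruction} [knowledge] {knowledge} [question] {self.text}"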
data.json CHANGED
The diff for this file is too large to render. See raw diff
 
embeds/Adele.npy ADDED
Binary file (1.15 kB).
 
embeds/Barack_Obama.npy ADDED
Binary file (1.15 kB).
 
requirements.txt CHANGED
@@ -23,9 +23,10 @@ torchaudio==0.11.0
 tensorflow-cpu==2.9.0
 denoiser==0.1.5
 SpeechRecognition==3.10.0
-transformers==4.25.1
+transformers==4.27.1
 streamlit==1.27.2
 sentence-transformers==2.2.2
 evaluate==0.4.1
 https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl
 protobuf==3.20
+streamlit_mic_recorder==0.0.2
rtvc/synthesizer/utils/cleaners.py CHANGED
@@ -223,6 +223,7 @@ def english_cleaners_predict(text):
     text = expand_numbers(text)
     # text = split_conj(text)
     text = collapse_whitespace(text)
+    text = text.replace(',', '.')
     return text
 
 def english_cleaners(text):
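The added replace means english_cleaners_predict now turns commas into periods before run_tts.py splits the cleaned text into sentences with spaCy, presumably so each clause is synthesized as its own chunk. Taken in isolation (ignoring the number-expansion steps), the effect is simply:

    # Hedged illustration of the new line only, not the full cleaner pipeline.
    text = "Hello, nice to meet you, I am a chatbot"
    print(text.replace(',', '.'))  # -> "Hello. nice to meet you. I am a chatbot"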
run_tts.py CHANGED
@@ -27,7 +27,7 @@ from rtvc.utils.argutils import print_args
 from rtvc.utils.default_models import ensure_default_models
 from rtvc.vocoder import inference as vocoder
 from rtvc.vocoder.display import save_attention_multiple, save_spectrogram, save_stop_tokens
-from rtvc.synthesizer.utils.cleaners import english_cleaners
+from rtvc.synthesizer.utils.cleaners import english_cleaners_predict
 from rtvc.speed_changer.fixSpeed import *
 
 
@@ -41,12 +41,12 @@ def tts(text, embed_name, nlp, autoplay=True):
 
     ensure_default_models(run_id, models_dir)
     synthesizer = Synthesizer_infer(list(models_dir.glob(f"{run_id}/synthesizer.pt"))[0])
-
+    # vocoder.load_model(list(models_dir.glob(f"{run_id}/vocoder.pt"))[0])
    ## Generating the spectrogram
 
     # The synthesizer works in batch, so you need to put your data in a list or numpy array
     def split_text(text):
-        text = english_cleaners(text)
+        text = english_cleaners_predict(text)
         texts = [i.text.strip() for i in nlp(text).sents] # split paragraph to sentences
         return texts
 
@@ -81,8 +81,7 @@ def tts(text, embed_name, nlp, autoplay=True):
 
     # Synthesizing the waveform is fairly straightforward. Remember that the longer the
     # spectrogram, the more time-efficient the vocoder.
-    wav = Synthesizer_infer.griffin_lim(spec)
-
+    wav = synthesizer.griffin_lim(spec)
     wav = vocoder.waveform_denoising(wav)
 
     # Add breaks
@@ -118,7 +117,7 @@
 
 
 if __name__ == "__main__":
-    text = "Continuing without audio playback. Suppress this message"
-    embed_name = "Cate_Blanchett"
+    text = "Adkins was raised by a young single mother in various working-class neighbourhoods of London. As a child, she enjoyed singing contemporary pop music and learned to play the guitar and the clarinet. However, it was not until her early teens, when she discovered rhythm-and-blues singer Etta James and other mid-20th-century performers, that she began to consider a musical career. While she honed her talents at a government-funded secondary school for the performing arts, a friend began posting songs Adkins had written and recorded onto the social networking Web site Myspace. Her music eventually caught the attention of record labels, and in 2006, several months after graduating, she signed a contract with XL Recordings."
+    embed_name = "Adele"
     nlp = spacy.load('en_core_web_sm')
     tts(text, embed_name, nlp)
utils.py CHANGED
@@ -1,7 +1,10 @@
 import re
+import spacy
+import json
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM, AutoModel
 import streamlit as st
+import whisper
 
-you_regex = re.compile(r'\b(you|your)\b', flags=re.IGNORECASE)
 he_regex = re.compile(r'\b(he|him|himself)\b', flags=re.IGNORECASE)
 his_regex = re.compile(r'\b(his)\b', flags=re.IGNORECASE)
 she_regex = re.compile(r'\b(she|herself)\b', flags=re.IGNORECASE)
@@ -14,4 +17,56 @@ def hide_footer():
    footer {visibility: hidden;}
    </style>
    """
-    st.markdown(hide_st_style, unsafe_allow_html=True)
+    st.markdown(hide_st_style, unsafe_allow_html=True)
+
+@st.cache_resource
+def get_whisper_model(model_url:str='tiny'):
+    print("--------------------------------------------")
+    print("Attempting to load Whisper ...")
+    model = whisper.load_model(model_url, device='cpu')
+    print("Succesfully loaded Whisper")
+    return model
+
+@st.cache_resource
+def get_seq2seq_model(model_id):
+    return AutoModelForSeq2SeqLM.from_pretrained(model_id)
+
+@st.cache_resource
+def get_causal_model(model_id):
+    return AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
+
+@st.cache_resource
+def get_auto_model(model_id):
+    return AutoModel.from_pretrained(model_id)
+
+@st.cache_resource
+def get_tokenizer(model_id):
+    return AutoTokenizer.from_pretrained(model_id)
+
+@st.cache_data
+def get_celeb_data(fpath):
+    with open(fpath) as json_file:
+        return json.load(json_file)
+
+@st.cache_resource
+def preprocess_text(name, gender, text, model_id):
+    lname = name.split(" ")[-1]
+    lname_regex = re.compile(rf'\b({lname})\b')
+    name_regex = re.compile(rf'\b({name})\b')
+    lnames = lname+"’s" if not lname.endswith("s") else lname+"’"
+    lnames_regex = re.compile(rf'\b({lnames})\b')
+    names = name+"’s" if not name.endswith("s") else name+"’"
+    names_regex = re.compile(rf'\b({names})\b')
+    if gender == "M":
+        text = re.sub(he_regex, "I", text)
+        text = re.sub(his_regex, "my", text)
+    elif gender == "F":
+        text = re.sub(she_regex, "I", text)
+        text = re.sub(her_regex, "my", text)
+    text = re.sub(names_regex, "my", text)
+    text = re.sub(lnames_regex, "my", text)
+    text = re.sub(name_regex, "I", text)
+    text = re.sub(lname_regex, "I", text)
+    spacy_model = spacy.load(model_id)
+    texts = [i.text.strip() for i in spacy_model(text).sents]
+    return spacy_model, texts
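For reference, preprocess_text rewrites third-person mentions of the celebrity into first person and then sentence-splits the result with spaCy. A rough, hedged example (the input string is made up for illustration, not taken from data.json):

    # Assumes en_core_web_sm is installed and the helpers above are importable from utils.
    nlp, sents = preprocess_text(
        "Barack Obama", "M",
        "He was born in Hawaii. Obama served as president.",
        "en_core_web_sm",
    )
    print(sents)  # expected: ['I was born in Hawaii.', 'I served as president.']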