lhzstar committed on
Commit
2a846a9
1 Parent(s): b55470f

new commits

Browse files
Files changed (3) hide show
  1. celebbot.py +9 -18
  2. run_eval.py +19 -22
  3. test.py +0 -5
celebbot.py CHANGED
@@ -50,10 +50,6 @@ class CelebBot():
50
  self.text = ""
51
  print(f"me --> No audio recognized")
52
 
53
-
54
- def wake_up(self, text):
55
- return True if "hey " + self.name in text.lower() else False
56
-
57
  def text_to_speech(self, autoplay=True):
58
  import run_tts
59
  return run_tts.tts(self.text, "_".join(self.name.split(" ")), self.spacy_model, autoplay)
@@ -98,21 +94,16 @@ class CelebBot():
98
 
99
  def question_answer(self, instruction1='', knowledge=''):
100
  if self.text != "":
101
- ## wake up
102
- if self.wake_up(self.text) is True:
103
- self.text = f"Hello I am {self.name} the AI, what can I do for you?"
104
- ## have a conversation
105
  else:
106
- if re.search(re.compile(rf'\b(you|your|{self.name})\b', flags=re.IGNORECASE), self.text) != None:
107
- instruction1 = f"You are a celebrity named {self.name}. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
108
-
109
- knowledge = self.retrieve_knowledge_assertions()
110
- else:
111
- instruction1 = f"Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
112
- query = f"Context: {instruction1} {knowledge}\n\nQuestion: {self.text}\n\nAnswer:"
113
- input_ids = self.QA_tokenizer(f"{query}", return_tensors="pt").input_ids
114
- outputs = self.QA_model.generate(input_ids, max_length=1024)
115
- self.text = self.QA_tokenizer.decode(outputs[0], skip_special_tokens=True)
116
 
117
  # instruction2 = f'[Instruction] You are a celebrity named {self.name}. You need to answer the question based on knowledge'
118
  # query = f"{instruction2} [knowledge] {self.text} {answer} [question] {self.name}, {self.text}"
 
50
  self.text = ""
51
  print(f"me --> No audio recognized")
52
 
 
 
 
 
53
  def text_to_speech(self, autoplay=True):
54
  import run_tts
55
  return run_tts.tts(self.text, "_".join(self.name.split(" ")), self.spacy_model, autoplay)
 
94
 
95
  def question_answer(self, instruction1='', knowledge=''):
96
  if self.text != "":
97
+ if re.search(re.compile(rf'\b(you|your|{self.name})\b', flags=re.IGNORECASE), self.text) != None:
98
+ instruction1 = f"You are a celebrity named {self.name}. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
99
+
100
+ knowledge = self.retrieve_knowledge_assertions()
101
  else:
102
+ instruction1 = f"Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
103
+ query = f"Context: {instruction1} {knowledge}\n\nQuestion: {self.text}\n\nAnswer:"
104
+ input_ids = self.QA_tokenizer(f"{query}", return_tensors="pt").input_ids
105
+ outputs = self.QA_model.generate(input_ids, max_length=1024)
106
+ self.text = self.QA_tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
 
 
 
107
 
108
  # instruction2 = f'[Instruction] You are a celebrity named {self.name}. You need to answer the question based on knowledge'
109
  # query = f"{instruction2} [knowledge] {self.text} {answer} [question] {self.name}, {self.text}"
run_eval.py CHANGED
@@ -4,27 +4,30 @@ import spacy
4
  import json
5
  import evaluate
6
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModel
 
7
 
8
  from utils import *
9
  from celebbot import CelebBot
10
 
11
- DEBUG=True
12
  QA_MODEL_ID = "google/flan-t5-xl"
13
  SENTTR_MODEL_ID = "sentence-transformers/all-mpnet-base-v2"
 
14
 
15
  def evaluate_system():
16
- with open("data.json") as json_file:
 
 
17
  celeb_data = json.load(json_file)
18
- references = [val['answers'] for val in list(celeb_data.values())]
19
  references = list(itertools.chain.from_iterable(references))
20
  predictions = []
21
 
22
  QA_tokenizer = AutoTokenizer.from_pretrained(QA_MODEL_ID)
23
- QA_model = AutoModelForSeq2SeqLM.from_pretrained(QA_MODEL_ID)
24
  sentTr_tokenizer = AutoTokenizer.from_pretrained(SENTTR_MODEL_ID)
25
- sentTr_model = AutoModel.from_pretrained(SENTTR_MODEL_ID)
26
 
27
- for name in list(celeb_data.keys()):
28
  gender = celeb_data[name]["gender"]
29
  knowledge = celeb_data[name]["knowledge"]
30
 
@@ -50,22 +53,16 @@ def evaluate_system():
50
  knowledge_sents = [i.text.strip() for i in spacy_model(knowledge).sents]
51
 
52
  ai = CelebBot(name, QA_tokenizer, QA_model, sentTr_tokenizer, sentTr_model, spacy_model, knowledge_sents)
53
- for q in celeb_data[name]["questions"]:
54
- if not DEBUG:
55
- ai.speech_to_text()
56
- else:
57
- # ai.text = input("Your question: ")
58
- pass
59
-
60
- ai.text = q
61
- if ai.text != "":
62
- print("me --> ", ai.text)
63
-
64
- predictions.append(ai.question_answer())
65
 
66
- if not DEBUG:
67
- ai.text_to_speech()
68
- ai.text = ""
 
 
 
 
69
 
70
  file = open('predictions.txt','w')
71
  for prediction in predictions:
@@ -86,7 +83,7 @@ def evaluate_system():
86
 
87
  bertscore = evaluate.load("bertscore")
88
  results = bertscore.compute(predictions=predictions, references=references, rescale_with_baseline=True, lang="en")
89
- print(f"F1: {round(sum(results['f1'])/len(results['f1']))}")
90
 
91
  if __name__ == "__main__":
92
  evaluate_system()
 
4
  import json
5
  import evaluate
6
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModel
7
+ import torch
8
 
9
  from utils import *
10
  from celebbot import CelebBot
11
 
 
12
  QA_MODEL_ID = "google/flan-t5-xl"
13
  SENTTR_MODEL_ID = "sentence-transformers/all-mpnet-base-v2"
14
+ celeb_names = ["Cate Blanchett", "David Beckham", "Emma Watson", "Lady Gaga", "Madonna", "Mark Zuckerberg"]
15
 
16
  def evaluate_system():
17
+
18
+ device = 'cpu'
19
+ with open("data.json", encoding='utf-8') as json_file:
20
  celeb_data = json.load(json_file)
21
+ references = [val['answers'] for key, val in list(celeb_data.items()) if key in celeb_names]
22
  references = list(itertools.chain.from_iterable(references))
23
  predictions = []
24
 
25
  QA_tokenizer = AutoTokenizer.from_pretrained(QA_MODEL_ID)
26
+ QA_model = AutoModelForSeq2SeqLM.from_pretrained(QA_MODEL_ID).to(device)
27
  sentTr_tokenizer = AutoTokenizer.from_pretrained(SENTTR_MODEL_ID)
28
+ sentTr_model = AutoModel.from_pretrained(SENTTR_MODEL_ID).to(device)
29
 
30
+ for name in celeb_names:
31
  gender = celeb_data[name]["gender"]
32
  knowledge = celeb_data[name]["knowledge"]
33
 
 
53
  knowledge_sents = [i.text.strip() for i in spacy_model(knowledge).sents]
54
 
55
  ai = CelebBot(name, QA_tokenizer, QA_model, sentTr_tokenizer, sentTr_model, spacy_model, knowledge_sents)
56
+ if re.search(re.compile(rf'\b(you|your|{ai.name})\b', flags=re.IGNORECASE), ai.text) != None:
57
+ instruction1 = f"You are a celebrity named {ai.name}. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
 
 
 
 
 
 
 
 
 
 
58
 
59
+ knowledge = ai.retrieve_knowledge_assertions()
60
+ else:
61
+ instruction1 = f"Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
62
+ queries = [f"Context: {instruction1} {knowledge}\n\nQuestion: {q}\n\nAnswer:" for q in celeb_data[name]["questions"]]
63
+ input_ids = ai.QA_tokenizer(f"{queries}", return_tensors="pt").input_ids.to(device)
64
+ outputs = ai.QA_model.generate(input_ids, max_length=1024)
65
+ predictions+= ai.QA_tokenizer.batch_decode(outputs, skip_special_tokens=True)
66
 
67
  file = open('predictions.txt','w')
68
  for prediction in predictions:
 
83
 
84
  bertscore = evaluate.load("bertscore")
85
  results = bertscore.compute(predictions=predictions, references=references, rescale_with_baseline=True, lang="en")
86
+ print(f"F1: {round(sum(results['f1'])/len(results['f1']), 2)}")
87
 
88
  if __name__ == "__main__":
89
  evaluate_system()
test.py DELETED
@@ -1,5 +0,0 @@
1
- import evaluate
2
-
3
- bertscore = evaluate.load("bertscore")
4
- results = bertscore.compute(predictions=["I am from Toronto."], references=["Hey"],rescale_with_baseline=True, lang="en")
5
- print(results)