Spaces:

liuhaozhe6788
/

CelebChat

Running

App Files Community

lhzstar committed on Nov 1, 2023

Commit

2a846a9

•

1 Parent(s): b55470f

new commits

Browse files

Files changed (3) hide show

celebbot.py +9 -18
run_eval.py +19 -22
test.py +0 -5

celebbot.py CHANGED Viewed

@@ -50,10 +50,6 @@ class CelebBot():
                 self.text = ""
                 print(f"me -->  No audio recognized")
-    def wake_up(self, text):
-        return True if "hey " + self.name in text.lower() else False
     def text_to_speech(self, autoplay=True):
         import run_tts
         return run_tts.tts(self.text, "_".join(self.name.split(" ")), self.spacy_model, autoplay)
@@ -98,21 +94,16 @@ class CelebBot():
     def question_answer(self, instruction1='', knowledge=''):
         if self.text != "":
-            ## wake up
-            if self.wake_up(self.text) is True:
-                self.text = f"Hello I am {self.name} the AI, what can I do for you?"
-            ## have a conversation
             else:
-                if re.search(re.compile(rf'\b(you|your|{self.name})\b', flags=re.IGNORECASE), self.text) != None:
-                    instruction1 = f"You are a celebrity named {self.name}.  Always answer as helpfully as possible, while being safe.  Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
-                    knowledge = self.retrieve_knowledge_assertions()
-                else:
-                    instruction1 = f"Always answer as helpfully as possible, while being safe.  Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
-                query = f"Context: {instruction1} {knowledge}\n\nQuestion: {self.text}\n\nAnswer:"
-                input_ids = self.QA_tokenizer(f"{query}", return_tensors="pt").input_ids
-                outputs = self.QA_model.generate(input_ids, max_length=1024)
-                self.text = self.QA_tokenizer.decode(outputs[0], skip_special_tokens=True)
             #     instruction2 = f'[Instruction] You are a celebrity named {self.name}. You need to answer the question based on knowledge'
             #     query = f"{instruction2} [knowledge] {self.text} {answer} [question] {self.name}, {self.text}"

                 self.text = ""
                 print(f"me -->  No audio recognized")
     def text_to_speech(self, autoplay=True):
         import run_tts
         return run_tts.tts(self.text, "_".join(self.name.split(" ")), self.spacy_model, autoplay)
     def question_answer(self, instruction1='', knowledge=''):
         if self.text != "":
+            if re.search(re.compile(rf'\b(you|your|{self.name})\b', flags=re.IGNORECASE), self.text) != None:
+                instruction1 = f"You are a celebrity named {self.name}.  Always answer as helpfully as possible, while being safe.  Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
+                knowledge = self.retrieve_knowledge_assertions()
             else:
+                instruction1 = f"Always answer as helpfully as possible, while being safe.  Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
+            query = f"Context: {instruction1} {knowledge}\n\nQuestion: {self.text}\n\nAnswer:"
+            input_ids = self.QA_tokenizer(f"{query}", return_tensors="pt").input_ids
+            outputs = self.QA_model.generate(input_ids, max_length=1024)
+            self.text = self.QA_tokenizer.decode(outputs[0], skip_special_tokens=True)
             #     instruction2 = f'[Instruction] You are a celebrity named {self.name}. You need to answer the question based on knowledge'
             #     query = f"{instruction2} [knowledge] {self.text} {answer} [question] {self.name}, {self.text}"

run_eval.py CHANGED Viewed

@@ -4,27 +4,30 @@ import spacy
 import json
 import evaluate
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModel
 from utils import *
 from celebbot import CelebBot
-DEBUG=True
 QA_MODEL_ID = "google/flan-t5-xl"
 SENTTR_MODEL_ID = "sentence-transformers/all-mpnet-base-v2"
 def evaluate_system():
-    with open("data.json") as json_file:
         celeb_data = json.load(json_file)
-    references = [val['answers'] for val in list(celeb_data.values())]
     references = list(itertools.chain.from_iterable(references))
     predictions = []
     QA_tokenizer = AutoTokenizer.from_pretrained(QA_MODEL_ID)
-    QA_model = AutoModelForSeq2SeqLM.from_pretrained(QA_MODEL_ID)
     sentTr_tokenizer = AutoTokenizer.from_pretrained(SENTTR_MODEL_ID)
-    sentTr_model = AutoModel.from_pretrained(SENTTR_MODEL_ID)
-    for name in list(celeb_data.keys()):
         gender = celeb_data[name]["gender"]
         knowledge = celeb_data[name]["knowledge"]
@@ -50,22 +53,16 @@ def evaluate_system():
         knowledge_sents = [i.text.strip() for i in spacy_model(knowledge).sents]
         ai = CelebBot(name, QA_tokenizer, QA_model, sentTr_tokenizer, sentTr_model, spacy_model, knowledge_sents)
-        for q in celeb_data[name]["questions"]:
-            if not DEBUG:
-                ai.speech_to_text()
-            else:
-                # ai.text = input("Your question: ")
-                pass
-            ai.text = q
-            if ai.text != "":
-                print("me --> ", ai.text)
-                predictions.append(ai.question_answer())
-                if not DEBUG:
-                    ai.text_to_speech()
-            ai.text = ""
     file = open('predictions.txt','w')
     for prediction in predictions:
@@ -86,7 +83,7 @@ def evaluate_system():
     bertscore = evaluate.load("bertscore")
     results = bertscore.compute(predictions=predictions, references=references, rescale_with_baseline=True, lang="en")
-    print(f"F1: {round(sum(results['f1'])/len(results['f1']))}")
 if __name__ == "__main__":
     evaluate_system()

 import json
 import evaluate
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModel
+import torch
 from utils import *
 from celebbot import CelebBot
 QA_MODEL_ID = "google/flan-t5-xl"
 SENTTR_MODEL_ID = "sentence-transformers/all-mpnet-base-v2"
+celeb_names = ["Cate Blanchett", "David Beckham", "Emma Watson", "Lady Gaga", "Madonna", "Mark Zuckerberg"]
 def evaluate_system():
+    device = 'cpu'
+    with open("data.json", encoding='utf-8') as json_file:
         celeb_data = json.load(json_file)
+    references = [val['answers'] for key, val in list(celeb_data.items()) if key in celeb_names]
     references = list(itertools.chain.from_iterable(references))
     predictions = []
     QA_tokenizer = AutoTokenizer.from_pretrained(QA_MODEL_ID)
+    QA_model = AutoModelForSeq2SeqLM.from_pretrained(QA_MODEL_ID).to(device)
     sentTr_tokenizer = AutoTokenizer.from_pretrained(SENTTR_MODEL_ID)
+    sentTr_model = AutoModel.from_pretrained(SENTTR_MODEL_ID).to(device)
+    for name in celeb_names:
         gender = celeb_data[name]["gender"]
         knowledge = celeb_data[name]["knowledge"]
         knowledge_sents = [i.text.strip() for i in spacy_model(knowledge).sents]
         ai = CelebBot(name, QA_tokenizer, QA_model, sentTr_tokenizer, sentTr_model, spacy_model, knowledge_sents)
+        if re.search(re.compile(rf'\b(you|your|{ai.name})\b', flags=re.IGNORECASE), ai.text) != None:
+            instruction1 = f"You are a celebrity named {ai.name}.  Always answer as helpfully as possible, while being safe.  Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
+            knowledge = ai.retrieve_knowledge_assertions()
+        else:
+            instruction1 = f"Always answer as helpfully as possible, while being safe.  Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
+        queries = [f"Context: {instruction1} {knowledge}\n\nQuestion: {q}\n\nAnswer:" for q in celeb_data[name]["questions"]]
+        input_ids = ai.QA_tokenizer(f"{queries}", return_tensors="pt").input_ids.to(device)
+        outputs = ai.QA_model.generate(input_ids, max_length=1024)
+        predictions+= ai.QA_tokenizer.batch_decode(outputs, skip_special_tokens=True)
     file = open('predictions.txt','w')
     for prediction in predictions:
     bertscore = evaluate.load("bertscore")
     results = bertscore.compute(predictions=predictions, references=references, rescale_with_baseline=True, lang="en")
+    print(f"F1: {round(sum(results['f1'])/len(results['f1']), 2)}")
 if __name__ == "__main__":
     evaluate_system()

test.py DELETED Viewed

@@ -1,5 +0,0 @@
-import evaluate
-bertscore = evaluate.load("bertscore")
-results = bertscore.compute(predictions=["I am from Toronto."], references=["Hey"],rescale_with_baseline=True,  lang="en")
-print(results)