Spaces:
Runtime error
Runtime error
lhzstar
committed on
Commit
·
5beab45
1
Parent(s):
a2127f4
new commits
Browse files
- celebbot.py +38 -40
- embeds/Helen_Mirren.npy +0 -0
- rtvc/synthesizer/utils/cleaners.py +2 -2
- run_cli.py +12 -27
- run_eval.py +43 -67
- run_tts.py +2 -2
- utils.py +1 -7
celebbot.py
CHANGED
@@ -35,7 +35,7 @@ class CelebBot():
|
|
35 |
with sr.Microphone() as mic:
|
36 |
recognizer.adjust_for_ambient_noise(mic, duration=1)
|
37 |
# flag = input("Are you ready to record?\nProceed (Y/n)")
|
38 |
-
|
39 |
# try:
|
40 |
# assert flag=='Y'
|
41 |
# except:
|
@@ -62,8 +62,8 @@ class CelebBot():
|
|
62 |
return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
|
63 |
# Tokenize sentences
|
64 |
encoded_input = self.sentTr_tokenizer(texts, padding=True, truncation=True, return_tensors='pt')
|
65 |
-
encoded_input["input_ids"] = encoded_input["input_ids"]
|
66 |
-
encoded_input["attention_mask"] = encoded_input["attention_mask"]
|
67 |
|
68 |
# Compute token embeddings
|
69 |
with torch.no_grad():
|
@@ -76,9 +76,9 @@ class CelebBot():
|
|
76 |
sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)
|
77 |
|
78 |
return sentence_embeddings
|
79 |
-
|
80 |
def retrieve_knowledge_assertions(self, change_person=True):
|
81 |
-
question_embeddings = self.sentence_embeds_inference([self.
|
82 |
|
83 |
all_knowledge_embeddings = self.sentence_embeds_inference(self.all_knowledge)
|
84 |
similarity = cosine_similarity(all_knowledge_embeddings.cpu(), question_embeddings.cpu())
|
@@ -89,23 +89,19 @@ class CelebBot():
|
|
89 |
|
90 |
# similarities = np.array(similarity)[top_K]
|
91 |
|
92 |
-
print(*all_knowledge_assertions, sep='\n')
|
93 |
-
|
94 |
-
knowledge = ' '.join(all_knowledge_assertions)
|
95 |
|
96 |
if change_person:
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
def third_to_first_person(self, text):
|
|
|
102 |
name = self.name.split(" ")[-1].lower()
|
103 |
doc = self.spacy_model(text)
|
104 |
transformed_text = []
|
105 |
|
106 |
-
for i, token in enumerate(doc):
|
107 |
-
if token.text == "pen":
|
108 |
-
print(token.text, token.dep_)
|
109 |
if self.gender == "M":
|
110 |
if token.text.lower() == "he":
|
111 |
transformed_text.append("I")
|
@@ -114,54 +110,56 @@ class CelebBot():
|
|
114 |
elif token.text.lower() == "his":
|
115 |
transformed_text.append("my")
|
116 |
elif token.text.lower() == name and token.dep_ in ["nsubj", "nsubjpass"]:
|
117 |
-
transformed_text.append("I")
|
118 |
-
|
119 |
-
elif token.text == "'s" and doc[i-1].text.lower() == name:
|
120 |
transformed_text[-1] = "my"
|
121 |
-
|
|
|
|
|
|
|
122 |
else:
|
123 |
-
transformed_text.append(token.text)
|
124 |
elif self.gender == "F":
|
125 |
if token.text.lower() == "she":
|
126 |
transformed_text.append("I")
|
127 |
elif token.text.lower() == "her":
|
128 |
-
if i != len(doc)-
|
129 |
-
transformed_text.append("my")
|
130 |
else:
|
131 |
-
transformed_text.append("me")
|
132 |
elif token.text.lower() == name and token.dep_ in ["nsubj", "nsubjpass"]:
|
133 |
-
transformed_text.append("I")
|
134 |
-
|
135 |
-
elif token.text == "'s" and doc[i-1].text.lower() == name:
|
136 |
transformed_text[-1] = "my"
|
137 |
-
|
|
|
|
|
|
|
138 |
else:
|
139 |
-
transformed_text.append(token.text)
|
140 |
|
141 |
-
return "
|
142 |
|
143 |
def question_answer(self, instruction='', knowledge=''):
|
|
|
144 |
if self.text != "":
|
145 |
-
if re.search(re.compile(rf'\b(you|your|
|
146 |
-
|
147 |
-
if re.search(re.compile(rf'\b(you|your|yours)\b', flags=re.IGNORECASE), self.text) != None:
|
148 |
-
knowledge = self.retrieve_knowledge_assertions()
|
149 |
-
else:
|
150 |
-
knowledge = self.retrieve_knowledge_assertions(change_person=False)
|
151 |
else:
|
152 |
-
|
|
|
153 |
query = f"Context: {instruction} {knowledge}\n\nQuestion: {self.text}\n\nAnswer:"
|
154 |
-
input_ids = self.QA_tokenizer(f"{query}", return_tensors="pt").input_ids
|
155 |
-
outputs = self.QA_model.generate(input_ids, max_length=1024, min_length=8,do_sample=True, temperature=0.2, repetition_penalty=
|
156 |
self.text = self.QA_tokenizer.decode(outputs[0], skip_special_tokens=True)
|
157 |
-
|
158 |
return self.text
|
159 |
|
160 |
@staticmethod
|
161 |
def action_time():
|
162 |
return f"it's {datetime.datetime.now().time().strftime('%H:%M')}"
|
163 |
|
164 |
-
@staticmethod
|
165 |
def save_kb(kb, filename):
|
166 |
with open(filename, "wb") as f:
|
167 |
pickle.dump(kb, f)
|
|
|
35 |
with sr.Microphone() as mic:
|
36 |
recognizer.adjust_for_ambient_noise(mic, duration=1)
|
37 |
# flag = input("Are you ready to record?\nProceed (Y/n)")
|
38 |
+
|
39 |
# try:
|
40 |
# assert flag=='Y'
|
41 |
# except:
|
|
|
62 |
return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
|
63 |
# Tokenize sentences
|
64 |
encoded_input = self.sentTr_tokenizer(texts, padding=True, truncation=True, return_tensors='pt')
|
65 |
+
encoded_input["input_ids"] = encoded_input["input_ids"].to(self.sentTr_model.device)
|
66 |
+
encoded_input["attention_mask"] = encoded_input["attention_mask"].to(self.sentTr_model.device)
|
67 |
|
68 |
# Compute token embeddings
|
69 |
with torch.no_grad():
|
|
|
76 |
sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)
|
77 |
|
78 |
return sentence_embeddings
|
79 |
+
|
80 |
def retrieve_knowledge_assertions(self, change_person=True):
|
81 |
+
question_embeddings = self.sentence_embeds_inference([self.text])
|
82 |
|
83 |
all_knowledge_embeddings = self.sentence_embeds_inference(self.all_knowledge)
|
84 |
similarity = cosine_similarity(all_knowledge_embeddings.cpu(), question_embeddings.cpu())
|
|
|
89 |
|
90 |
# similarities = np.array(similarity)[top_K]
|
91 |
|
92 |
+
# print(*all_knowledge_assertions, sep='\n')
|
|
|
|
|
93 |
|
94 |
if change_person:
|
95 |
+
all_knowledge_assertions = [self.third_to_first_person(sent) for sent in all_knowledge_assertions]
|
96 |
+
return " ".join(all_knowledge_assertions)
|
97 |
+
|
|
|
98 |
def third_to_first_person(self, text):
|
99 |
+
text = text.replace(" ", " ")
|
100 |
name = self.name.split(" ")[-1].lower()
|
101 |
doc = self.spacy_model(text)
|
102 |
transformed_text = []
|
103 |
|
104 |
+
for i, token in enumerate(doc):
|
|
|
|
|
105 |
if self.gender == "M":
|
106 |
if token.text.lower() == "he":
|
107 |
transformed_text.append("I")
|
|
|
110 |
elif token.text.lower() == "his":
|
111 |
transformed_text.append("my")
|
112 |
elif token.text.lower() == name and token.dep_ in ["nsubj", "nsubjpass"]:
|
113 |
+
transformed_text.append("I")
|
114 |
+
elif token.text == "βs" and doc[i-1].text.lower() == name:
|
|
|
115 |
transformed_text[-1] = "my"
|
116 |
+
elif token.text.lower() == "their":
|
117 |
+
transformed_text.append("our")
|
118 |
+
elif token.text.lower() == "they":
|
119 |
+
transformed_text.append("we")
|
120 |
else:
|
121 |
+
transformed_text.append(token.text)
|
122 |
elif self.gender == "F":
|
123 |
if token.text.lower() == "she":
|
124 |
transformed_text.append("I")
|
125 |
elif token.text.lower() == "her":
|
126 |
+
if i != len(doc)-1 and doc[i+2].dep_ in ["nsubj", "nsubjpass", "dobj", "appos", "dative", "attr", "amod", "nummod", "compound", "pobj", "pcomp"]:
|
127 |
+
transformed_text.append("my")
|
128 |
else:
|
129 |
+
transformed_text.append("me")
|
130 |
elif token.text.lower() == name and token.dep_ in ["nsubj", "nsubjpass"]:
|
131 |
+
transformed_text.append("I")
|
132 |
+
elif token.text == "βs" and doc[i-1].text.lower() == name:
|
|
|
133 |
transformed_text[-1] = "my"
|
134 |
+
elif token.text.lower() == "their":
|
135 |
+
transformed_text.append("our")
|
136 |
+
elif token.text.lower() == "they":
|
137 |
+
transformed_text.append("we")
|
138 |
else:
|
139 |
+
transformed_text.append(token.text)
|
140 |
|
141 |
+
return "".join(transformed_text)
|
142 |
|
143 |
def question_answer(self, instruction='', knowledge=''):
|
144 |
+
instruction = f"Your name is {self.name}. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
|
145 |
if self.text != "":
|
146 |
+
if re.search(re.compile(rf'\b(you|your|yours)\b', flags=re.IGNORECASE), self.text) != None:
|
147 |
+
knowledge = self.retrieve_knowledge_assertions()
|
|
|
|
|
|
|
|
|
148 |
else:
|
149 |
+
knowledge = self.retrieve_knowledge_assertions(change_person=False)
|
150 |
+
|
151 |
query = f"Context: {instruction} {knowledge}\n\nQuestion: {self.text}\n\nAnswer:"
|
152 |
+
input_ids = self.QA_tokenizer(f"{query}", return_tensors="pt").input_ids.to(self.QA_model.device)
|
153 |
+
outputs = self.QA_model.generate(input_ids, max_length=1024, min_length=8, do_sample=True, temperature=0.2, repetition_penalty=2.5)
|
154 |
self.text = self.QA_tokenizer.decode(outputs[0], skip_special_tokens=True)
|
155 |
+
# self.text = " ".join([i.text.strip().capitalize() for i in self.spacy_model(self.text).sents])
|
156 |
return self.text
|
157 |
|
158 |
@staticmethod
|
159 |
def action_time():
|
160 |
return f"it's {datetime.datetime.now().time().strftime('%H:%M')}"
|
161 |
|
162 |
+
@staticmethod
|
163 |
def save_kb(kb, filename):
|
164 |
with open(filename, "wb") as f:
|
165 |
pickle.dump(kb, f)
|
embeds/Helen_Mirren.npy
CHANGED
Binary files a/embeds/Helen_Mirren.npy and b/embeds/Helen_Mirren.npy differ
|
|
rtvc/synthesizer/utils/cleaners.py
CHANGED
@@ -30,8 +30,8 @@ _alphabet2pronunciation = {
|
|
30 |
'g': 'jee',
|
31 |
'H': 'eiich',
|
32 |
'h': 'eiich',
|
33 |
-
'I': '
|
34 |
-
'i': '
|
35 |
'J': 'jay',
|
36 |
'j': 'jay',
|
37 |
'K': 'kay',
|
|
|
30 |
'g': 'jee',
|
31 |
'H': 'eiich',
|
32 |
'h': 'eiich',
|
33 |
+
'I': 'I',
|
34 |
+
'i': 'I',
|
35 |
'J': 'jay',
|
36 |
'j': 'jay',
|
37 |
'K': 'kay',
|
run_cli.py
CHANGED
@@ -10,8 +10,7 @@ QA_MODEL_ID = "google/flan-t5-large"
|
|
10 |
SENTTR_MODEL_ID = "sentence-transformers/all-mpnet-base-v2"
|
11 |
|
12 |
def main():
|
13 |
-
|
14 |
-
celeb_data = json.load(json_file)
|
15 |
message = "Please choose your favorite celebrity from\n"\
|
16 |
"1. Cate Blanchett\n"\
|
17 |
"2. David Beckham\n"\
|
@@ -25,37 +24,23 @@ def main():
|
|
25 |
sentTr_tokenizer = AutoTokenizer.from_pretrained(SENTTR_MODEL_ID)
|
26 |
sentTr_model = AutoModel.from_pretrained(SENTTR_MODEL_ID)
|
27 |
|
28 |
-
|
29 |
-
gender = celeb_data[
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
names_regex = re.compile(rf'\b({names})\b')
|
39 |
-
if gender == "M":
|
40 |
-
knowledge = re.sub(he_regex, "I", knowledge)
|
41 |
-
knowledge = re.sub(his_regex, "my", knowledge)
|
42 |
-
elif gender == "F":
|
43 |
-
knowledge = re.sub(she_regex, "I", knowledge)
|
44 |
-
knowledge = re.sub(her_regex, "my", knowledge)
|
45 |
-
knowledge = re.sub(names_regex, "my", knowledge)
|
46 |
-
knowledge = re.sub(lnames_regex, "my", knowledge)
|
47 |
-
knowledge = re.sub(name_regex, "I", knowledge)
|
48 |
-
knowledge = re.sub(lname_regex, "I", knowledge)
|
49 |
-
|
50 |
-
spacy_model = spacy.load("en_core_web_sm")
|
51 |
knowledge_sents = [i.text.strip() for i in spacy_model(knowledge).sents]
|
52 |
|
53 |
-
ai = CelebBot(
|
54 |
|
55 |
answers = []
|
56 |
|
57 |
while True:
|
58 |
-
# for q in celeb_data[ai.name_id][ai.name]["questions"]:
|
59 |
if not DEBUG:
|
60 |
ai.speech_to_text()
|
61 |
else:
|
|
|
10 |
SENTTR_MODEL_ID = "sentence-transformers/all-mpnet-base-v2"
|
11 |
|
12 |
def main():
|
13 |
+
celeb_data = get_celeb_data("data.json")
|
|
|
14 |
message = "Please choose your favorite celebrity from\n"\
|
15 |
"1. Cate Blanchett\n"\
|
16 |
"2. David Beckham\n"\
|
|
|
24 |
sentTr_tokenizer = AutoTokenizer.from_pretrained(SENTTR_MODEL_ID)
|
25 |
sentTr_model = AutoModel.from_pretrained(SENTTR_MODEL_ID)
|
26 |
|
27 |
+
celeb_name = input(message)
|
28 |
+
gender = celeb_data[celeb_name]["gender"]
|
29 |
+
if celeb_name == "Madonna":
|
30 |
+
name = "Madonna-American-singer-and-actress"
|
31 |
+
elif celeb_name == "Anne Hathaway":
|
32 |
+
name = "Anne-Hathaway-American-actress"
|
33 |
+
else:
|
34 |
+
name="-".join(celeb_name.split(" "))
|
35 |
+
knowledge = get_article(f"https://www.britannica.com/biography/{name}")
|
36 |
+
spacy_model = spacy.load("en_core_web_lg")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
knowledge_sents = [i.text.strip() for i in spacy_model(knowledge).sents]
|
38 |
|
39 |
+
ai = CelebBot(celeb_name, gender, QA_tokenizer, QA_model, sentTr_tokenizer, sentTr_model, spacy_model, knowledge_sents)
|
40 |
|
41 |
answers = []
|
42 |
|
43 |
while True:
|
|
|
44 |
if not DEBUG:
|
45 |
ai.speech_to_text()
|
46 |
else:
|
run_eval.py
CHANGED
@@ -9,82 +9,58 @@ import torch
|
|
9 |
from utils import *
|
10 |
from celebbot import CelebBot
|
11 |
|
12 |
-
QA_MODEL_ID = "google/flan-t5-
|
13 |
SENTTR_MODEL_ID = "sentence-transformers/all-mpnet-base-v2"
|
14 |
celeb_names = ["Cate Blanchett", "David Beckham", "Emma Watson", "Lady Gaga", "Madonna", "Mark Zuckerberg"]
|
15 |
|
16 |
-
|
|
|
|
|
|
|
17 |
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
predictions = []
|
24 |
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
lname = name.split(" ")[-1]
|
35 |
-
lname_regex = re.compile(rf'\b({lname})\b')
|
36 |
-
name_regex = re.compile(rf'\b({name})\b')
|
37 |
-
lnames = lname+"βs" if not lname.endswith("s") else lname+"β"
|
38 |
-
lnames_regex = re.compile(rf'\b({lnames})\b')
|
39 |
-
names = name+"βs" if not name.endswith("s") else name+"β"
|
40 |
-
names_regex = re.compile(rf'\b({names})\b')
|
41 |
-
if gender == "M":
|
42 |
-
knowledge = re.sub(he_regex, "I", knowledge)
|
43 |
-
knowledge = re.sub(his_regex, "my", knowledge)
|
44 |
-
elif gender == "F":
|
45 |
-
knowledge = re.sub(she_regex, "I", knowledge)
|
46 |
-
knowledge = re.sub(her_regex, "my", knowledge)
|
47 |
-
knowledge = re.sub(names_regex, "my", knowledge)
|
48 |
-
knowledge = re.sub(lnames_regex, "my", knowledge)
|
49 |
-
knowledge = re.sub(name_regex, "I", knowledge)
|
50 |
-
knowledge = re.sub(lname_regex, "I", knowledge)
|
51 |
-
|
52 |
-
spacy_model = spacy.load("en_core_web_sm")
|
53 |
-
knowledge_sents = [i.text.strip() for i in spacy_model(knowledge).sents]
|
54 |
|
55 |
-
|
56 |
-
|
57 |
-
instruction1 = f"You are a celebrity named {ai.name}. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
|
58 |
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
predictions+= ai.QA_tokenizer.batch_decode(outputs, skip_special_tokens=True)
|
66 |
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
if __name__ == "__main__":
|
89 |
-
evaluate_system()
|
90 |
-
|
|
|
9 |
from utils import *
|
10 |
from celebbot import CelebBot
|
11 |
|
12 |
+
QA_MODEL_ID = "google/flan-t5-large"
|
13 |
SENTTR_MODEL_ID = "sentence-transformers/all-mpnet-base-v2"
|
14 |
celeb_names = ["Cate Blanchett", "David Beckham", "Emma Watson", "Lady Gaga", "Madonna", "Mark Zuckerberg"]
|
15 |
|
16 |
+
celeb_data = get_celeb_data("data.json")
|
17 |
+
references = [val['answers'] for key, val in list(celeb_data.items()) if key in celeb_names]
|
18 |
+
references = list(itertools.chain.from_iterable(references))
|
19 |
+
predictions = []
|
20 |
|
21 |
+
device = 'cpu'
|
22 |
+
QA_tokenizer = AutoTokenizer.from_pretrained(QA_MODEL_ID)
|
23 |
+
QA_model = AutoModelForSeq2SeqLM.from_pretrained(QA_MODEL_ID).to(device)
|
24 |
+
sentTr_tokenizer = AutoTokenizer.from_pretrained(SENTTR_MODEL_ID)
|
25 |
+
sentTr_model = AutoModel.from_pretrained(SENTTR_MODEL_ID).to(device)
|
|
|
26 |
|
27 |
+
for celeb_name in celeb_names:
|
28 |
+
gender = celeb_data[celeb_name]["gender"]
|
29 |
+
if celeb_name == "Madonna":
|
30 |
+
name = "Madonna-American-singer-and-actress"
|
31 |
+
elif celeb_name == "Anne Hathaway":
|
32 |
+
name = "Anne-Hathaway-American-actress"
|
33 |
+
else:
|
34 |
+
name="-".join(celeb_name.split(" "))
|
35 |
+
knowledge = get_article(f"https://www.britannica.com/biography/{name}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
|
37 |
+
spacy_model = spacy.load("en_core_web_lg")
|
38 |
+
knowledge_sents = [i.text.strip() for i in spacy_model(knowledge).sents]
|
|
|
39 |
|
40 |
+
ai = CelebBot(celeb_name, gender, QA_tokenizer, QA_model, sentTr_tokenizer, sentTr_model, spacy_model, knowledge_sents)
|
41 |
+
for q in celeb_data[celeb_name]["questions"]:
|
42 |
+
ai.text = q
|
43 |
+
response = ai.question_answer()
|
44 |
+
print("response:", response)
|
45 |
+
predictions.append(response)
|
|
|
46 |
|
47 |
+
file = open('predictions.txt','w')
|
48 |
+
for prediction in predictions:
|
49 |
+
file.write(prediction+"\n")
|
50 |
+
file.close()
|
51 |
|
52 |
+
bleu = evaluate.load("bleu")
|
53 |
+
results = bleu.compute(predictions=predictions, references=references, max_order=4)
|
54 |
+
print(f"BLEU: {round(results['bleu'], 2)}")
|
55 |
|
56 |
+
meteor = evaluate.load("meteor")
|
57 |
+
results = meteor.compute(predictions=predictions, references=references)
|
58 |
+
print(f"METEOR: {round(results['meteor'], 2)}")
|
59 |
|
60 |
+
rouge = evaluate.load("rouge")
|
61 |
+
results = rouge.compute(predictions=predictions, references=references)
|
62 |
+
print(f"ROUGE: {round(results['rougeL'], 2)}")
|
63 |
|
64 |
+
bertscore = evaluate.load("bertscore")
|
65 |
+
results = bertscore.compute(predictions=predictions, references=references, rescale_with_baseline=True, lang="en")
|
66 |
+
print(f"F1: {round(sum(results['f1'])/len(results['f1']), 2)}")
|
|
|
|
|
|
|
|
run_tts.py
CHANGED
@@ -119,8 +119,8 @@ def tts(text, embed_name, nlp, autoplay=True):
|
|
119 |
|
120 |
if __name__ == "__main__":
|
121 |
text = "Adkins was raised by a young single mother in various working-class neighbourhoods of London."
|
122 |
-
embed_name = "
|
123 |
-
nlp = spacy.load('
|
124 |
b64 = tts(text, embed_name, nlp, autoplay=False)
|
125 |
|
126 |
md = f"""
|
|
|
119 |
|
120 |
if __name__ == "__main__":
|
121 |
text = "Adkins was raised by a young single mother in various working-class neighbourhoods of London."
|
122 |
+
embed_name = "Helen_Mirren"
|
123 |
+
nlp = spacy.load('en_core_web_lg')
|
124 |
b64 = tts(text, embed_name, nlp, autoplay=False)
|
125 |
|
126 |
md = f"""
|
utils.py
CHANGED
@@ -36,7 +36,6 @@ def get_celeb_data(fpath):
|
|
36 |
with open(fpath, encoding='UTF-8') as json_file:
|
37 |
return json.load(json_file)
|
38 |
|
39 |
-
|
40 |
def get_article(url):
|
41 |
req = Request(
|
42 |
url=url,
|
@@ -61,7 +60,7 @@ def get_article(url):
|
|
61 |
# drop blank lines
|
62 |
text = ' '.join(chunk for chunk in chunks if chunk)
|
63 |
return text
|
64 |
-
|
65 |
except:
|
66 |
st.markdown("The internet is not stable.")
|
67 |
return ""
|
@@ -72,10 +71,5 @@ def get_spacy_model(model_id):
|
|
72 |
|
73 |
def preprocess_text(name, text:str, model_id):
|
74 |
spacy_model = get_spacy_model(model_id)
|
75 |
-
|
76 |
-
text = text.replace("β", "'")
|
77 |
-
text = text.replace("β", "'")
|
78 |
-
text = text.replace("β", "\"")
|
79 |
-
text = text.replace("β", "\"")
|
80 |
texts = [i.text.strip() for i in spacy_model(text).sents]
|
81 |
return spacy_model, texts
|
|
|
36 |
with open(fpath, encoding='UTF-8') as json_file:
|
37 |
return json.load(json_file)
|
38 |
|
|
|
39 |
def get_article(url):
|
40 |
req = Request(
|
41 |
url=url,
|
|
|
60 |
# drop blank lines
|
61 |
text = ' '.join(chunk for chunk in chunks if chunk)
|
62 |
return text
|
63 |
+
|
64 |
except:
|
65 |
st.markdown("The internet is not stable.")
|
66 |
return ""
|
|
|
71 |
|
72 |
def preprocess_text(name, text:str, model_id):
|
73 |
spacy_model = get_spacy_model(model_id)
|
|
|
|
|
|
|
|
|
|
|
74 |
texts = [i.text.strip() for i in spacy_model(text).sents]
|
75 |
return spacy_model, texts
|