Fix problems
Browse files
app.py
CHANGED
@@ -3,6 +3,9 @@ from transformers import pipeline
|
|
3 |
import numpy as np
|
4 |
import pandas as pd
|
5 |
from sentence_transformers import SentenceTransformer, util
|
|
|
|
|
|
|
6 |
|
7 |
# Loading in quotes dataset
|
8 |
df = pd.read_json("krishnamurti_df.json")
|
@@ -10,12 +13,13 @@ df = pd.read_json("krishnamurti_df.json")
|
|
10 |
# Loading back in our sentence similarity and language model
|
11 |
model = SentenceTransformer("msmarco-roberta-base-v3") # best performing model
|
12 |
|
13 |
-
krishnamurti_generator = pipeline("text-generation", model="distilgpt2"
|
14 |
|
15 |
############### DEFINING FUNCTIONS ###########################
|
16 |
|
17 |
def ask_krishnamurti(question):
|
18 |
-
answer = krishnamurti_generator(question,)[0]['generated_text']
|
|
|
19 |
return answer
|
20 |
|
21 |
def get_similar_quotes(question):
|
@@ -24,7 +28,7 @@ def get_similar_quotes(question):
|
|
24 |
ind = np.argpartition(sims, -5)[-5:]
|
25 |
similar_sentences = [df['Quotes'][i] for i in ind]
|
26 |
top5quotes = pd.DataFrame(data = similar_sentences, columns=["Quotes"], index=range(1,6))
|
27 |
-
top5quotes['Quotes'] = top5quotes['Quotes'].str[:
|
28 |
return top5quotes
|
29 |
|
30 |
def main(question):
|
|
|
3 |
import numpy as np
|
4 |
import pandas as pd
|
5 |
from sentence_transformers import SentenceTransformer, util
|
6 |
+
import nltk
|
7 |
+
from nltk import sent_tokenize
|
8 |
+
nltk.download("punkt")
|
9 |
|
10 |
# Loading in quotes dataset
|
11 |
df = pd.read_json("krishnamurti_df.json")
|
|
|
13 |
# Loading back in our sentence similarity and language model
|
14 |
model = SentenceTransformer("msmarco-roberta-base-v3") # best performing model
|
15 |
|
16 |
+
krishnamurti_generator = pipeline("text-generation", model="distilgpt2")
|
17 |
|
18 |
############### DEFINING FUNCTIONS ###########################
|
19 |
|
20 |
def ask_krishnamurti(question):
|
21 |
+
answer = krishnamurti_generator(question, min_length=40, max_length=60)[0]['generated_text'] # generate about 50 word tokens
|
22 |
+
answer = " ".join(sent_tokenize(answer)[:5]) # Get the first five sentences
|
23 |
return answer
|
24 |
|
25 |
def get_similar_quotes(question):
|
|
|
28 |
ind = np.argpartition(sims, -5)[-5:]
|
29 |
similar_sentences = [df['Quotes'][i] for i in ind]
|
30 |
top5quotes = pd.DataFrame(data = similar_sentences, columns=["Quotes"], index=range(1,6))
|
31 |
+
top5quotes['Quotes'] = top5quotes['Quotes'].str[:-1].str[:250] + "..."
|
32 |
return top5quotes
|
33 |
|
34 |
def main(question):
|