eliwill committed on
Commit
d2c1c9d
·
1 Parent(s): ce1f130

Fix problems

Browse files
Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -3,6 +3,9 @@ from transformers import pipeline
3
  import numpy as np
4
  import pandas as pd
5
  from sentence_transformers import SentenceTransformer, util
 
 
 
6
 
7
  # Loading in quotes dataset
8
  df = pd.read_json("krishnamurti_df.json")
@@ -10,12 +13,13 @@ df = pd.read_json("krishnamurti_df.json")
10
  # Loading back in our sentence similarity and language model
11
  model = SentenceTransformer("msmarco-roberta-base-v3") # best performing model
12
 
13
- krishnamurti_generator = pipeline("text-generation", model="distilgpt2",)
14
 
15
  ############### DEFINING FUNCTIONS ###########################
16
 
17
  def ask_krishnamurti(question):
18
- answer = krishnamurti_generator(question,)[0]['generated_text']
 
19
  return answer
20
 
21
  def get_similar_quotes(question):
@@ -24,7 +28,7 @@ def get_similar_quotes(question):
24
  ind = np.argpartition(sims, -5)[-5:]
25
  similar_sentences = [df['Quotes'][i] for i in ind]
26
  top5quotes = pd.DataFrame(data = similar_sentences, columns=["Quotes"], index=range(1,6))
27
- top5quotes['Quotes'] = top5quotes['Quotes'].str[:500] + "..."
28
  return top5quotes
29
 
30
  def main(question):
 
3
  import numpy as np
4
  import pandas as pd
5
  from sentence_transformers import SentenceTransformer, util
6
+ import nltk
7
+ from nltk import sent_tokenize
8
+ nltk.download("punkt")
9
 
10
  # Loading in quotes dataset
11
  df = pd.read_json("krishnamurti_df.json")
 
13
  # Loading back in our sentence similarity and language model
14
  model = SentenceTransformer("msmarco-roberta-base-v3") # best performing model
15
 
16
+ krishnamurti_generator = pipeline("text-generation", model="distilgpt2")
17
 
18
  ############### DEFINING FUNCTIONS ###########################
19
 
20
  def ask_krishnamurti(question):
21
+ answer = krishnamurti_generator(question, min_length=40, max_length=60)[0]['generated_text'] # generate about 50 word tokens
22
+ answer = " ".join(sent_tokenize(answer)[:5]) # Get the first five sentences
23
  return answer
24
 
25
  def get_similar_quotes(question):
 
28
  ind = np.argpartition(sims, -5)[-5:]
29
  similar_sentences = [df['Quotes'][i] for i in ind]
30
  top5quotes = pd.DataFrame(data = similar_sentences, columns=["Quotes"], index=range(1,6))
31
+ top5quotes['Quotes'] = top5quotes['Quotes'].str[:-1].str[:250] + "..."
32
  return top5quotes
33
 
34
  def main(question):