Jingxiang Mo committed on
Commit
d4e5967
1 Parent(s): 77e7345

Code optimization and response wrap

Browse files
Files changed (3) hide show
  1. .env +1 -0
  2. __pycache__/app.cpython-39.pyc +0 -0
  3. app.py +32 -9
.env ADDED
@@ -0,0 +1 @@
 
 
1
+ OPENAI_API_KEY="<REDACTED>"
__pycache__/app.cpython-39.pyc CHANGED
Binary files a/__pycache__/app.cpython-39.pyc and b/__pycache__/app.cpython-39.pyc differ
 
app.py CHANGED
@@ -13,6 +13,7 @@ from transformers import (
13
  )
14
  from transformers.pipelines import AggregationStrategy
15
  import torch
 
16
 
17
 
18
  # =====[ DEFINE PIPELINE ]===== #
@@ -33,6 +34,9 @@ class KeyphraseExtractionPipeline(TokenClassificationPipeline):
33
  return np.unique([result.get("word").strip() for result in results])
34
 
35
 
 
 
 
36
  # =====[ LOAD PIPELINE ]===== #
37
  keyPhraseExtractionModel = "ml6team/keyphrase-extraction-kbir-inspec"
38
  extractor = KeyphraseExtractionPipeline(model=keyPhraseExtractionModel)
@@ -44,14 +48,18 @@ tokenizer = BertTokenizer.from_pretrained(
44
  )
45
 
46
 
47
- def keyphrases_extraction(text: str) -> str:
48
- keyphrases = extractor(text)
49
- return keyphrases
50
 
 
 
 
 
 
 
51
 
52
- def wikipedia_search(input: str) -> str:
53
  input = input.replace("\n", " ")
54
- keyphrases = keyphrases_extraction(input)
55
 
56
  wiki = wk.Wikipedia("en")
57
 
@@ -78,15 +86,23 @@ def wikipedia_search(input: str) -> str:
78
  return "I cannot answer this question"
79
 
80
 
81
- def answer_question(question):
 
 
 
 
 
 
 
 
 
82
  context = wikipedia_search(question)
83
  if (context == "I cannot answer this question") or (
84
  context == "Can you add more details to your question?"
85
  ):
86
  return context
87
 
88
- # Tokenize
89
- # Apply the tokenizer to the input text, treating them as a text-pair.
90
  input_ids = tokenizer.encode(question, context)
91
  question_ids = input_ids[: input_ids.index(tokenizer.sep_token_id) + 1]
92
 
@@ -157,7 +173,14 @@ def answer_question(question):
157
  scores.append((max_start_score, max_end_score, answer))
158
 
159
  # Compare scores for answers found and each paragraph and pick the most relevant.
160
- return max(scores, key=lambda x: x[0] + x[1])[2]
 
 
 
 
 
 
 
161
 
162
 
163
  # =====[ DEFINE INTERFACE ]===== #'
 
13
  )
14
  from transformers.pipelines import AggregationStrategy
15
  import torch
16
+ from dotenv import load_dotenv
17
 
18
 
19
  # =====[ DEFINE PIPELINE ]===== #
 
34
  return np.unique([result.get("word").strip() for result in results])
35
 
36
 
37
+ load_dotenv()
38
+ openai.api_key = os.getenv("OPENAI_API_KEY")
39
+
40
  # =====[ LOAD PIPELINE ]===== #
41
  keyPhraseExtractionModel = "ml6team/keyphrase-extraction-kbir-inspec"
42
  extractor = KeyphraseExtractionPipeline(model=keyPhraseExtractionModel)
 
48
  )
49
 
50
 
51
+ def wikipedia_search(input: str) -> str:
52
+ """Perform a Wikipedia search using keyphrases.
 
53
 
54
+ Args:
55
+ input (str): The input text.
56
+
57
+ Returns:
58
+ str: The summary of the Wikipedia page.
59
+ """
60
 
 
61
  input = input.replace("\n", " ")
62
+ keyphrases = extractor(input)
63
 
64
  wiki = wk.Wikipedia("en")
65
 
 
86
  return "I cannot answer this question"
87
 
88
 
89
+ def answer_question(question: str) -> str:
90
+ """Answer the question using the context from the Wikipedia search.
91
+
92
+ Args:
93
+ question (str): The input question.
94
+
95
+ Returns:
96
+ str: The answer to the question.
97
+ """
98
+
99
  context = wikipedia_search(question)
100
  if (context == "I cannot answer this question") or (
101
  context == "Can you add more details to your question?"
102
  ):
103
  return context
104
 
105
+ # Tokenize and split input
 
106
  input_ids = tokenizer.encode(question, context)
107
  question_ids = input_ids[: input_ids.index(tokenizer.sep_token_id) + 1]
108
 
 
173
  scores.append((max_start_score, max_end_score, answer))
174
 
175
  # Compare scores for answers found and each paragraph and pick the most relevant.
176
+ answer = max(scores, key=lambda x: x[0] + x[1])[2]
177
+
178
+ response = openai.Completion.create(
179
+ model="text-davinci-003",
180
+ prompt="Answer the question " + question + "using this answer: " + answer,
181
+ max_tokens=3000,
182
+ )
183
+ return response.choices[0].text.replace("\n\n", " ")
184
 
185
 
186
  # =====[ DEFINE INTERFACE ]===== #'