pvanand committed on
Commit
9078d82
·
verified ·
1 Parent(s): 5a22824

Update helper_functions_api.py

Browse files
Files changed (1) hide show
  1. helper_functions_api.py +25 -30
helper_functions_api.py CHANGED
@@ -96,10 +96,8 @@ Guidelines for extraction:
96
  SysPromptDefault = "You are an expert AI, complete the given task. Do not add any additional comments."
97
  SysPromptSearch = """You are a search query generator, create a concise Google search query, focusing only on the main topic and omitting additional redundant details, include year if necessory, 2024, Do not add any additional comments. OUTPUT ONLY THE SEARCH QUERY
98
  #Additional instructions:
99
- ##Use the following search operators if necessory
100
- OR #to cover multiple topics
101
- * #wildcard to match any word or phrase
102
- AND #to include specific topics."""
103
 
104
  import tiktoken # Used to limit tokens
105
  encoding = tiktoken.encoding_for_model("gpt-3.5-turbo") # Instead of Llama3 using available option/ replace if found anything better
@@ -172,32 +170,29 @@ def remove_stopwords(text):
172
  return ' '.join(filtered_text)
173
 
174
  def rephrase_content(data_format, content, query):
175
-
176
- if data_format == "Structured data":
177
- return together_response(f"""Here is the scraped website data:
178
- <scraped_data>
179
- {content}
180
- </scraped_data>
181
-
182
- Your task is to extract information from this data that is relevant to the following query:
183
- <query>{query}</query>
184
- """,
185
- SysPrompt=SysPromptData,
186
- max_tokens=900,
187
- )
188
- elif data_format == "Quantitative data":
189
- return together_response(
190
- f"return only the numerical or quantitative data regarding the query: {{{query}}} structured into .md tables, using the scraped context:{{{limit_tokens(content,token_limit=1000)}}}",
191
- SysPrompt=SysPromptData,
192
- max_tokens=500,
193
- )
194
- else:
195
- return together_response(
196
- f"return only the factual information regarding the query: {{{query}}} using the scraped context:{{{limit_tokens(content,token_limit=1000)}}}",
197
- SysPrompt=SysPromptData,
198
- max_tokens=500,
199
- )
200
-
201
 
202
  def fetch_content(url):
203
  try:
 
96
  # Generic system prompt: tells the model to complete the task without adding extra commentary.
  SysPromptDefault = "You are an expert AI, complete the given task. Do not add any additional comments."
97
  # System prompt for turning a request into a concise Google search query; the only search
  # operator it suggests is OR. The text below is sent to the model verbatim.
  SysPromptSearch = """You are a search query generator, create a concise Google search query, focusing only on the main topic and omitting additional redundant details, include year if necessory, 2024, Do not add any additional comments. OUTPUT ONLY THE SEARCH QUERY
98
  #Additional instructions:
99
+ ##Use the following search operator if necessory
100
+ OR #to cover multiple topics"""
 
 
101
 
102
  import tiktoken # Used to limit tokens
103
  encoding = tiktoken.encoding_for_model("gpt-3.5-turbo") # Instead of Llama3 using available option/ replace if found anything better
 
170
  return ' '.join(filtered_text)
171
 
172
  def rephrase_content(data_format, content, query):
173
+ try:
174
+ if data_format == "Structured data":
175
+ return together_response(
176
+ f"""return only the relevant information regarding the query: {{{query}}}. Output should be concise chunks of \
177
+ paragraphs or tables or both, extracted from the following scraped context {{{limit_tokens(content,token_limit=2000)}}}""",
178
+ SysPrompt=SysPromptData,
179
+ max_tokens=900,
180
+ )
181
+ elif data_format == "Quantitative data":
182
+ return together_response(
183
+ f"return only the numerical or quantitative data regarding the query: {{{query}}} structured into .md tables, using the scraped context:{{{limit_tokens(content,token_limit=2000)}}}",
184
+ SysPrompt=SysPromptData,
185
+ max_tokens=500,
186
+ )
187
+ else:
188
+ return together_response(
189
+ f"return only the relevant information regarding the query: {{{query}}} using the scraped context:{{{limit_tokens(content,token_limit=2000)}}}",
190
+ SysPrompt=SysPromptData,
191
+ max_tokens=500,
192
+ )
193
+ except Exception as e:
194
+ print(f"An error occurred: {str(e)}")
195
+ return limit_tokens(content,token_limit=500)
 
 
 
196
 
197
  def fetch_content(url):
198
  try: