pvanand committed on
Commit
5a22824
1 Parent(s): c25e4d4

Update helper_functions_api.py

Browse files
Files changed (1) hide show
  1. helper_functions_api.py +26 -11
helper_functions_api.py CHANGED
@@ -73,16 +73,26 @@ import trafilatura
73
  llm_default_small = "meta-llama/Llama-3-8b-chat-hf"
74
  llm_default_medium = "meta-llama/Llama-3-70b-chat-hf"
75
 
76
- SysPromptData = """You are expert in information extraction from the given context.
77
- Steps to follow:
78
- 1. Check if relevant factual data regarding <USER QUERY> is present in the <SCRAPED DATA>.
79
- - IF YES, extract the maximum relevant factual information related to <USER QUERY> from the <SCRAPED DATA>.
80
- - IF NO, then return "N/A"
81
 
82
- Rules to follow:
83
- - Return N/A if information is not present in the scraped data.
84
- - FORGET EVERYTHING YOU KNOW, Only output information that is present in the scraped data, DO NOT MAKE UP INFORMATION
 
 
 
 
 
 
 
 
 
85
  """
 
86
  SysPromptDefault = "You are an expert AI, complete the given task. Do not add any additional comments."
87
  SysPromptSearch = """You are a search query generator, create a concise Google search query, focusing only on the main topic and omitting additional redundant details, include year if necessory, 2024, Do not add any additional comments. OUTPUT ONLY THE SEARCH QUERY
88
  #Additional instructions:
@@ -164,9 +174,14 @@ def remove_stopwords(text):
164
  def rephrase_content(data_format, content, query):
165
 
166
  if data_format == "Structured data":
167
- return together_response(f"""
168
- <SCRAPED DATA>{content}</SCRAPED DATA>
169
- extract the maximum relevant factual information covering all aspects of <USER QUERY>{query}</USER QUERY> ONLY IF AVAILABLE in the scraped data.""",
 
 
 
 
 
170
  SysPrompt=SysPromptData,
171
  max_tokens=900,
172
  )
 
73
  llm_default_small = "meta-llama/Llama-3-8b-chat-hf"
74
  llm_default_medium = "meta-llama/Llama-3-70b-chat-hf"
75
 
76
+ # SysPromptData = """You are expert in information extraction from the given context.
77
+ # Steps to follow:
78
+ # 1. Check if relevant factual data regarding <USER QUERY> is present in the <SCRAPED DATA>.
79
+ # - IF YES, extract the maximum relevant factual information related to <USER QUERY> from the <SCRAPED DATA>.
80
+ # - IF NO, then return "N/A"
81
 
82
+ # Rules to follow:
83
+ # - Return N/A if information is not present in the scraped data.
84
+ # - FORGET EVERYTHING YOU KNOW, Only output information that is present in the scraped data, DO NOT MAKE UP INFORMATION
85
+ # """
86
+ SysPromptData = """
87
+ You are an AI assistant tasked with extracting relevant information from scraped website data based on a given query. Your goal is to provide accurate and concise information that directly relates to the query, using only the data provided.
88
+ Guidelines for extraction:
89
+ 1. Only use information present in the scraped data.
90
+ 2. Focus on extracting facts, tables, and direct quotes that are relevant to the query.
91
+ 3. If there is no relevant information in the scraped data, state that clearly.
92
+ 4. Do not make assumptions or add information not present in the data.
93
+ 5. If the query is ambiguous, interpret it in the most reasonable way based on the available data.
94
  """
95
+
96
  SysPromptDefault = "You are an expert AI, complete the given task. Do not add any additional comments."
97
  SysPromptSearch = """You are a search query generator, create a concise Google search query, focusing only on the main topic and omitting additional redundant details, include year if necessory, 2024, Do not add any additional comments. OUTPUT ONLY THE SEARCH QUERY
98
  #Additional instructions:
 
174
  def rephrase_content(data_format, content, query):
175
 
176
  if data_format == "Structured data":
177
+ return together_response(f"""Here is the scraped website data:
178
+ <scraped_data>
179
+ {content}
180
+ </scraped_data>
181
+
182
+ Your task is to extract information from this data that is relevant to the following query:
183
+ <query>{query}</query>
184
+ """,
185
  SysPrompt=SysPromptData,
186
  max_tokens=900,
187
  )