Spaces:
Running
Running
Update helper_functions_api.py
Browse files
- helper_functions_api.py +26 -11
helper_functions_api.py
CHANGED
@@ -73,16 +73,26 @@ import trafilatura
|
|
73 |
llm_default_small = "meta-llama/Llama-3-8b-chat-hf"
|
74 |
llm_default_medium = "meta-llama/Llama-3-70b-chat-hf"
|
75 |
|
76 |
-
SysPromptData = """You are expert in information extraction from the given context.
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
|
82 |
-
|
83 |
-
|
84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
"""
|
|
|
86 |
SysPromptDefault = "You are an expert AI, complete the given task. Do not add any additional comments."
|
87 |
SysPromptSearch = """You are a search query generator, create a concise Google search query, focusing only on the main topic and omitting additional redundant details, include year if necessory, 2024, Do not add any additional comments. OUTPUT ONLY THE SEARCH QUERY
|
88 |
#Additional instructions:
|
@@ -164,9 +174,14 @@ def remove_stopwords(text):
|
|
164 |
def rephrase_content(data_format, content, query):
|
165 |
|
166 |
if data_format == "Structured data":
|
167 |
-
return together_response(f"""
|
168 |
-
|
169 |
-
|
|
|
|
|
|
|
|
|
|
|
170 |
SysPrompt=SysPromptData,
|
171 |
max_tokens=900,
|
172 |
)
|
|
|
73 |
llm_default_small = "meta-llama/Llama-3-8b-chat-hf"
|
74 |
llm_default_medium = "meta-llama/Llama-3-70b-chat-hf"
|
75 |
|
76 |
+
# SysPromptData = """You are expert in information extraction from the given context.
|
77 |
+
# Steps to follow:
|
78 |
+
# 1. Check if relevant factual data regarding <USER QUERY> is present in the <SCRAPED DATA>.
|
79 |
+
# - IF YES, extract the maximum relevant factual information related to <USER QUERY> from the <SCRAPED DATA>.
|
80 |
+
# - IF NO, then return "N/A"
|
81 |
|
82 |
+
# Rules to follow:
|
83 |
+
# - Return N/A if information is not present in the scraped data.
|
84 |
+
# - FORGET EVERYTHING YOU KNOW, Only output information that is present in the scraped data, DO NOT MAKE UP INFORMATION
|
85 |
+
# """
|
86 |
+
SysPromptData = """
|
87 |
+
You are an AI assistant tasked with extracting relevant information from scraped website data based on a given query. Your goal is to provide accurate and concise information that directly relates to the query, using only the data provided.
|
88 |
+
Guidelines for extraction:
|
89 |
+
1. Only use information present in the scraped data.
|
90 |
+
2. Focus on extracting facts, tables, and direct quotes that are relevant to the query.
|
91 |
+
3. If there is no relevant information in the scraped data, state that clearly.
|
92 |
+
4. Do not make assumptions or add information not present in the data.
|
93 |
+
5. If the query is ambiguous, interpret it in the most reasonable way based on the available data.
|
94 |
"""
|
95 |
+
|
96 |
SysPromptDefault = "You are an expert AI, complete the given task. Do not add any additional comments."
|
97 |
SysPromptSearch = """You are a search query generator, create a concise Google search query, focusing only on the main topic and omitting additional redundant details, include year if necessory, 2024, Do not add any additional comments. OUTPUT ONLY THE SEARCH QUERY
|
98 |
#Additional instructions:
|
|
|
174 |
def rephrase_content(data_format, content, query):
|
175 |
|
176 |
if data_format == "Structured data":
|
177 |
+
return together_response(f"""Here is the scraped website data:
|
178 |
+
<scraped_data>
|
179 |
+
{content}
|
180 |
+
</scraped_data>
|
181 |
+
|
182 |
+
Your task is to extract information from this data that is relevant to the following query:
|
183 |
+
<query>{query}</query>
|
184 |
+
""",
|
185 |
SysPrompt=SysPromptData,
|
186 |
max_tokens=900,
|
187 |
)
|