Update app.py
Browse files
app.py
CHANGED
@@ -6,18 +6,18 @@ from langchain.docstore.document import Document
|
|
6 |
from datasets import load_dataset, concatenate_datasets
|
7 |
|
8 |
st.set_page_config(
|
9 |
-
page_title="
|
10 |
-
page_icon="
|
11 |
layout="wide"
|
12 |
)
|
13 |
|
14 |
class RetrieverTool(Tool):
|
15 |
name = "retriever"
|
16 |
-
description = "Uses BM25 search to retrieve relevant
|
17 |
inputs = {
|
18 |
"query": {
|
19 |
"type": "string",
|
20 |
-
"description": "The
|
21 |
}
|
22 |
}
|
23 |
output_type = "string"
|
@@ -102,20 +102,19 @@ def format_search_results(results: str):
|
|
102 |
def get_agent():
|
103 |
"""Single function to handle data loading, processing, and agent creation"""
|
104 |
# Load dataset
|
105 |
-
dataset = load_dataset("
|
106 |
train_docs = dataset["train"]
|
107 |
-
|
108 |
-
source_docs = concatenate_datasets([train_docs, test_docs])
|
109 |
|
110 |
# Create documents
|
111 |
documents = [
|
112 |
Document(
|
113 |
-
page_content=item['
|
114 |
metadata={
|
115 |
-
|
116 |
-
"title": item['
|
117 |
-
"description": item['
|
118 |
-
"published_time": item['publishedTime']
|
119 |
}
|
120 |
)
|
121 |
for item in source_docs
|
@@ -138,41 +137,41 @@ def get_agent():
|
|
138 |
)
|
139 |
|
140 |
# Streamlit UI
|
141 |
-
st.title("
|
142 |
st.markdown("""
|
143 |
-
This search engine uses advanced AI to help you explore
|
144 |
-
It provides detailed, sourced information from a curated database of
|
145 |
""")
|
146 |
|
147 |
# Initialize agent
|
148 |
if 'agent' not in st.session_state:
|
149 |
-
with st.spinner("Loading
|
150 |
st.session_state.agent = get_agent()
|
151 |
|
152 |
# Search interface
|
153 |
search_query = st.text_input(
|
154 |
"π Search African History",
|
155 |
-
placeholder="E.g., Tell me about
|
156 |
-
help="Enter any question about
|
157 |
)
|
158 |
|
159 |
# Advanced search options
|
160 |
with st.expander("Advanced Search Options"):
|
161 |
search_type = st.radio(
|
162 |
"Search Type",
|
163 |
-
["General Query", "
|
164 |
help="Select the type of search you want to perform"
|
165 |
)
|
166 |
|
167 |
-
if search_type == "
|
168 |
-
search_query = f"Focus on the
|
169 |
elif search_type == "Geographic Region":
|
170 |
search_query = f"Focus on the region of: {search_query}"
|
171 |
|
172 |
# Search button
|
173 |
if st.button("Search", type="primary"):
|
174 |
if search_query:
|
175 |
-
with st.spinner("Searching
|
176 |
try:
|
177 |
results = st.session_state.agent.run(search_query)
|
178 |
|
@@ -199,7 +198,7 @@ with st.sidebar:
|
|
199 |
st.markdown("### About This Search Engine")
|
200 |
st.markdown("""
|
201 |
This search engine specializes in African history, providing:
|
202 |
-
- π Detailed
|
203 |
- π Source verification
|
204 |
- π Geographic context
|
205 |
- ⏳ Historical timeline context
|
@@ -211,4 +210,4 @@ with st.sidebar:
|
|
211 |
|
212 |
# Footer
|
213 |
st.markdown("---")
|
214 |
-
st.caption("Powered by SmolAgents, RAG, and
|
|
|
6 |
from datasets import load_dataset, concatenate_datasets
|
7 |
|
8 |
st.set_page_config(
|
9 |
+
page_title="Science Search Engine",
|
10 |
+
page_icon="π",
|
11 |
layout="wide"
|
12 |
)
|
13 |
|
14 |
class RetrieverTool(Tool):
|
15 |
name = "retriever"
|
16 |
+
description = "Uses BM25 search to retrieve relevant scientific documentation"
|
17 |
inputs = {
|
18 |
"query": {
|
19 |
"type": "string",
|
20 |
+
"description": "The scientific query in affirmative form rather than a question"
|
21 |
}
|
22 |
}
|
23 |
output_type = "string"
|
|
|
102 |
def get_agent():
|
103 |
"""Single function to handle data loading, processing, and agent creation"""
|
104 |
# Load dataset
|
105 |
+
dataset = load_dataset("camel-ai/biology")
|
106 |
train_docs = dataset["train"]
|
107 |
+
source_docs = concatenate_datasets([train_docs])
|
|
|
108 |
|
109 |
# Create documents
|
110 |
documents = [
|
111 |
Document(
|
112 |
+
page_content=item['message_2'],
|
113 |
metadata={
|
114 |
+
# "source": item['url'],
|
115 |
+
"title": item['message_1'],
|
116 |
+
"description": item['sub_topic'],
|
117 |
+
# "published_time": item['publishedTime']
|
118 |
}
|
119 |
)
|
120 |
for item in source_docs
|
|
|
137 |
)
|
138 |
|
139 |
# Streamlit UI
|
140 |
+
st.title("π Scientific Search Engine")
|
141 |
st.markdown("""
|
142 |
+
This search engine uses advanced AI to help you explore science.
|
143 |
+
It provides detailed, sourced information from a curated database of scientific knowledge.
|
144 |
""")
|
145 |
|
146 |
# Initialize agent
|
147 |
if 'agent' not in st.session_state:
|
148 |
+
with st.spinner("Loading database..."):
|
149 |
st.session_state.agent = get_agent()
|
150 |
|
151 |
# Search interface
|
152 |
search_query = st.text_input(
|
153 |
"π Search African History",
|
154 |
+
placeholder="E.g., Tell me about cancer in dogs",
|
155 |
+
help="Enter any question about science"
|
156 |
)
|
157 |
|
158 |
# Advanced search options
|
159 |
with st.expander("Advanced Search Options"):
|
160 |
search_type = st.radio(
|
161 |
"Search Type",
|
162 |
+
["General Query", "Scientific branches"],
|
163 |
help="Select the type of search you want to perform"
|
164 |
)
|
165 |
|
166 |
+
if search_type == "Scientific branches":
|
167 |
+
search_query = f"Focus on the specific scientific branch of: {search_query}"
|
168 |
elif search_type == "Geographic Region":
|
169 |
search_query = f"Focus on the region of: {search_query}"
|
170 |
|
171 |
# Search button
|
172 |
if st.button("Search", type="primary"):
|
173 |
if search_query:
|
174 |
+
with st.spinner("Searching records..."):
|
175 |
try:
|
176 |
results = st.session_state.agent.run(search_query)
|
177 |
|
|
|
198 |
st.markdown("### About This Search Engine")
|
199 |
st.markdown("""
|
200 |
This search engine specializes in African history, providing:
|
201 |
+
- π Detailed information
|
202 |
- π Source verification
|
203 |
- π Geographic context
|
204 |
- ⏳ Historical timeline context
|
|
|
210 |
|
211 |
# Footer
|
212 |
st.markdown("---")
|
213 |
+
st.caption("Powered by SmolAgents, RAG, and Camel AI Dataset")
|