Update app.py
app.py CHANGED
@@ -210,11 +210,14 @@ def search(prompt: str):
         model="mixtral-8x22b-finetuned",
         generation_kwargs = {"max_tokens": 512}
     )
+    metadata_extractor = QueryMetadataExtractor()
+
     querying = Pipeline()
     querying.add_component("sparse_text_embedder", FastembedSparseTextEmbedder(model="Qdrant/bm42-all-minilm-l6-v2-attentions"))
     querying.add_component("dense_text_embedder", FastembedTextEmbedder(
         model="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", prefix="Represent this sentence for searching relevant passages: ")
     )
+    querying.add_component(instance=metadata_extractor, name="metadata_extractor")
     querying.add_component("retriever", QdrantHybridRetriever(document_store=document_store))
     querying.add_component("document_joiner", DocumentJoiner())
     querying.add_component("ranker", TransformersSimilarityRanker(model="BAAI/bge-m3"))
@@ -223,17 +226,20 @@ def search(prompt: str):
 
     querying.connect("sparse_text_embedder.sparse_embedding", "retriever.query_sparse_embedding")
     querying.connect("dense_text_embedder.embedding", "retriever.query_embedding")
+    querying.connect("metadata_extractor.filters", "retriever.filters")
     querying.connect("retriever", "document_joiner")
     querying.connect("document_joiner", "ranker")
     querying.connect("ranker.documents", "prompt_builder.documents")
     querying.connect("prompt_builder", "llm")
     querying.debug=True
-
+    metadata_fields = {"publish_date", "publisher", "document_type"}
     results = querying.run(
-        {
-
-
-
+        {
+            "dense_text_embedder": {"text": prompt},
+            "sparse_text_embedder": {"text": prompt},
+            "metadata_extractor": {"query": prompt, "metadata_fields": metadata_fields},
+            "ranker": {"query": prompt},
+            "prompt_builder": {"question": prompt}
        }
    )
 
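Note on QueryMetadataExtractor: the class is instantiated in this commit but not defined in the diff, so the code below is only a sketch of the shape the new wiring implies, namely a custom Haystack 2.x component that takes "query" and "metadata_fields" inputs and exposes a "filters" output which querying.connect("metadata_extractor.filters", "retriever.filters") can route into QdrantHybridRetriever. The year-regex rule and the meta.publish_date field name are placeholder assumptions for illustration, not the Space's actual extraction logic (which would more plausibly prompt an LLM to produce the filter conditions).

# Minimal sketch of a QueryMetadataExtractor-style component, assuming the
# inputs/outputs implied by the pipeline wiring above.
import re
from typing import Any, Dict, Optional, Set

from haystack import component


@component
class QueryMetadataExtractor:
    @component.output_types(filters=Optional[Dict[str, Any]])
    def run(self, query: str, metadata_fields: Set[str]):
        conditions = []

        # Placeholder rule: treat a four-digit year in the query as a
        # publish_date constraint, if that field was requested.
        if "publish_date" in metadata_fields:
            year = re.search(r"\b(19|20)\d{2}\b", query)
            if year:
                conditions.append(
                    {"field": "meta.publish_date", "operator": ">=", "value": year.group(0)}
                )

        # No conditions -> emit None so the retriever runs an unfiltered hybrid
        # search; otherwise wrap them in an AND group using Haystack's filter
        # syntax, e.g. {"operator": "AND", "conditions": [...]}.
        filters = {"operator": "AND", "conditions": conditions} if conditions else None
        return {"filters": filters}

Whatever the real extraction looks like, run() has to return a dict keyed by the declared output name ({"filters": ...}); Pipeline.connect matches that name against the retriever's filters input, which also accepts None for an unfiltered hybrid search.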