cynthesis-v4 / agents /dataset_semantic_search_agent.py
abdoh-alkhateeb's picture
Narrow the size of the dataframe returned by DatasetSemanticSearchAgent
ad7bff9
raw
history blame contribute delete
No virus
1.06 kB
import pandas as pd
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
class DatasetSemanticSearchAgent:
    """Run semantic searches against a FAISS vector store loaded from disk.

    The store is queried with ``similarity_search_with_score``; hits whose
    score is at or above a threshold are discarded and the metadata of the
    remaining documents is returned as a narrow DataFrame.
    """

    # Metadata keys renamed before the output columns are selected.
    _COLUMN_RENAMES = {"_id": "id", "full_text": "content"}
    # Columns (post-rename) exposed to callers, in output order.
    _OUTPUT_COLUMNS = ("title", "author", "date", "url", "content")

    def __init__(self, vector_store_path: str) -> None:
        """Load the FAISS index stored at *vector_store_path*.

        Args:
            vector_store_path: Location of a store previously saved locally.
        """
        # SECURITY: allow_dangerous_deserialization=True opts in to the
        # loader's unsafe (pickle-based, per the LangChain docs)
        # deserialization. Only point this at index files from a trusted
        # source.
        self._vector_store = FAISS.load_local(
            vector_store_path,
            HuggingFaceEmbeddings(),
            allow_dangerous_deserialization=True,
        )

    def run(
        self,
        query: str,
        limit: int = 10,
        score_threshold: float = 1.2,
    ) -> tuple[pd.DataFrame, dict[str, float]]:
        """Search the vector store and return the matching documents.

        Args:
            query: Free-text query passed to the vector store.
            limit: Maximum number of hits requested from the store (``k``).
            score_threshold: Hits with a score greater than or equal to this
                value are dropped, i.e. lower scores are treated as better.

        Returns:
            A ``(frame, usage)`` tuple. ``frame`` has the columns ``title``,
            ``author``, ``date``, ``url`` and ``content`` (``content`` comes
            from the ``full_text`` metadata key) with one row per kept hit;
            it is empty, but still carries those columns, when nothing
            passes the threshold. ``usage`` is always ``{"cost": 0}``.

        Raises:
            KeyError: If hits were kept but their metadata lacks one of the
                output columns.
        """
        usage = {"cost": 0}
        columns = list(self._OUTPUT_COLUMNS)

        docs_with_scores = self._vector_store.similarity_search_with_score(query, k=limit)

        # Copy each metadata dict so building rows never mutates the
        # documents held by the vector store. The score is only used for
        # filtering: it is not one of the output columns.
        rows = [
            dict(doc.metadata)
            for doc, score in docs_with_scores
            if score < score_threshold
        ]

        if not rows:
            # A frame built from no rows has no columns, so selecting the
            # output columns would raise; return a well-formed empty frame.
            return pd.DataFrame(columns=columns), usage

        frame = pd.DataFrame(rows).rename(columns=self._COLUMN_RENAMES)
        return frame[columns], usage