from langchain.vectorstores import Chroma from langchain.embeddings import HuggingFaceEmbeddings from typing import List, Dict, Optional from tqdm import tqdm def load_and_setup_db( persist_directory: str, embeddings ) -> Chroma: """ Load the previously created ChromaDB with the same embedding model. Args: persist_directory: Directory where the database is stored embedding_model_name: Name of the embedding model to use Returns: Chroma: Loaded vector store """ # Load the existing database vectorstore = Chroma( embedding_function=embeddings, persist_directory=persist_directory ) return vectorstore def search_cases( vectorstore: Chroma, query: str, k: int = 5, metadata_filter: Optional[Dict] = None, score_threshold: Optional[float] = 0.0 ) -> List[Dict]: """ Search the database for relevant cases. Args: vectorstore: Loaded Chroma vector store query: Search query text k: Number of results to return metadata_filter: Optional filter for metadata fields score_threshold: Minimum similarity score threshold Returns: List of relevant documents with scores and metadata """ # Perform similarity search with metadata filtering docs_and_scores = vectorstore.similarity_search_with_score( query, k=k, filter=metadata_filter ) # Process and filter results results = [] for doc, score in docs_and_scores: # Convert score to similarity (assuming distance score) similarity = 1 - score # Apply score threshold if score_threshold and similarity < score_threshold: continue result = { 'content': doc.page_content, 'metadata': doc.metadata, 'similarity_score': round(similarity, 4) } results.append(result) if len(results)==0 and len(docs_and_scores)>0: results.append(docs_and_scores[0]) return results # Example usage function def search_and_display_results( vectorstore: Chroma, query: str, k: int = 5, metadata_filter: Optional[Dict] = None, score_threshold: float = 0.7 ) -> None: """ Search and display results in a formatted way. """ print(f"\nSearching for: {query}") print("-" * 50) results = search_cases( vectorstore=vectorstore, query=query, k=k, metadata_filter=metadata_filter, score_threshold=score_threshold ) if not results: print("No matching results found.") return print(f"Found {len(results)} relevant matches:\n") for i, result in enumerate(results, 1): print(f"Match {i}:") print(f"Similarity Score: {result['similarity_score']}") print(f"Metadata: {result['metadata']}") print(f"Content: {result['content'][:200]}...") # Show first 200 chars print("-" * 50)