|
import logging
from typing import Any, Optional

from langchain_community.vectorstores.chroma import Chroma

from src.logging import logging_info

from .BaseDB import BaseDB
|
|
|
|
|
|
|
|
|
class ChromaDB(BaseDB):
    """``BaseDB`` implementation backed by LangChain's ``Chroma`` vector store.

    The methods below read ``self.persist_dir``, ``self.embedding`` and
    ``self.text_splitter``. None of them is defined in this class; they are
    presumably provided by ``BaseDB`` (TODO confirm). ``self.client`` is
    created by :meth:`init_db`, which therefore has to run before any of the
    add/search/delete methods are used.
    """

    def __init__(
        self, embedding_name: Optional[str] = None, persist_dir=None
    ) -> None:
        """Forward both arguments unchanged to ``BaseDB.__init__``.

        Args:
            embedding_name: Passed through to the base class.
            persist_dir: Passed through to the base class.
        """
        super().__init__(embedding_name, persist_dir)

    def init_db(self):
        """Create the ``Chroma`` client and store it on ``self.client``."""
        self.client = Chroma(
            persist_directory=self.persist_dir, embedding_function=self.embedding
        )

    def addStories(self, stories: str, metas: Optional[dict] = None):
        """Split ``stories`` into chunks and add them to the store.

        Args:
            stories: Raw text handed to ``self.text_splitter``.
            metas: Metadata attached to every chunk. The same object is
                repeated once per chunk, so all chunks share it.
        """
        # Split once and reuse the result for both logging and insertion.
        split_stories = self.text_splitter(stories)

        # Nothing to index: bail out instead of failing on the ``[-1]``
        # lookup below or sending an empty batch to the store.
        if not split_stories:
            return

        logging_info(split_stories[-1])

        self.client.add_texts(
            texts=split_stories, metadatas=[metas] * len(split_stories)
        )

    def searchBySim(
        self, query, n_results=5, metas: Optional[dict] = None, only_return_document=True
    ):
        """Run a similarity search against the store.

        Args:
            query: Query passed to ``similarity_search``.
            n_results: Forwarded as ``k``.
            metas: Forwarded as ``filter``.
            only_return_document: When true, return only the ``page_content``
                of each hit; otherwise return the hits as received.

        Returns:
            A list of ``page_content`` values, or the unmodified result of
            ``similarity_search``.
        """
        result = self.client.similarity_search(query, k=n_results, filter=metas)

        if only_return_document:
            return [i.page_content for i in result]

        return result

    def deleteStoriesByMeta(self, metas):
        """Delete every entry whose ids are returned by ``searchByMeta(metas)``.

        Args:
            metas: Filter forwarded to :meth:`searchByMeta`.
        """
        # NOTE(review): ``metas`` is forwarded as-is; confirm what the store
        # returns for a ``None``/empty filter before calling this without one.
        ids = self.searchByMeta(metas=metas)["ids"]
        # Skip the delete call entirely when nothing matched.
        if ids:
            self.client.delete(ids)

    def searchByMeta(
        self, metas=None, include: Optional[list[str]] = None
    ) -> dict[str, Any]:
        """Fetch entries from the store by metadata filter.

        Args:
            metas: Forwarded as ``where`` to the client's ``get``.
            include: Forwarded as ``include`` to the client's ``get``.

        Returns:
            The client's ``get`` result; callers in this class read its
            ``"ids"`` key.
        """
        return self.client.get(where=metas, include=include)
|
|