"""Helper utilities: document formatting/ordering, LangSmith client, reranker."""

import logging
import os
import random
from datetime import datetime
from functools import lru_cache
from typing import Sequence
from zoneinfo import ZoneInfo

import langsmith
from langchain_core.documents import Document
from langchain_community.document_transformers import LongContextReorder
from langchain.retrievers.document_compressors import FlashrankRerank

# Import-time side effect: request ERROR-level configuration of the root logger.
logging.basicConfig(level=logging.ERROR)


class DocumentFormatter:
    """Callable that renders a list of documents as one numbered text block."""

    def __init__(self, prefix: str):
        """Store the label placed before each document's 1-based number."""
        self.prefix = prefix

    def __call__(self, docs: list[Document]) -> str:
        """Return the documents' ``page_content`` joined into a single string.

        Each entry is rendered as ``"- <prefix> <n>:"`` followed by a blank
        line, a tab and the document text; entries are separated by a
        ``---`` line. An empty ``docs`` list yields an empty string.
        """
        return "\n---\n".join(
            [
                f"- {self.prefix} {i+1}:\n\n\t" + d.page_content
                for i, d in enumerate(docs)
            ]
        )


def get_datetime() -> str:
    """Return the current America/Vancouver time as a formatted string.

    The format is ``%A, %Y-%b-%d %H:%M:%S`` (weekday name, year,
    abbreviated month name, day, then 24-hour time).
    """
    return datetime.now(ZoneInfo("America/Vancouver")).strftime("%A, %Y-%b-%d %H:%M:%S")


def reorder_documents(docs: list[Document]) -> Sequence[Document]:
    """Return ``docs`` as reordered by LangChain's ``LongContextReorder``.

    The ordering itself is defined entirely by the library transformer; see
    its documentation for the exact placement rules.
    """
    return LongContextReorder().transform_documents(docs)


def randomize_documents(documents: list[Document]) -> list[Document]:
    """Return the documents in a random order.

    A shuffled copy is returned so the caller's list keeps its original
    order; previously the argument itself was shuffled in place, which
    silently reordered any list the caller still held a reference to.
    """
    shuffled = list(documents)
    random.shuffle(shuffled)
    return shuffled


def create_langsmith_client() -> langsmith.Client:
    """Configure the LangChain tracing environment and return a LangSmith client.

    Sets ``LANGCHAIN_TRACING_V2``, ``LANGCHAIN_PROJECT`` and
    ``LANGCHAIN_ENDPOINT`` in ``os.environ`` for the current process.

    Raises:
        EnvironmentError: if ``LANGCHAIN_API_KEY`` is unset or empty.
    """
    # Validate the key first so that a failed call does not leave tracing
    # switched on in the process environment without any credentials.
    if not os.getenv("LANGCHAIN_API_KEY"):
        raise EnvironmentError("Missing environment variable: LANGCHAIN_API_KEY")

    os.environ["LANGCHAIN_TRACING_V2"] = "true"
    os.environ["LANGCHAIN_PROJECT"] = "talltree-ai-assistant"
    os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
    return langsmith.Client()


@lru_cache(maxsize=1)
def get_reranker(
    top_n: int = 3, model: str = "ms-marco-MiniLM-L-12-v2"
) -> FlashrankRerank:
    """Return a ``FlashrankRerank`` built with the given ``top_n`` and ``model``.

    The result is memoised with ``lru_cache(maxsize=1)``: repeated calls with
    the same arguments reuse one instance, while a call with different
    arguments builds a new reranker and evicts the cached one.
    """
    return FlashrankRerank(top_n=top_n, model=model)