pycui's picture
Add RealChar deployment for HuggingFace (V0)
babeaf6
raw
history blame contribute delete
No virus
3.19 kB
import os
from contextlib import ExitStack
from pathlib import Path
from typing import Optional, Union

from dotenv import load_dotenv
from langchain.text_splitter import CharacterTextSplitter
from llama_index import SimpleDirectoryReader

from realtime_ai_character.database.chroma import get_chroma
from realtime_ai_character.logger import get_logger
from realtime_ai_character.utils import Singleton, Character
# Load variables from a local .env file (if one exists) into the process
# environment at import time.
# NOTE(review): presumably this must run before get_chroma() and friends read
# their configuration -- confirm before moving it.
load_dotenv()
# Module-level logger obtained from the project's logging helper, keyed by
# this module's name.
logger = get_logger(__name__)
class CatalogManager(Singleton):
    """Registry of AI characters backed by a Chroma vector store.

    Every sub-directory that sits next to this module (except ``__pycache__``
    and ``archive``) is treated as one character. Its ``system`` and ``user``
    files hold the LLM prompts, and its ``data`` directory holds the documents
    that are split into chunks and added to Chroma.

    The rest of this module obtains the instance through
    ``CatalogManager.get_instance()``.
    """

    def __init__(self, overwrite=True):
        """Load all characters and, optionally, rebuild the vector store.

        :param overwrite: if True (the default), delete the existing Chroma
            collection, re-index every character's ``data`` directory and
            persist the result. If False, only the prompts are loaded and the
            stored documents are left as they are.
        """
        super().__init__()
        self.db = get_chroma()
        if overwrite:
            logger.info('Overwriting existing data in the chroma.')
            self.db.delete_collection()
            # Re-acquire the store after dropping the collection.
            # NOTE(review): presumably get_chroma() recreates the collection
            # -- confirm against its implementation.
            self.db = get_chroma()

        # Maps display name (e.g. "Elon Musk") -> Character.
        self.characters = {}
        self.load_characters(overwrite)
        if overwrite:
            logger.info('Persisting data in the chroma.')
            self.db.persist()
        logger.info(
            f"Total document load: {self.db._client.get_collection('llm').count()}")

    def get_character(self, name) -> Optional[Character]:
        """Return the character registered under *name*, or None if unknown.

        Names are stored in the title-cased form built by ``load_character``
        (directory ``elon_musk`` is registered as ``Elon Musk``).
        """
        return self.characters.get(name)

    def load_character(self, directory):
        """Read one character's prompts from *directory* and register it.

        :param directory: path of the character folder (``str`` or
            ``pathlib.Path``); it must contain the text files ``system`` and
            ``user``.
        :return: the display name the character was registered under.
        :raises OSError: if either prompt file cannot be read.
        """
        directory = Path(directory)
        # Decode explicitly as UTF-8 so prompts load the same way regardless
        # of the platform's locale encoding.
        system_prompt = (directory / 'system').read_text(encoding='utf-8')
        user_prompt = (directory / 'user').read_text(encoding='utf-8')

        # "elon_musk" -> "Elon Musk"
        name = directory.stem.replace('_', ' ').title()

        self.characters[name] = Character(
            name=name,
            llm_system_prompt=system_prompt,
            llm_user_prompt=user_prompt,
        )
        return name

    def load_characters(self, overwrite):
        """Load characters from the character_catalog directory.

        Each character's ``data`` sub-directory is used to create documents
        that are added to the chroma.

        :param overwrite: if True, (re)index each character's data in the
            chroma; if False, only the prompts are loaded.
        """
        path = Path(__file__).parent
        excluded_dirs = {'__pycache__', 'archive'}

        directories = [
            d for d in path.iterdir()
            if d.is_dir() and d.name not in excluded_dirs
        ]

        for directory in directories:
            character_name = self.load_character(directory)
            if overwrite:
                self.load_data(character_name, directory / 'data')
                logger.info(f'Loaded data for character: {character_name}')
        logger.info(
            f'Loaded {len(self.characters)} characters: names {list(self.characters.keys())}')

    def load_data(self, character_name: str, data_path: Union[str, Path]):
        """Chunk the documents under *data_path* and add them to the chroma.

        :param character_name: display name stored in each chunk's metadata
            under ``character_name``.
        :param data_path: directory holding the character's source documents.
        """
        loader = SimpleDirectoryReader(Path(data_path))
        documents = loader.load_data()

        text_splitter = CharacterTextSplitter(
            separator='\n',
            chunk_size=500,
            chunk_overlap=100)
        # One metadata dict per source document, in the same order as the
        # texts; it carries the owning character and the source document id.
        docs = text_splitter.create_documents(
            texts=[d.text for d in documents],
            metadatas=[{
                'character_name': character_name,
                'id': d.id_,
            } for d in documents])
        self.db.add_documents(docs)
def get_catalog_manager():
    """Return the CatalogManager obtained from ``CatalogManager.get_instance()``."""
    manager = CatalogManager.get_instance()
    return manager
if __name__ == '__main__':
    # Running this module as a script builds the catalog manager through the
    # module's accessor, which calls CatalogManager.get_instance().
    manager = get_catalog_manager()