import os import json from typing import Dict, Any import uuid from datetime import datetime import pytz import huggingface_hub from huggingface_hub import Repository class InteractionsLogger: def __init__(self, name: str, persist=False): self.persist = persist self.counter = 0 self.name = name # unique id HF_TOKEN = os.environ.get("HF_TOKEN") HF_DATASET_REPO_URL = os.environ.get("HF_DATASET_REPO_URL") if (HF_TOKEN is not None) and (HF_DATASET_REPO_URL is not None): self.repo = Repository( local_dir="data", clone_from=HF_DATASET_REPO_URL, use_auth_token=HF_TOKEN ) else: self.persist = False def set_goal(self, goal: str): # Initialize two variables for saving two files (self.messages for # training and self.structure_data for later use) self.messages = [{"goal": goal}] self.structured_data = {"goal": goal} def add_system(self, more: Dict): self.convos = [{"from": "system"} | more] def add_ai(self, msg: str): self.convos.append({"from": "ai", "value": msg}) self.messages.append({"id": f"{self.name}_{self.counter}", "conversations": self.convos}) self.counter += 1 def add_structured_data(self, data: Dict[str, Any]): self.structured_data.update({f"turn_{self.counter}": data}) def add_message(self, data: Dict[str, Any]): self.structured_data.update(data) def save(self): # add current datetime self.add_message({"datetime": datetime.now(pytz.utc).strftime("%m/%d/%Y %H:%M:%S %Z%z")}) if self.persist: # TODO: want to add retry in a loop? self.repo.git_pull() fname = uuid.uuid4().hex[:16] with open(f"./data/{fname}.json", "w") as f: json.dump(self.messages, f, indent=2) with open(f"./data/{fname}.clean.json", "w") as f: json.dump(self.structured_data, f, indent=2) commit_url = self.repo.push_to_hub() def add_cost(self, cost): self.messages.append({"metrics": cost})