PMB / init_dataset.py
Sergidev's picture
v1
17c173b
raw
history blame
865 Bytes
from huggingface_hub import create_repo, HfApi
from datasets import Dataset
import pandas as pd
import os
# Repository id passed as `repo_id` (with repo_type="dataset") to the Hub
# calls in this script; it names the dataset that is looked up and created.
DATASET_REPO = "Sergidev/PMBMemory"
def init_dataset():
    """Create the ``DATASET_REPO`` dataset on the Hugging Face Hub if missing.

    The repository is looked up with ``HfApi.repo_info``. If it already
    exists, a message is printed and nothing else happens, so existing
    contents are left alone. If the Hub reports the repository as not
    found, it is created and seeded with an empty table that has the
    columns ``timestamp``, ``prompt``, ``response`` and ``topic``.

    Raises:
        Exception: Any lookup failure other than "repository not found"
            (for example a network or authentication error) is propagated
            instead of being mistaken for a missing dataset. Errors raised
            while creating the repository or pushing the table are
            propagated as well.
    """
    # Imported locally so this function is self-contained with respect to
    # the module-level import block.
    from huggingface_hub.utils import RepositoryNotFoundError

    api = HfApi()

    # Keep the try body to the single call that signals "missing", and
    # catch only that signal: a broad handler would turn connectivity or
    # credential problems into an attempt to create the repository.
    try:
        api.repo_info(repo_id=DATASET_REPO, repo_type="dataset")
    except RepositoryNotFoundError:
        repo_exists = False
    else:
        repo_exists = True

    if repo_exists:
        print(f"Dataset {DATASET_REPO} already exists.")
        return

    print(f"Creating dataset {DATASET_REPO}...")
    create_repo(repo_id=DATASET_REPO, repo_type="dataset")

    # Create an empty dataframe with the required columns.
    # NOTE(review): with zero rows the column types are inferred from an
    # empty frame -- confirm the pushed schema is what later writers expect.
    df = pd.DataFrame(columns=["timestamp", "prompt", "response", "topic"])

    # Convert to a dataset and push it to the hub.
    dataset = Dataset.from_pandas(df)
    dataset.push_to_hub(DATASET_REPO)
    print(f"Dataset {DATASET_REPO} created successfully.")
# Script entry point: initialise the dataset only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    init_dataset()