zetavg
update
35fba55 unverified
raw
history blame
2.85 kB
import os
import shutil
import fnmatch
import json
from ..globals import Global
def init_data_dir():
current_file_path = os.path.abspath(__file__)
parent_directory_path = os.path.dirname(current_file_path)
project_dir_path = os.path.abspath(
os.path.join(parent_directory_path, "..", ".."))
os.makedirs(os.path.join(Global.data_dir, "lora_models"), exist_ok=True)
copy_sample_data_if_not_exists(os.path.join(project_dir_path, "templates"),
os.path.join(Global.data_dir, "templates"))
copy_sample_data_if_not_exists(os.path.join(project_dir_path, "datasets"),
os.path.join(Global.data_dir, "datasets"))
def copy_sample_data_if_not_exists(source, destination):
if os.path.exists(destination):
return
print(f"Copying sample data to \"{destination}\"")
shutil.copytree(source, destination)
def get_available_template_names():
templates_directory_path = os.path.join(Global.data_dir, "templates")
all_files = os.listdir(templates_directory_path)
return [os.path.splitext(filename)[0] for filename in all_files if fnmatch.fnmatch(filename, "*.json")]
def get_available_dataset_names():
datasets_directory_path = os.path.join(Global.data_dir, "datasets")
all_files = os.listdir(datasets_directory_path)
return [filename for filename in all_files if fnmatch.fnmatch(filename, "*.json") or fnmatch.fnmatch(filename, "*.jsonl")]
def get_available_lora_model_names():
datasets_directory_path = os.path.join(Global.data_dir, "lora_models")
all_items = os.listdir(datasets_directory_path)
return [item for item in all_items if os.path.isdir(os.path.join(datasets_directory_path, item))]
def get_path_of_available_lora_model(name):
datasets_directory_path = os.path.join(Global.data_dir, "lora_models")
path = os.path.join(datasets_directory_path, name)
if os.path.isdir(path):
return path
return None
def get_dataset_content(name):
file_name = os.path.join(Global.data_dir, "datasets", name)
if not os.path.exists(file_name):
raise ValueError(
f"Can't read {file_name} from datasets. File does not exist.")
with open(file_name, "r") as file:
if fnmatch.fnmatch(name, "*.json"):
return json.load(file)
elif fnmatch.fnmatch(name, "*.jsonl"):
data = []
for line_number, line in enumerate(file, start=1):
try:
data.append(json.loads(line))
except Exception as e:
raise ValueError(
f"Error parsing JSON on line {line_number}: {e}")
return data
else:
raise ValueError(
f"Unknown file format: {file_name}. Expects '*.json' or '*.jsonl'")