# NOTE(review): the original lines "Spaces:" / "Runtime error" / "Runtime error"
# were Hugging Face Spaces page-scrape artifacts (UI banner text), not Python
# code; they have been removed so the file parses.
import argparse
import json
import os
from pathlib import Path

import gradio as gr
from hbutils.system import TemporaryDirectory
from huggingface_hub import hf_hub_url, hf_hub_download
from waifuc.action import (
    HeadCountAction, AlignMinSizeAction, CCIPAction, ThreeStageSplitAction,
    ModeConvertAction, ClassFilterAction, PersonSplitAction, TaggingAction,
    RatingFilterAction, NoMonochromeAction, RandomFilenameAction,
    FirstNSelectAction, FilterSimilarAction, FileExtAction,
)
from waifuc.export import SaveExporter, TextualInversionExporter
from waifuc.source import DanbooruSource, PixivSearchSource, ZerochanSource, LocalSource, GcharAutoSource

from cyberharem.dataset.crawler import crawl_dataset_to_huggingface
from cyberharem.utils import get_hf_client, get_hf_fs
from cyberharem.utils import download_file as cyber_download_file
def _load_game_characters(dgrepo, name):
    """Fetch one game's ``pixiv_characters.json`` and return its contents.

    :param dgrepo: dataset repo id, e.g. ``'deepghs/game_characters'``.
    :param name: a glob result of the form
        ``'datasets/<repo>/<game>/pixiv_characters.json'``.
    :return: ``(game_dir, characters)`` where ``game_dir`` is the per-game
        directory name (created in the cwd) and ``characters`` is the list of
        character dicts from the JSON file.
    """
    game_dir = os.path.basename(os.path.dirname(name))
    # The crawl writes per-game artifacts (e.g. log.txt) under this directory.
    os.makedirs(game_dir, exist_ok=True)
    js = hf_hub_download(
        repo_id=dgrepo, repo_type='dataset',
        # as_posix(): the Hub API expects forward slashes even on Windows.
        filename=Path(os.path.join(game_dir, 'pixiv_characters.json')).as_posix(),
        # .get() instead of ['HF_TOKEN']: fall back to the cached/anonymous
        # token instead of raising KeyError when the variable is unset.
        token=os.environ.get('HF_TOKEN'),
    )
    with open(js, 'r', encoding='utf-8') as f:
        characters = json.load(f)['characters']
    return game_dir, characters


def start_func(chars, is_cpu, udghs, game_index=None):
    """Crawl character datasets and upload them to Hugging Face.

    :param chars: comma-separated character names (used when ``udghs`` is
        falsy); surrounding whitespace and empty entries are ignored.
    :param is_cpu: force onnxruntime onto the CPU execution provider.
    :param udghs: when truthy, ignore ``chars`` and crawl characters listed
        in the ``deepghs/game_characters`` dataset repo instead.
    :param game_index: optional 1-based index selecting a single game from
        that repo; ``None``/0 means crawl every game.
    :return: a human-readable completion message.
    """
    if is_cpu:
        # Must be set before any ONNX model is loaded.
        os.environ['ONNX_MODE'] = 'CPUExecutionProvider'
    if not udghs:
        # Manual mode: crawl each explicitly named character.
        for ch in (c.strip() for c in chars.split(',')):
            if not ch:
                continue  # tolerate stray commas / surrounding spaces
            crawl_dataset_to_huggingface(ch)
            print(ch + "完成")
        return str(chars) + " 上传完成"
    dgrepo = 'deepghs/game_characters'
    with TemporaryDirectory():
        print("Downloading jsons..")
        hf_fs = get_hf_fs()
        _dgdatas = list(hf_fs.glob(f'datasets/{dgrepo}/*/pixiv_characters.json'))
        if game_index:
            # Single-game mode: game_index is 1-based into the glob results
            # (raises IndexError when out of range, as before).
            _, characters = _load_game_characters(dgrepo, _dgdatas[game_index - 1])
            for char in characters:
                jp = char['jpname']
                print(jp, 'start...')
                crawl_dataset_to_huggingface(jp)
                print(jp + "完成")
        else:
            # Full-repo mode: crawl every character of every game.
            for name in _dgdatas:
                game_dir, characters = _load_game_characters(dgrepo, name)
                for char in characters:
                    jp = char['jpname']
                    print(jp, 'start...')
                    # Status marker: 'w' rewrites the file each iteration, so
                    # log.txt always names the character currently in crawl.
                    with open(os.path.join(game_dir, 'log.txt'), 'w') as log_f:
                        print(f'{jp} is in crawl.', file=log_f)
                    crawl_dataset_to_huggingface(jp)
                    print(jp + "完成")
    return "完成"
def main():
    """CLI entry point.

    ``--char``: comma-separated character names; when given, only those
    characters are crawled.  When omitted, the whole
    ``deepghs/game_characters`` repo is crawled instead.
    ``--index``: optional 1-based game index (only meaningful in repo mode).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--char', type=str, help='角色列表', default=None)
    parser.add_argument('--index', type=int, default=None)
    args = parser.parse_args()
    # CLI always runs on CPU; repo mode (udghs) is enabled exactly when no
    # explicit character list was supplied.
    start_func(args.char, True, not args.char, args.index)
    print("全部完成")
if __name__ == "__main__": | |
main() | |