LittleApple_fp16 committed on
Commit
ca96eac
·
1 Parent(s): 2720ecc
Files changed (3) hide show
  1. cyberharem/dataset/crawler.py +1 -1
  2. test.ipynb +2 -12
  3. waifu_get.py +78 -17
cyberharem/dataset/crawler.py CHANGED
@@ -156,7 +156,7 @@ def crawl_dataset_to_huggingface(
156
  repository = f'AppleHarem/{get_alphabet_name(name)}'
157
 
158
  hf_fs = get_hf_fs()
159
- if hf_fs.exists(f'{repository}/.gitattributes'):
160
  logging.warn(f'{repository} exists, skipped.')
161
  return
162
  origin_source = get_main_source(source, no_r18, bg_color, no_monochrome_check, drop_multi, skip_preprocess)
 
156
  repository = f'AppleHarem/{get_alphabet_name(name)}'
157
 
158
  hf_fs = get_hf_fs()
159
+ if hf_fs.exists(f'datasets/{repository}/.gitattributes'):
160
  logging.warn(f'{repository} exists, skipped.')
161
  return
162
  origin_source = get_main_source(source, no_r18, bg_color, no_monochrome_check, drop_multi, skip_preprocess)
test.ipynb CHANGED
@@ -1,15 +1,5 @@
1
  {
2
  "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": null,
6
- "id": "e4b4f4a7-1514-4de7-8594-06b2611746ff",
7
- "metadata": {},
8
- "outputs": [],
9
- "source": [
10
- "mkdir cyberharem && mv cyberharem.zip cyberharem/ && cd cyberharem/ && unzip cyberharem.zip && rm -f cyberharem.zip && cd ../"
11
- ]
12
- },
13
  {
14
  "cell_type": "code",
15
  "execution_count": null,
@@ -19,7 +9,7 @@
19
  },
20
  "outputs": [],
21
  "source": [
22
- "!python waifu_get.py --char 才羽モモイ --token token"
23
  ]
24
  },
25
  {
@@ -59,7 +49,7 @@
59
  "name": "python",
60
  "nbconvert_exporter": "python",
61
  "pygments_lexer": "ipython3",
62
- "version": "3.8.1"
63
  }
64
  },
65
  "nbformat": 4,
 
1
  {
2
  "cells": [
 
 
 
 
 
 
 
 
 
 
3
  {
4
  "cell_type": "code",
5
  "execution_count": null,
 
9
  },
10
  "outputs": [],
11
  "source": [
12
+ "!python waifu_get.py --char abc --index 1"
13
  ]
14
  },
15
  {
 
49
  "name": "python",
50
  "nbconvert_exporter": "python",
51
  "pygments_lexer": "ipython3",
52
+ "version": "3.10.6"
53
  }
54
  },
55
  "nbformat": 4,
waifu_get.py CHANGED
@@ -5,27 +5,88 @@ from waifuc.export import SaveExporter, TextualInversionExporter
5
  from waifuc.source import DanbooruSource, PixivSearchSource, ZerochanSource, LocalSource, GcharAutoSource
6
  from cyberharem.dataset.crawler import crawl_dataset_to_huggingface
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
def main():
    """Command-line entry point: crawl datasets for characters or artists.

    Options:
        --char    comma-separated names, each passed to
                  ``crawl_dataset_to_huggingface(name)``.
        --artist  comma-separated names, each passed to
                  ``crawl_dataset_to_huggingface(name, DanbooruSource)``;
                  takes precedence over ``--char`` when both are given.
        --token   value exported as the ``HF_TOKEN`` environment variable.
    """
    os.environ['ONNX_MODE'] = 'CPUExecutionProvider'
    parser = argparse.ArgumentParser()
    parser.add_argument('--char', type=str, help='角色列表')
    parser.add_argument('--artist', type=str, help='画师列表')
    parser.add_argument('--token', type=str, help='token')
    # Parse first: the arguments must exist before any of them is read.
    args = parser.parse_args()
    if not (args.artist or args.char):
        parser.error('one of --char or --artist is required')
    # os.environ only accepts strings, so leave HF_TOKEN alone when no token
    # was supplied on the command line.
    if args.token:
        os.environ['HF_TOKEN'] = args.token

    if args.artist:
        for ch in args.artist.split(','):
            crawl_dataset_to_huggingface(ch, DanbooruSource)
            print(ch + "完成")
    else:
        for ch in args.char.split(','):
            crawl_dataset_to_huggingface(ch)
            print(ch + "完成")
    print("全部完成")
29
 
30
 
31
  if __name__ == "__main__":
 
5
  from waifuc.source import DanbooruSource, PixivSearchSource, ZerochanSource, LocalSource, GcharAutoSource
6
  from cyberharem.dataset.crawler import crawl_dataset_to_huggingface
7
 
8
+ import gradio as gr
9
+ import os
10
+ import json
11
+ from waifuc.action import HeadCountAction, AlignMinSizeAction, CCIPAction, ThreeStageSplitAction, ModeConvertAction, ClassFilterAction, PersonSplitAction, TaggingAction, RatingFilterAction, NoMonochromeAction, RandomFilenameAction, FirstNSelectAction, FilterSimilarAction, FileExtAction
12
+ from waifuc.export import SaveExporter, TextualInversionExporter
13
+ from waifuc.source import DanbooruSource, PixivSearchSource, ZerochanSource, LocalSource, GcharAutoSource
14
+ from cyberharem.dataset.crawler import crawl_dataset_to_huggingface
15
+ from cyberharem.utils import get_hf_client, get_hf_fs
16
+ from hbutils.system import TemporaryDirectory
17
+ from cyberharem.utils import download_file as cyber_download_file
18
+ from huggingface_hub import hf_hub_url, hf_hub_download
19
+
20
+
21
def _split_names(chars):
    """Split a comma-separated string into stripped, non-empty names."""
    if not chars:
        return []
    return [part.strip() for part in chars.split(',') if part.strip()]


def _crawl_game_json(repo_id, remote_path):
    """Download one ``pixiv_characters.json`` from *repo_id* and crawl each character it lists."""
    # remote_path comes from globbing 'datasets/<repo_id>/*/pixiv_characters.json',
    # so the second-to-last component is the per-game directory. Paths inside a
    # hub repository always use '/', hence no os.path.join here.
    game = remote_path.split('/')[-2]
    local_json = hf_hub_download(
        repo_id=repo_id, repo_type='dataset',
        filename=f'{game}/pixiv_characters.json',
        # .get() instead of [...]: an unset HF_TOKEN is passed on as None
        # rather than aborting with KeyError.
        token=os.environ.get('HF_TOKEN'),
    )
    # hf_hub_download returns the local path of the downloaded file, so no
    # directory has to be prepared in the working directory beforehand.
    with open(local_json, 'r', encoding='utf-8') as f:
        characters = json.load(f)['characters']
    for entry in characters:
        jp = entry['jpname']
        print(jp, 'start...')
        crawl_dataset_to_huggingface(jp)
        print(jp + "完成")


def start_func(chars, is_cpu, udghs, game_index=None):
    """Crawl datasets either for explicit names or for the deepghs character lists.

    :param chars: comma-separated names; only used when ``udghs`` is falsy.
        Surrounding whitespace is stripped and empty entries are ignored.
    :param is_cpu: when truthy, sets the ``ONNX_MODE`` environment variable to
        ``'CPUExecutionProvider'`` before crawling.
    :param udghs: when truthy, ignore ``chars`` and take the names from the
        ``pixiv_characters.json`` files of the ``deepghs/game_characters``
        dataset repository (the ``jpname`` of every entry in ``characters``).
    :param game_index: 1-based position of a single game in the sorted list of
        JSON files; ``None`` or ``0`` processes every game.
    :return: ``str(chars) + " 上传完成"`` in name mode, ``"完成"`` otherwise.
    :raises IndexError: if ``game_index`` is non-zero and outside
        ``1..number of games``.
    """
    if is_cpu:
        os.environ['ONNX_MODE'] = 'CPUExecutionProvider'

    if not udghs:
        for ch in _split_names(chars):
            crawl_dataset_to_huggingface(ch)
            print(ch + "完成")
        return str(chars) + " 上传完成"

    dgrepo = 'deepghs/game_characters'
    print("Downloading jsons..")
    hf_fs = get_hf_fs()
    # Sorted so that a given game_index always refers to the same game.
    json_paths = sorted(hf_fs.glob(f'datasets/{dgrepo}/*/pixiv_characters.json'))
    if game_index:
        # Reject out-of-range values explicitly; a negative index would
        # otherwise wrap around and silently select the wrong game.
        if not 1 <= game_index <= len(json_paths):
            raise IndexError(
                f'game_index must be between 1 and {len(json_paths)}, got {game_index}'
            )
        json_paths = [json_paths[game_index - 1]]
    for remote_path in json_paths:
        _crawl_game_json(dgrepo, remote_path)
    return "完成"
81
+
82
 
83
def main():
    """Command-line entry point.

    With ``--char`` the comma-separated names are handed to ``start_func``
    directly. Without it, ``start_func`` is asked to use the deepghs
    character lists instead, optionally limited to one game via ``--index``.
    ``start_func`` is always called with ``is_cpu=True``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--char', type=str, help='角色列表', default=None)
    parser.add_argument('--index', type=int, default=None,
                        help='1-based index of the game to crawl when --char '
                             'is not given (default: all games)')
    args = parser.parse_args()
    # Use the deepghs lists only when no explicit names were supplied.
    use_deepghs = not args.char
    start_func(args.char, True, use_deepghs, args.index)
    print("全部完成")
 
 
 
 
 
 
 
 
 
 
90
 
91
 
92
  if __name__ == "__main__":