LittleApple-fp16 committed on
Commit
862921e
1 Parent(s): 400590a

Upload start.ipynb

Browse files
Files changed (1) hide show
  1. danbooru_cawler/start.ipynb +103 -0
danbooru_cawler/start.ipynb ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "91225aac-0181-478d-bb6a-2ad7512ec0ca",
6
+ "metadata": {
7
+ "tags": []
8
+ },
9
+ "source": [
10
+ "### 更改设置\n",
11
+ "更改并运行下方单元格"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": 13,
17
+ "id": "0c42eb88-737c-4ead-b954-df29809e0076",
18
+ "metadata": {
19
+ "tags": []
20
+ },
21
+ "outputs": [
22
+ {
23
+ "name": "stdout",
24
+ "output_type": "stream",
25
+ "text": [
26
+ "Overwriting danbooru_crawler/settings.py\n"
27
+ ]
28
+ }
29
+ ],
30
+ "source": [
31
+ "%%writefile danbooru_crawler/settings.py\n",
32
+ "BOT_NAME = \"danbooru_crawler\"\n",
33
+ "SEARCH_TAG = \"cat girl\" # 搜索内容\n",
34
+ "SEARCH_TYPE = 1 # 0 / 1 抓取的图片类型:0 代表缩略图,1 代表原图\n",
35
+ "SEARCH_LINK = False # True / False 是否开启连锁搜索(即同时搜索子图)。训练画风时可以打开;训练概念等时建议关闭,否则会出现与tag特征不符的图,导致污染训练集\n",
36
+ "SEARCH_TAG_TXT = True # True / False 是否一并抓取图片自身的tag,推荐打开\n",
37
+ "\n",
38
+ "SPIDER_MODULES = [\"danbooru_crawler.spiders\"]\n",
39
+ "NEWSPIDER_MODULE = \"danbooru_crawler.spiders\"\n",
40
+ "IMAGES_STORE = \"./pics\"\n",
41
+ "ROBOTSTXT_OBEY = False\n",
42
+ "ITEM_PIPELINES = {\n",
43
+ " \"danbooru_crawler.pipelines.PicsDownloadPipeline\": 1,\n",
44
+ " # \"scrapy.pipelines.PicsDownloadPipeline\": 1,\n",
45
+ "}\n",
46
+ "REQUEST_FINGERPRINTER_IMPLEMENTATION = \"2.7\"\n",
47
+ "TWISTED_REACTOR = \"twisted.internet.asyncioreactor.AsyncioSelectorReactor\"\n",
48
+ "FEED_EXPORT_ENCODING = \"utf-8\""
49
+ ]
50
+ },
51
+ {
52
+ "cell_type": "markdown",
53
+ "id": "97c71a2b-9474-462e-9910-3fcbea431f53",
54
+ "metadata": {
55
+ "tags": []
56
+ },
57
+ "source": [
58
+ "### 运行"
59
+ ]
60
+ },
61
+ {
62
+ "cell_type": "code",
63
+ "execution_count": null,
64
+ "id": "5b398c0d-effe-4891-94cf-2b4148b149bb",
65
+ "metadata": {
66
+ "tags": []
67
+ },
68
+ "outputs": [],
69
+ "source": [
70
+ "!python main.py"
71
+ ]
72
+ },
73
+ {
74
+ "cell_type": "code",
75
+ "execution_count": null,
76
+ "id": "35873bfc-8e08-4017-a262-5c61f84fc235",
77
+ "metadata": {},
78
+ "outputs": [],
79
+ "source": []
80
+ }
81
+ ],
82
+ "metadata": {
83
+ "kernelspec": {
84
+ "display_name": "Python 3 (ipykernel)",
85
+ "language": "python",
86
+ "name": "python3"
87
+ },
88
+ "language_info": {
89
+ "codemirror_mode": {
90
+ "name": "ipython",
91
+ "version": 3
92
+ },
93
+ "file_extension": ".py",
94
+ "mimetype": "text/x-python",
95
+ "name": "python",
96
+ "nbconvert_exporter": "python",
97
+ "pygments_lexer": "ipython3",
98
+ "version": "3.8.1"
99
+ }
100
+ },
101
+ "nbformat": 4,
102
+ "nbformat_minor": 5
103
+ }