Commit
·
27b0d30
1
Parent(s):
8ed5a9b
Update Querying Data to run directly from Colab
Browse files
- Querying Data.ipynb +13 -5
Querying Data.ipynb
CHANGED
@@ -33,10 +33,11 @@
|
|
33 |
"import pandas.io.formats.style\n",
|
34 |
"import random\n",
|
35 |
"import functools\n",
|
|
|
36 |
"from typing import Literal\n",
|
37 |
"\n",
|
38 |
"SOURCE: Literal[\"danbooru\", \"e621\"] = \"e621\"\n",
|
39 |
-
"DATA_FOLDER = \"
|
40 |
]
|
41 |
},
|
42 |
{
|
@@ -105,10 +106,17 @@
|
|
105 |
"metadata": {},
|
106 |
"outputs": [],
|
107 |
"source": [
|
108 |
-
"
|
109 |
-
"
|
110 |
-
"
|
111 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
"tags_by_name = tags.copy(deep=True)\n",
|
113 |
"tags_by_name.set_index(\"name\", inplace=True)\n",
|
114 |
"tags.set_index(\"tag_id\", inplace=True)"
|
|
|
33 |
"import pandas.io.formats.style\n",
|
34 |
"import random\n",
|
35 |
"import functools\n",
|
36 |
+
"import os\n",
|
37 |
"from typing import Literal\n",
|
38 |
"\n",
|
39 |
"SOURCE: Literal[\"danbooru\", \"e621\"] = \"e621\"\n",
|
40 |
+
"DATA_FOLDER = \".\""
|
41 |
]
|
42 |
},
|
43 |
{
|
|
|
106 |
"metadata": {},
|
107 |
"outputs": [],
|
108 |
"source": [
|
109 |
+
"def get_feather(filename: str) -> pandas.DataFrame:\n",
|
110 |
+
" target = f\"{DATA_FOLDER}/{filename}.feather\"\n",
|
111 |
+
" if not os.path.exists(target):\n",
|
112 |
+
" !wget -O {filename}.feather https://huggingface.co/Specimen5423/E621TagAssociations/resolve/main/{filename}.feather?download=true\n",
|
113 |
+
" return pandas.read_feather(target)\n",
|
114 |
+
"\n",
|
115 |
+
"os.makedirs(DATA_FOLDER, exist_ok=True)\n",
|
116 |
+
"tags = get_feather(\"tags\")\n",
|
117 |
+
"posts_by_tag = get_feather(\"posts_by_tag\").set_index(\"tag_id\")\n",
|
118 |
+
"tags_by_post = get_feather(\"tags_by_post\").set_index(\"post_id\")\n",
|
119 |
+
"implications = get_feather(\"implications\")\n",
|
120 |
"tags_by_name = tags.copy(deep=True)\n",
|
121 |
"tags_by_name.set_index(\"name\", inplace=True)\n",
|
122 |
"tags.set_index(\"tag_id\", inplace=True)"
|