Specimen5423 committed on
Commit
27b0d30
·
1 Parent(s): 8ed5a9b

Update Querying Data to run directly from Colab

Browse files
Files changed (1) hide show
  1. Querying Data.ipynb +13 -5
Querying Data.ipynb CHANGED
@@ -33,10 +33,11 @@
33
  "import pandas.io.formats.style\n",
34
  "import random\n",
35
  "import functools\n",
 
36
  "from typing import Literal\n",
37
  "\n",
38
  "SOURCE: Literal[\"danbooru\", \"e621\"] = \"e621\"\n",
39
- "DATA_FOLDER = \"H:/Data/TagSuggest/e621_dataframes\""
40
  ]
41
  },
42
  {
@@ -105,10 +106,17 @@
105
  "metadata": {},
106
  "outputs": [],
107
  "source": [
108
- "tags = pandas.read_feather(f\"{DATA_FOLDER}/tags.feather\")\n",
109
- "posts_by_tag = pandas.read_feather(f\"{DATA_FOLDER}/posts_by_tag.feather\").set_index(\"tag_id\")\n",
110
- "tags_by_post = pandas.read_feather(f\"{DATA_FOLDER}/tags_by_post.feather\").set_index(\"post_id\")\n",
111
- "implications = pandas.read_feather(f\"{DATA_FOLDER}/implications.feather\")\n",
 
 
 
 
 
 
 
112
  "tags_by_name = tags.copy(deep=True)\n",
113
  "tags_by_name.set_index(\"name\", inplace=True)\n",
114
  "tags.set_index(\"tag_id\", inplace=True)"
 
33
  "import pandas.io.formats.style\n",
34
  "import random\n",
35
  "import functools\n",
36
+ "import os\n",
37
  "from typing import Literal\n",
38
  "\n",
39
  "SOURCE: Literal[\"danbooru\", \"e621\"] = \"e621\"\n",
40
+ "DATA_FOLDER = \".\""
41
  ]
42
  },
43
  {
 
106
  "metadata": {},
107
  "outputs": [],
108
  "source": [
109
+ "def get_feather(filename: str) -> pandas.DataFrame:\n",
110
+ " target = f\"{DATA_FOLDER}/{filename}.feather\"\n",
111
+ " if not os.path.exists(target):\n",
112
+ " !wget -O {filename}.feather https://huggingface.co/Specimen5423/E621TagAssociations/resolve/main/{filename}.feather?download=true\n",
113
+ " return pandas.read_feather(target)\n",
114
+ "\n",
115
+ "os.makedirs(DATA_FOLDER, exist_ok=True)\n",
116
+ "tags = get_feather(\"tags\")\n",
117
+ "posts_by_tag = get_feather(\"posts_by_tag\").set_index(\"tag_id\")\n",
118
+ "tags_by_post = get_feather(\"tags_by_post\").set_index(\"post_id\")\n",
119
+ "implications = get_feather(\"implications\")\n",
120
  "tags_by_name = tags.copy(deep=True)\n",
121
  "tags_by_name.set_index(\"name\", inplace=True)\n",
122
  "tags.set_index(\"tag_id\", inplace=True)"