Spaces:
Sleeping
Sleeping
File size: 29,724 Bytes
43789f0 |
|
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn.model_selection import train_test_split\n",
"import matplotlib.pyplot as plt\n",
"import cv2\n",
"import os\n",
"from PIL import Image\n",
"from tqdm import tqdm\n",
"import torch\n",
"from torch.utils.data import Dataset, DataLoader\n",
"from torchvision import transforms"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# creating train and test dataset"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def getData(type):\n",
"    \"\"\"Collect one row per annotation for the `type` split under D-Fire/.\n",
"\n",
"    Reads every file in ``D-Fire/{type}/labels`` and opens the matching\n",
"    ``D-Fire/{type}/images/<stem>.jpg`` to get its pixel size.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    type : str\n",
"        Split folder name; the notebook calls this with 'train' and 'test'.\n",
"\n",
"    Returns\n",
"    -------\n",
"    (rows, n)\n",
"        rows : list of lists. Each annotated line becomes\n",
"            [image, width, height, int(label), <remaining fields as float>...].\n",
"            A label file with no annotation lines becomes the 4-item row\n",
"            [image, width, height, 2].\n",
"        n : number of entries in the labels directory.\n",
"\n",
"    Raises\n",
"    ------\n",
"    FileNotFoundError\n",
"        If the labels directory or a matching image is missing.\n",
"    ValueError\n",
"        If a label field cannot be parsed as a number.\n",
"    \"\"\"\n",
"    label_dir = f'D-Fire/{type}/labels'\n",
"    image_dir = f'D-Fire/{type}/images'\n",
"    # List once so the progress bar total and the returned count always agree.\n",
"    filenames = os.listdir(label_dir)\n",
"\n",
"    rows = []\n",
"    for filename in tqdm(filenames):\n",
"        label_path = os.path.join(label_dir, filename)\n",
"        # splitext instead of slicing, so the stem is right for any extension length.\n",
"        image = os.path.splitext(filename)[0] + '.jpg'\n",
"\n",
"        # Only the size is needed; the context manager releases the file handle.\n",
"        with Image.open(f'{image_dir}/{image}') as img:\n",
"            width, height = img.size\n",
"        pre = [image, width, height]\n",
"\n",
"        # Blank lines are skipped so they cannot raise IndexError below.\n",
"        with open(label_path) as fp:\n",
"            annotations = [ln.split() for ln in fp if ln.strip()]\n",
"\n",
"        if not annotations:\n",
"            # Empty (or whitespace-only) label file: keep the image with label 2.\n",
"            rows.append(pre + [2])\n",
"            continue\n",
"\n",
"        for fields in annotations:\n",
"            values = list(map(float, fields))\n",
"            values[0] = int(values[0])  # first field is the class id\n",
"            rows.append(pre + values)\n",
"    return rows, len(filenames)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|ββββββββββ| 17221/17221 [00:11<00:00, 1447.90it/s]\n",
"100%|ββββββββββ| 4306/4306 [00:03<00:00, 1340.39it/s]\n"
]
}
],
"source": [
"# get train and test data\n",
"train, n_train = getData(\"train\")\n",
"df_train = pd.DataFrame(train, columns= [\"Image\", \"Width\", \"Height\", \"Label\", \"x_min\", \"y_min\", \"x_max\", \"y_max\"])\n",
"test, n_test = getData(\"test\")\n",
"df_test = pd.DataFrame(test, columns= [\"Image\", \"Width\", \"Height\", \"Label\", \"x_min\", \"y_min\", \"x_max\", \"y_max\"])"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Image</th>\n",
" <th>Width</th>\n",
" <th>Height</th>\n",
" <th>Label</th>\n",
" <th>x_min</th>\n",
" <th>y_min</th>\n",
" <th>x_max</th>\n",
" <th>y_max</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>AoF05695.jpg</td>\n",
" <td>1280</td>\n",
" <td>720</td>\n",
" <td>0</td>\n",
" <td>0.700781</td>\n",
" <td>0.379167</td>\n",
" <td>0.039062</td>\n",
" <td>0.105556</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>WEB08898.jpg</td>\n",
" <td>640</td>\n",
" <td>360</td>\n",
" <td>0</td>\n",
" <td>0.477344</td>\n",
" <td>0.291667</td>\n",
" <td>0.264063</td>\n",
" <td>0.555556</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>WEB01102.jpg</td>\n",
" <td>640</td>\n",
" <td>360</td>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>WEB07573.jpg</td>\n",
" <td>1100</td>\n",
" <td>619</td>\n",
" <td>0</td>\n",
" <td>0.465000</td>\n",
" <td>0.475767</td>\n",
" <td>0.290000</td>\n",
" <td>0.906300</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>WEB08640.jpg</td>\n",
" <td>640</td>\n",
" <td>360</td>\n",
" <td>0</td>\n",
" <td>0.578125</td>\n",
" <td>0.506944</td>\n",
" <td>0.709375</td>\n",
" <td>0.936111</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Image Width Height Label x_min y_min x_max y_max\n",
"0 AoF05695.jpg 1280 720 0 0.700781 0.379167 0.039062 0.105556\n",
"1 WEB08898.jpg 640 360 0 0.477344 0.291667 0.264063 0.555556\n",
"2 WEB01102.jpg 640 360 2 NaN NaN NaN NaN\n",
"3 WEB07573.jpg 1100 619 0 0.465000 0.475767 0.290000 0.906300\n",
"4 WEB08640.jpg 640 360 0 0.578125 0.506944 0.709375 0.936111"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# train sample\n",
"df_train.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# data split exploration"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Rows per class label, always ordered 0, 1, 2. Reindexing keeps a zero entry for a\n",
"# class that is absent from a split, so both arrays always have exactly three values.\n",
"class_ids = [0, 1, 2]\n",
"group_tr = df_train[\"Label\"].value_counts().reindex(class_ids, fill_value=0).to_numpy()\n",
"group_te = df_test[\"Label\"].value_counts().reindex(class_ids, fill_value=0).to_numpy()\n",
"\n",
"# The denominator is the number of label files, not the number of rows,\n",
"# so the three ratios of a split need not sum to 1.\n",
"group_tr_ratio = group_tr / n_train\n",
"group_te_ratio = group_te / n_test"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# statistics on data ratio split\n",
"x = np.arange(3)\n",
"plt.bar(x, group_tr_ratio, color ='r', width = 0.25,\n",
" edgecolor ='grey', label ='Train')\n",
"x = [x + 0.25 for x in x]\n",
"plt.bar(x, group_te_ratio, color ='b', width = 0.25,\n",
" edgecolor ='grey', label ='Test')\n",
"plt.xlabel('Labels')\n",
"plt.ylabel('Proprtion Ratio')\n",
"plt.xticks([0.15, 1.15, 2.15], [\"Smoke\", \"Fire\", \"None\"])\n",
"plt.legend()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# total count for different classes"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Smoke</th>\n",
" <th>Fire</th>\n",
" <th>Neither</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Train</th>\n",
" <td>9550</td>\n",
" <td>11814</td>\n",
" <td>7833</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Test</th>\n",
" <td>2315</td>\n",
" <td>2878</td>\n",
" <td>2005</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Smoke Fire Neither\n",
"Train 9550 11814 7833\n",
"Test 2315 2878 2005"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.DataFrame([group_tr, group_te], columns=[\"Smoke\", \"Fire\", \"Neither\"], index=[\"Train\", \"Test\"])"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Image Label\n",
"AoF00000.jpg 2 1\n",
"AoF00001.jpg 1 1\n",
"AoF00002.jpg 0 1\n",
"AoF00003.jpg 2 1\n",
"AoF00004.jpg 2 1\n",
" ..\n",
"WEB09440.jpg 0 2\n",
"WEB09441.jpg 0 2\n",
" 1 3\n",
"WEB09442.jpg 0 1\n",
" 1 1\n",
"Name: Width, Length: 20984, dtype: int64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_train.groupby([\"Image\", \"Label\"]).count()[\"Width\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(42)\n",
"# NOTE(review): this binds the randint function itself, not a sampled index - looks unfinished\n",
"idx = np.random.randint\n",
"\n",
"# Boolean row masks over df_train, one per class label\n",
"labels = df_train[\"Label\"]\n",
"smoke = labels.eq(0)\n",
"fire = labels.eq(1)\n",
"neither = labels.eq(2)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "AIClass",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
|