jcarnero committed on
Commit
8625e42
·
1 Parent(s): 77da018

lab notebook: first data look & training

Browse files
.gitignore CHANGED
@@ -1,3 +1,6 @@
 
 
 
1
  # Byte-compiled / optimized / DLL files
2
  __pycache__/
3
  *.py[cod]
 
1
+ # Project specific
2
+ /data
3
+
4
  # Byte-compiled / optimized / DLL files
5
  __pycache__/
6
  *.py[cod]
README.md CHANGED
@@ -1,2 +1,5 @@
1
  # birds-classification
2
- Train model for birds classification & gradio app
 
 
 
 
1
  # birds-classification
2
+
3
+ Train a model for bird classification and publish it as a Gradio app
4
+
5
+ Training is done using fastai; deployment mimics its transforms to publish a Gradio app that has no fastai dependencies.
deployment/requirements.txt ADDED
File without changes
training/birds/config.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
# Root folder for downloaded data (a relative path).
DATA_STORAGE_PATH = "../data"

# Dataset identifiers -- presumably the kaggle ``owner/dataset`` pair; verify
# against the code that consumes them.
OWNER = "veeralakrishna"
DATASET = "200-bird-species-with-11788-images"

# Folder of this dataset inside the storage root.
DATA_PATH = f"{DATA_STORAGE_PATH}/kaggle/{DATASET}"
6
+
7
+
training/birds/utils/kaggle.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+ from zipfile import ZipFile
4
+ from kaggle import api
5
+
6
+
7
def running_on_kaggle() -> bool:
    """
    Checks if script is running on kaggle

    Detection relies on the ``KAGGLE_KERNEL_RUN_TYPE`` environment variable:
    any non-empty value counts as running on kaggle.

    :return: true if the script is running on kaggle, false otherwise
    """
    # An unset variable and an empty string are both treated as "not on kaggle".
    return bool(os.environ.get("KAGGLE_KERNEL_RUN_TYPE", ""))
16
+
17
+
18
+ def download_competition_data(competition: str, input_path: str | Path) -> None:
19
+ """
20
+ Downloads data from kaggle competition only if input folder is empty
21
+ :param comptetition: string with the competition name id of kaggle
22
+ :param input_path: path of the input folder
23
+ """
24
+ data_path = Path(input_path)
25
+ if not data_path.exists():
26
+ data_path.mkdir(parents=True)
27
+ if not any(data_path.iterdir()):
28
+ api.competition_download_cli(competition, path=data_path)
29
+ with ZipFile(data_path / (competition + ".zip"), "r") as zipObj:
30
+ # Extract all the contents of zip file in current directory
31
+ zipObj.extractall(path=data_path)
32
+ os.remove(data_path / (competition + ".zip"))
33
+
34
+ print(os.listdir(data_path))
35
+
36
+
37
+ def download_dataset(owner: str, dataset: str, input_path: str | Path) -> None:
38
+ """
39
+ Downloads data from kaggle competition only if input folder is empty
40
+ :param comptetition: string with the competition name id of kaggle
41
+ :param input_path: path of the input folder
42
+ """
43
+ data_path = Path(input_path)
44
+ if not data_path.exists():
45
+ data_path.mkdir(parents=True)
46
+ if not any(data_path.iterdir()):
47
+ api.dataset_download_files(f"{owner}/{dataset}", path=data_path)
48
+ with ZipFile(data_path / (dataset + ".zip"), "r") as zipObj:
49
+ # Extract all the contents of zip file in current directory
50
+ zipObj.extractall(path=data_path)
51
+ os.remove(data_path / (dataset + ".zip"))
52
+
53
+ print(os.listdir(data_path))
training/environment.yml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: fastai
2
+ channels:
3
+ - pytorch
4
+ - nvidia
5
+ - conda-forge
6
+ - fastchan
7
+ - defaults
8
+ dependencies:
9
+ - python=3.10
10
+ - pip
11
+ - cudatoolkit=11.7
12
+ - pytorch==1.13.1
13
+ - torchvision
14
+ - pytorch-cuda=11.7
15
+ - fastcore=1.5.28
16
+ - fastai=2.7.11
17
+ - pip:
18
+ - ipykernel
19
+ - ipywidgets
20
+ - timm==0.6.12
21
+ - kaggle==1.5.12
training/notebooks/lab.ipynb ADDED
The diff for this file is too large to render. See raw diff