Erik Kenkel committed on
Commit
3af8a88
·
1 Parent(s): 3856bb9

updated and fixed model

Browse files
.idea/.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
4
+ # Editor-based HTTP Client requests
5
+ /httpRequests/
6
+ # Datasource local storage ignored files
7
+ /dataSources/
8
+ /dataSources.local.xml
.idea/dog-identifier.iml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$">
5
+ <excludeFolder url="file://$MODULE_DIR$/venv" />
6
+ </content>
7
+ <orderEntry type="jdk" jdkName="Python 3.10 (dog-identifier)" jdkType="Python SDK" />
8
+ <orderEntry type="sourceFolder" forTests="false" />
9
+ </component>
10
+ </module>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (dog-identifier)" project-jdk-type="Python SDK" />
4
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/dog-identifier.iml" filepath="$PROJECT_DIR$/.idea/dog-identifier.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="$PROJECT_DIR$" vcs="Git" />
5
+ </component>
6
+ </project>
app.py CHANGED
@@ -1,27 +1,25 @@
1
- from fastai.vision.all import *
2
  import gradio as gr
3
- import requests
4
- import json
5
 
6
- URL = 'https://dog.ceo/api/breeds/list/all'
 
 
 
7
 
8
- result = requests.get(url = URL).json()
9
- searchText = []
10
- for val in result['message'].items():
11
- if len(val[1]) > 0:
12
- for type in val[1]:
13
- searchText.append(f'{type} {val[0]}')
14
- else:
15
- searchText.append(val[0])
16
- searchText.sort()
17
 
18
  def classify_image(img):
19
- pred,idx,probs = learn.predict(img)
20
- return dict(zip(searchText, map(float, probs)))
21
 
22
  learn = load_learner('dogIdentifierModel.pkl')
23
 
24
- image = gr.components.Image(shape=(224, 224))
25
  label = gr.components.Label()
26
  examples = ['golden-retriever.jpg', 'german-shepherd.jpg', 'doberman.jpg', 'husky.jpg']
27
 
 
1
+ from fastai.vision.all import load_learner
2
  import gradio as gr
3
+ import pathlib
 
4
 
5
+ # Workaround for the PosixPath error that occurs when a model trained on Colab/Gradient is downloaded and then used for inference on Windows.
6
+ # Redirect PosixPath to WindowsPath:
7
+ temp = pathlib.PosixPath
8
+ pathlib.PosixPath = pathlib.WindowsPath
9
 
10
+
11
+ # Data below sourced from:
12
+ # URL = 'https://dog.ceo/api/breeds/list/all'
13
+ # The list is hard-coded as a tuple to keep it consistent (it previously broke when the breeds were fetched from the API)
14
+ dog_breeds = tuple(['affenpinscher', 'afghan hound', 'african', 'airedale', 'akita', 'american terrier', 'appenzeller', 'australian cattledog', 'australian terrier', 'basenji', 'basset hound', 'beagle', 'bedlington terrier', 'bernese mountain', 'bichon frise', 'blenheim spaniel', 'blood hound', 'bluetick', 'border collie', 'border terrier', 'borzoi', 'boston bulldog', 'bouvier', 'boxer', 'brabancon', 'briard', 'brittany spaniel', 'bull mastiff', 'cairn terrier', 'cardigan corgi', 'caucasian ovcharka', 'cavapoo', 'chesapeake retriever', 'chihuahua', 'chow', 'clumber', 'cockapoo', 'cocker spaniel', 'coonhound', 'cotondetulear', 'curly retriever', 'dachshund', 'dalmatian', 'dandie terrier', 'dhole', 'dingo', 'doberman', 'english bulldog', 'english hound', 'english mastiff', 'english setter', 'english sheepdog', 'english springer', 'entlebucher', 'eskimo', 'flatcoated retriever', 'fox terrier', 'french bulldog', 'german pointer', 'germanlonghair pointer', 'germanshepherd', 'giant schnauzer', 'golden retriever', 'gordon setter', 'great dane', 'groenendael', 'havanese', 'husky', 'ibizan hound', 'irish setter', 'irish spaniel', 'irish terrier', 'irish wolfhound', 'italian greyhound', 'italian segugio', 'japanese spaniel', 'japanese spitz', 'keeshond', 'kelpie', 'kerryblue terrier', 'komondor', 'kuvasz', 'labradoodle', 'labrador', 'lakeland terrier', 'lapphund finnish', 'leonberg', 'lhasa', 'malamute', 'malinois', 'maltese', 'medium poodle', 'mexicanhairless', 'miniature pinscher', 'miniature poodle', 'miniature schnauzer', 'mix', 'newfoundland', 'norfolk terrier', 'norwegian buhund', 'norwegian elkhound', 'norwich terrier', 'otterhound', 'papillon', 'patterdale terrier', 'pekinese', 'pembroke', 'pitbull', 'plott hound', 'pomeranian', 'pug', 'puggle', 'pyrenees', 'redbone', 'rhodesian ridgeback', 'rottweiler', 'russell terrier', 'saluki', 'samoyed', 'schipperke', 'scottish deerhound', 'scottish terrier', 'sealyham terrier', 'sharpei', 'shepherd australian', 'shetland 
sheepdog', 'shiba', 'shihtzu', 'silky terrier', 'spanish waterdog', 'staffordshire bullterrier', 'standard poodle', 'stbernard', 'sussex spaniel', 'swiss mountain', 'tervuren', 'tibetan mastiff', 'tibetan terrier', 'toy poodle', 'toy terrier', 'vizsla', 'walker hound', 'weimaraner', 'welsh spaniel', 'welsh terrier', 'westhighland terrier', 'wheaten terrier', 'whippet', 'yorkshire terrier'])
 
 
 
 
15
 
16
  def classify_image(img):
17
+ _, _, probs = learn.predict(img)
18
+ return dict(zip(dog_breeds, map(float, probs)))
19
 
20
  learn = load_learner('dogIdentifierModel.pkl')
21
 
22
+ image = gr.components.Image(shape=(396, 396))
23
  label = gr.components.Label()
24
  examples = ['golden-retriever.jpg', 'german-shepherd.jpg', 'doberman.jpg', 'husky.jpg']
25
 
dogIdentifierModel.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:00b112b5934fbc8b427263108210a43cb4f713859410075a3b0ce501cf7f6f8f
3
- size 115166765
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74f0e6053ff9c2449e247c70ca896f163f23824ac02bcffc38d7a3c53f083eb3
3
+ size 115323011
modelCreation.py CHANGED
@@ -1,81 +1,79 @@
1
- import timm
2
  from fastcore.all import *
3
  from fastdownload import download_url
4
  from fastai.vision.widgets import *
5
  from fastai.vision.all import *
6
- import os, shutil
7
- import time
8
  import requests
9
- import json
10
- from azure.cognitiveservices.search.imagesearch import ImageSearchClient as api
11
- from msrest.authentication import CognitiveServicesCredentials as auth
12
 
13
-
14
- def search_images_bing(key, term, min_sz=128, max_images=105):
15
- params = {'q': term, 'count': max_images, 'min_height': min_sz, 'min_width': min_sz}
16
- headers = {"Ocp-Apim-Subscription-Key": key}
17
- search_url = "https://api.bing.microsoft.com/v7.0/images/search"
18
- response = requests.get(search_url, headers=headers, params=params)
19
- response.raise_for_status()
20
- search_results = response.json()
21
- return L(search_results['value'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
 
24
  URL = 'https://dog.ceo/api/breeds/list/all'
25
 
 
26
  result = requests.get(url = URL).json()
27
- searchText = []
28
  for val in result['message'].items():
29
  if len(val[1]) > 0:
30
  for type in val[1]:
31
- searchText.append(f'{type} {val[0]}')
32
  else:
33
- searchText.append(val[0])
34
 
35
  path = Path('Dog_Types')
36
 
37
- key = os.environ.get('AZURE_SEARCH_KEY', 'INSERT KEY HERE')
38
-
39
- if not path.exists():
40
- path.mkdir()
41
- else:
42
- folder = 'Dog_Types'
43
- for filename in os.listdir(folder):
44
- file_path = os.path.join(folder, filename)
45
- try:
46
- if os.path.isfile(file_path) or os.path.islink(file_path):
47
- os.unlink(file_path)
48
- elif os.path.isdir(file_path):
49
- shutil.rmtree(file_path)
50
- except Exception as e:
51
- print('Failed to delete %s. Reason: %s' % (file_path, e))
52
 
53
- for o in searchText:
54
- try:
55
- dest = (path / o)
56
- dest.mkdir(exist_ok=True, parents=True)
57
- results = search_images_bing(key, f'{o} dog photos')
58
- download_images(dest, urls=results.attrgot('contentUrl'))
59
- resize_images(path/o, max_size=460, dest=path/o)
60
- except shutil.SameFileError:
61
- pass
62
 
63
- for breed in searchText:
 
 
64
  failed = verify_images(get_image_files(f'{path}/{breed}'))
65
  failed.map(Path.unlink)
66
 
67
 
 
 
 
 
68
  dataloaders = DataBlock(
69
  blocks=(ImageBlock, CategoryBlock),
70
  get_items=get_image_files,
71
  splitter=RandomSplitter(valid_pct=0.2, seed=42),
72
  get_y=parent_label,
73
- item_tfms=Resize(460),
74
- batch_tfms=aug_transforms(size=224, min_scale=0.75)
75
  ).dataloaders(path)
76
 
77
-
 
78
  learn = vision_learner(dataloaders, 'convnext_tiny_in22k', metrics=error_rate).to_fp16()
 
 
 
79
  learn.fine_tune(8, freeze_epochs=3)
80
 
81
  learn.export('dogIdentifierModel.pkl')
 
 
1
  from fastcore.all import *
2
  from fastdownload import download_url
3
  from fastai.vision.widgets import *
4
  from fastai.vision.all import *
5
+ import os
 
6
  import requests
 
 
 
7
 
8
+ def search_and_download_images(api_key, breeds, download_path, num_images=10):
9
+ headers = {
10
+ 'Ocp-Apim-Subscription-Key': api_key
11
+ }
12
+
13
+ for breed in breeds:
14
+ # Create a directory for the breed if it doesn't exist
15
+ breed_dir = os.path.join(download_path, breed)
16
+ os.makedirs(breed_dir, exist_ok=True)
17
+
18
+ # Initialize a counter for the number of successfully downloaded images
19
+ downloaded_count = 0
20
+
21
+ # Make the API request
22
+ params = {
23
+ 'q': f'{breed} dog',
24
+ 'count': num_images
25
+ }
26
+ response = requests.get('https://api.bing.microsoft.com/v7.0/images/search', headers=headers, params=params)
27
+
28
+ # L() is from fastai
29
+ results = L(response.json()['value'])
30
+ download_images(breed_dir, urls=results.attrgot('contentUrl'))
31
 
32
 
33
  URL = 'https://dog.ceo/api/breeds/list/all'
34
 
35
+ # Get the breeds of all the dogs (some of them require reformatting)
36
  result = requests.get(url = URL).json()
37
+ dog_breeds = []
38
  for val in result['message'].items():
39
  if len(val[1]) > 0:
40
  for type in val[1]:
41
+ dog_breeds.append(f'{type} {val[0]}')
42
  else:
43
+ dog_breeds.append(val[0])
44
 
45
  path = Path('Dog_Types')
46
 
47
+ api_key = os.environ.get('AZURE_SEARCH_KEY', 'INSERT KEY HERE')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
+ search_and_download_images(api_key, dog_breeds, path, num_images=200)
 
 
 
 
 
 
 
 
50
 
51
+ # Ensure that all images are able to be opened.
52
+ # If they cannot be opened, remove them
53
+ for breed in dog_breeds:
54
  failed = verify_images(get_image_files(f'{path}/{breed}'))
55
  failed.map(Path.unlink)
56
 
57
 
58
+ # Load the data into fastai datablock
59
+ # The data is randomly split into training (80%) and validation (20%) sets
60
+ # Resize the data to be 396x396 px
61
+ # Also perform augmentation transforms (this way, the model gets imperfect data to train on)
62
  dataloaders = DataBlock(
63
  blocks=(ImageBlock, CategoryBlock),
64
  get_items=get_image_files,
65
  splitter=RandomSplitter(valid_pct=0.2, seed=42),
66
  get_y=parent_label,
67
+ item_tfms=Resize(396),
68
+ batch_tfms=aug_transforms(size=396, min_scale=0.75)
69
  ).dataloaders(path)
70
 
71
+ # Create a learner from the ConvNeXt-Tiny model pretrained on ImageNet-22k
72
+ # NOTE: When this happens, fastai will adjust the start of the model according to the input size of your data
73
  learn = vision_learner(dataloaders, 'convnext_tiny_in22k', metrics=error_rate).to_fp16()
74
+ # Because of this, for 3 epochs, I decided to freeze the weights of the pretrained model (as this has already been optimized)
75
+ # Only the additional layers that were added will adjust in the first 3 epochs
76
+ # After the 3 epochs, the model will update all weights
77
  learn.fine_tune(8, freeze_epochs=3)
78
 
79
  learn.export('dogIdentifierModel.pkl')
requirements.txt CHANGED
@@ -1,7 +1,8 @@
1
- timm==0.6.7
2
- fastai==2.7.4
3
- torch==1.11.0
 
4
  gradio==3.1.4
5
- numpy==1.23.0
6
- pandas==1.4.3
7
- azure-cognitiveservices-search-imagesearch==2.0.0
 
1
+ matplotlib==3.5.2
2
+ timm==0.9.7
3
+ fastai==2.7.12
4
+ torch==2.0.0
5
  gradio==3.1.4
6
+ numpy==1.23.5
7
+ pandas==2.0.2
8
+ Pillow==9.2.0