Spaces:

ekenkel
/

dog-identifier

Build error

App Files Files Community

Erik Kenkel committed on Oct 24, 2023

Commit

3af8a88

1 Parent(s): 3856bb9

updated and fixed model

Browse files

Files changed (10) hide show

.idea/.gitignore +8 -0
.idea/dog-identifier.iml +10 -0
.idea/inspectionProfiles/profiles_settings.xml +6 -0
.idea/misc.xml +4 -0
.idea/modules.xml +8 -0
.idea/vcs.xml +6 -0
app.py +14 -16
dogIdentifierModel.pkl +2 -2
modelCreation.py +44 -46
requirements.txt +7 -6

.idea/.gitignore ADDED Viewed

	@@ -0,0 +1,8 @@

+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml

.idea/dog-identifier.iml ADDED Viewed

	@@ -0,0 +1,10 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$">
+      <excludeFolder url="file://$MODULE_DIR$/venv" />
+    </content>
+    <orderEntry type="jdk" jdkName="Python 3.10 (dog-identifier)" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>

.idea/inspectionProfiles/profiles_settings.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>

.idea/misc.xml ADDED Viewed

	@@ -0,0 +1,4 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (dog-identifier)" project-jdk-type="Python SDK" />
+</project>

.idea/modules.xml ADDED Viewed

	@@ -0,0 +1,8 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/dog-identifier.iml" filepath="$PROJECT_DIR$/.idea/dog-identifier.iml" />
+    </modules>
+  </component>
+</project>

.idea/vcs.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>

app.py CHANGED Viewed

@@ -1,27 +1,25 @@
-from fastai.vision.all import *
 import gradio as gr
-import requests
-import json
-URL = 'https://dog.ceo/api/breeds/list/all'
-result = requests.get(url = URL).json()
-searchText = []
-for val in result['message'].items():
-    if len(val[1]) > 0:
-        for type in val[1]:
-            searchText.append(f'{type} {val[0]}')
-    else:
-        searchText.append(val[0])
-searchText.sort()
 def classify_image(img):
-    pred,idx,probs = learn.predict(img)
-    return dict(zip(searchText, map(float, probs)))
 learn = load_learner('dogIdentifierModel.pkl')
-image = gr.components.Image(shape=(224, 224))
 label = gr.components.Label()
 examples = ['golden-retriever.jpg', 'german-shepherd.jpg', 'doberman.jpg', 'husky.jpg']

+from fastai.vision.all import load_learner
 import gradio as gr
+import pathlib
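+# Work around the PosixPath error raised when a model trained/exported on Colab or Gradient (Linux) is loaded for inference on Windows.
+# Redirect PosixPath to WindowsPath so the paths pickled inside the model can be loaded.
+# NOTE: this redirect is only appropriate on Windows; on a POSIX host WindowsPath cannot be instantiated, so guard it with an OS check before deploying elsewhere.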
+temp = pathlib.PosixPath
+pathlib.PosixPath = pathlib.WindowsPath
+# Data below sourced from:
+# URL = 'https://dog.ceo/api/breeds/list/all'
+# For consistency, the breed list is hard-coded as a tuple (fetching the breeds from the API at runtime previously broke the app)
+dog_breeds = tuple(['affenpinscher', 'afghan hound', 'african', 'airedale', 'akita', 'american terrier', 'appenzeller', 'australian cattledog', 'australian terrier', 'basenji', 'basset hound', 'beagle', 'bedlington terrier', 'bernese mountain', 'bichon frise', 'blenheim spaniel', 'blood hound', 'bluetick', 'border collie', 'border terrier', 'borzoi', 'boston bulldog', 'bouvier', 'boxer', 'brabancon', 'briard', 'brittany spaniel', 'bull mastiff', 'cairn terrier', 'cardigan corgi', 'caucasian ovcharka', 'cavapoo', 'chesapeake retriever', 'chihuahua', 'chow', 'clumber', 'cockapoo', 'cocker spaniel', 'coonhound', 'cotondetulear', 'curly retriever', 'dachshund', 'dalmatian', 'dandie terrier', 'dhole', 'dingo', 'doberman', 'english bulldog', 'english hound', 'english mastiff', 'english setter', 'english sheepdog', 'english springer', 'entlebucher', 'eskimo', 'flatcoated retriever', 'fox terrier', 'french bulldog', 'german pointer', 'germanlonghair pointer', 'germanshepherd', 'giant schnauzer', 'golden retriever', 'gordon setter', 'great dane', 'groenendael', 'havanese', 'husky', 'ibizan hound', 'irish setter', 'irish spaniel', 'irish terrier', 'irish wolfhound', 'italian greyhound', 'italian segugio', 'japanese spaniel', 'japanese spitz', 'keeshond', 'kelpie', 'kerryblue terrier', 'komondor', 'kuvasz', 'labradoodle', 'labrador', 'lakeland terrier', 'lapphund finnish', 'leonberg', 'lhasa', 'malamute', 'malinois', 'maltese', 'medium poodle', 'mexicanhairless', 'miniature pinscher', 'miniature poodle', 'miniature schnauzer', 'mix', 'newfoundland', 'norfolk terrier', 'norwegian buhund', 'norwegian elkhound', 'norwich terrier', 'otterhound', 'papillon', 'patterdale terrier', 'pekinese', 'pembroke', 'pitbull', 'plott hound', 'pomeranian', 'pug', 'puggle', 'pyrenees', 'redbone', 'rhodesian ridgeback', 'rottweiler', 'russell terrier', 'saluki', 'samoyed', 'schipperke', 'scottish deerhound', 'scottish terrier', 'sealyham terrier', 'sharpei', 'shepherd australian', 'shetland 
sheepdog', 'shiba', 'shihtzu', 'silky terrier', 'spanish waterdog', 'staffordshire bullterrier', 'standard poodle', 'stbernard', 'sussex spaniel', 'swiss mountain', 'tervuren', 'tibetan mastiff', 'tibetan terrier', 'toy poodle', 'toy terrier', 'vizsla', 'walker hound', 'weimaraner', 'welsh spaniel', 'welsh terrier', 'westhighland terrier', 'wheaten terrier', 'whippet', 'yorkshire terrier'])
 def classify_image(img):
+    _, _, probs = learn.predict(img)
+    return dict(zip(dog_breeds, map(float, probs)))
 learn = load_learner('dogIdentifierModel.pkl')
+image = gr.components.Image(shape=(396, 396))
 label = gr.components.Label()
 examples = ['golden-retriever.jpg', 'german-shepherd.jpg', 'doberman.jpg', 'husky.jpg']

dogIdentifierModel.pkl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:00b112b5934fbc8b427263108210a43cb4f713859410075a3b0ce501cf7f6f8f
-size 115166765

 version https://git-lfs.github.com/spec/v1
+oid sha256:74f0e6053ff9c2449e247c70ca896f163f23824ac02bcffc38d7a3c53f083eb3
+size 115323011

modelCreation.py CHANGED Viewed

@@ -1,81 +1,79 @@
-import timm
 from fastcore.all import *
 from fastdownload import download_url
 from fastai.vision.widgets import *
 from fastai.vision.all import *
-import os, shutil
-import time
 import requests
-import json
-from azure.cognitiveservices.search.imagesearch import ImageSearchClient as api
-from msrest.authentication import CognitiveServicesCredentials as auth
-def search_images_bing(key, term, min_sz=128, max_images=105):
-    params = {'q': term, 'count': max_images, 'min_height': min_sz, 'min_width': min_sz}
-    headers = {"Ocp-Apim-Subscription-Key": key}
-    search_url = "https://api.bing.microsoft.com/v7.0/images/search"
-    response = requests.get(search_url, headers=headers, params=params)
-    response.raise_for_status()
-    search_results = response.json()
-    return L(search_results['value'])
 URL = 'https://dog.ceo/api/breeds/list/all'
 result = requests.get(url = URL).json()
-searchText = []
 for val in result['message'].items():
     if len(val[1]) > 0:
         for type in val[1]:
-            searchText.append(f'{type} {val[0]}')
     else:
-        searchText.append(val[0])
 path = Path('Dog_Types')
-key = os.environ.get('AZURE_SEARCH_KEY', 'INSERT KEY HERE')
-if not path.exists():
-    path.mkdir()
-else:
-    folder = 'Dog_Types'
-    for filename in os.listdir(folder):
-        file_path = os.path.join(folder, filename)
-        try:
-            if os.path.isfile(file_path) or os.path.islink(file_path):
-                os.unlink(file_path)
-            elif os.path.isdir(file_path):
-                shutil.rmtree(file_path)
-        except Exception as e:
-            print('Failed to delete %s. Reason: %s' % (file_path, e))
-for o in searchText:
-    try:
-        dest = (path / o)
-        dest.mkdir(exist_ok=True, parents=True)
-        results = search_images_bing(key, f'{o} dog photos')
-        download_images(dest, urls=results.attrgot('contentUrl'))
-        resize_images(path/o, max_size=460, dest=path/o)
-    except shutil.SameFileError:
-        pass
-for breed in searchText:
     failed = verify_images(get_image_files(f'{path}/{breed}'))
     failed.map(Path.unlink)
 dataloaders = DataBlock(
     blocks=(ImageBlock, CategoryBlock),
     get_items=get_image_files,
     splitter=RandomSplitter(valid_pct=0.2, seed=42),
     get_y=parent_label,
-    item_tfms=Resize(460),
-    batch_tfms=aug_transforms(size=224, min_scale=0.75)
 ).dataloaders(path)
 learn = vision_learner(dataloaders, 'convnext_tiny_in22k', metrics=error_rate).to_fp16()
 learn.fine_tune(8, freeze_epochs=3)
 learn.export('dogIdentifierModel.pkl')

 from fastcore.all import *
 from fastdownload import download_url
 from fastai.vision.widgets import *
 from fastai.vision.all import *
+import os
 import requests
+def search_and_download_images(api_key, breeds, download_path, num_images=10):
+    headers = {
+        'Ocp-Apim-Subscription-Key': api_key
+        }
+    for breed in breeds:
+        # Create a directory for the breed if it doesn't exist
+        breed_dir = os.path.join(download_path, breed)
+        os.makedirs(breed_dir, exist_ok=True)
+        # Initialize a counter for the number of successfully downloaded images
+        downloaded_count = 0
+        # Make the API request
+        params = {
+            'q': f'{breed} dog',
+            'count': num_images
+        }
+        response = requests.get('https://api.bing.microsoft.com/v7.0/images/search', headers=headers, params=params)
+        # L() is from fastai
+        results = L(response.json()['value'])
+        download_images(breed_dir, urls=results.attrgot('contentUrl'))
 URL = 'https://dog.ceo/api/breeds/list/all'
+# Get the breeds of all the dogs (some of them require reformatting)
 result = requests.get(url = URL).json()
+dog_breeds = []
 for val in result['message'].items():
     if len(val[1]) > 0:
         for type in val[1]:
+            dog_breeds.append(f'{type} {val[0]}')
     else:
+        dog_breeds.append(val[0])
 path = Path('Dog_Types')
+api_key = os.environ.get('AZURE_SEARCH_KEY', 'INSERT KEY HERE')
+search_and_download_images(api_key, dog_breeds, path, num_images=200)
+# Ensure that all images are able to be opened.
+# If they cannot be opened, remove them
+for breed in dog_breeds:
     failed = verify_images(get_image_files(f'{path}/{breed}'))
     failed.map(Path.unlink)
+# Load the data into a fastai DataBlock
+# The data is randomly split into training (80%) and validation (20%) sets
+# Resize the images to 396x396 px
+# Also perform transforms (augmentation), so that the model gets imperfect data to train on
 dataloaders = DataBlock(
     blocks=(ImageBlock, CategoryBlock),
     get_items=get_image_files,
     splitter=RandomSplitter(valid_pct=0.2, seed=42),
     get_y=parent_label,
+    item_tfms=Resize(396),
+    batch_tfms=aug_transforms(size=396, min_scale=0.75)
 ).dataloaders(path)
+# Build a learner on top of the pretrained ConvNeXt-Tiny (ImageNet-22k) model
+# NOTE: When this happens, fastai replaces the pretrained classification head with a new, untrained one sized for the classes in your data
 learn = vision_learner(dataloaders, 'convnext_tiny_in22k', metrics=error_rate).to_fp16()
+# Because of this, for 3 epochs, I decided to freeze the weights of the pretrained model (as this has already been optimized)
+# Only the additional layers that were added will adjust in the first 3 epochs
+# After the 3 epochs, the model will update all weights
 learn.fine_tune(8, freeze_epochs=3)
 learn.export('dogIdentifierModel.pkl')

requirements.txt CHANGED Viewed

@@ -1,7 +1,8 @@
-timm==0.6.7
-fastai==2.7.4
-torch==1.11.0
 gradio==3.1.4
-numpy==1.23.0
-pandas==1.4.3
-azure-cognitiveservices-search-imagesearch==2.0.0

+matplotlib==3.5.2
+timm==0.9.7
+fastai==2.7.12
+torch==2.0.0
 gradio==3.1.4
+numpy==1.23.5
+pandas==2.0.2
+Pillow==9.2.0