dwancin committed on
Commit
a5661ba
1 Parent(s): 53f7030

sklearn has been imported

Browse files
Files changed (3) hide show
  1. README.md +3 -3
  2. app.py +99 -166
  3. requirements.txt +2 -1
README.md CHANGED
@@ -5,7 +5,7 @@ emoji: 🤖
5
  colorFrom: gray
6
  colorTo: gray
7
  sdk: gradio
8
- sdk_version: 4.27.0
9
  app_file: app.py
10
  pinned: true
11
  header: mini
@@ -19,8 +19,8 @@ tags:
19
  - computer-vision
20
  - face-recognition
21
  ---
22
- # Face Recognition
23
- Match and identify faces with the help of a comprehensive dataset of facial embeddings.
24
 
25
  ## Dataset
26
  ### Details
 
5
  colorFrom: gray
6
  colorTo: gray
7
  sdk: gradio
8
+ sdk_version: 4.28.0
9
  app_file: app.py
10
  pinned: true
11
  header: mini
 
19
  - computer-vision
20
  - face-recognition
21
  ---
22
+ # Face Match
23
+ Upload an image, and the system will find the most similar face in our dataset.
24
 
25
  ## Dataset
26
  ### Details
app.py CHANGED
@@ -1,185 +1,118 @@
1
- import os, sys, uuid
2
- import requests
3
- import tempfile
4
- import datetime
5
- import gradio as gr
6
- import face_recognition
7
  import numpy as np
8
  import pandas as pd
 
9
  from huggingface_hub import HfFileSystem, hf_hub_download
10
- from PIL import Image, UnidentifiedImageError
11
- from io import BytesIO
12
-
13
- # Constants
14
- hf_token = os.environ.get("HF_TOKEN")
15
- space_subdomain = os.environ['SPACE_SUBDOMAIN']
16
- dataset = os.environ['DATASET']
17
- hf_file_system = HfFileSystem()
18
- max_distance = 1.0
19
- timeout = 30
20
 
21
- # Function to get images with authentication
22
- def get_image_with_auth(file_name):
23
- image_path = hf_hub_download(repo_id=dataset, repo_type="dataset", filename=file_name, use_auth_token=hf_token)
24
- img = Image.open(image_path)
25
- return img
26
 
27
- # Load dataset
28
- with hf_file_system.open(f'datasets/{dataset}/metadata.csv', revision='main', token=True) as file:
29
- df = pd.read_csv(file)
30
 
31
- # Convert string embeddings to numpy arrays
32
- def convert_embeddings(embedding_str):
33
- embedding = np.fromstring(embedding_str.strip('[]'), sep=',')
34
- return embedding
 
 
 
35
 
36
- df['embeddings'] = df['embeddings'].apply(convert_embeddings)
37
 
38
- # Load image from URL
39
- def load_image_from_url(url, timeout=timeout):
40
- try:
41
- response = requests.get(url, timeout=timeout)
42
- response.raise_for_status() # Raises an HTTPError for certain status codes
43
- img = Image.open(BytesIO(response.content)).convert('RGB')
44
- return img
45
- except requests.exceptions.Timeout:
46
- # Handle the timeout differently, maybe retry or return a specific message
47
- raise ValueError("The request timed out. Please try again later.")
48
- except requests.exceptions.HTTPError as e:
49
- if e.response.status_code == 429:
50
- raise ValueError("Too many requests: please wait a moment and try again.")
51
- else:
52
- raise ValueError(f"HTTP error: {e}")
53
- except requests.exceptions.RequestException as e:
54
- raise ValueError(f"Error fetching image from URL: {e}")
55
- except UnidentifiedImageError:
56
- raise ValueError("Cannot identify image file. Ensure the URL points to a valid image.")
57
 
58
-
59
- # Compute facial embeddings for the uploaded image
60
  def get_embedding(image):
61
- face_encodings = face_recognition.face_encodings(np.array(image))
62
- if face_encodings:
63
- return face_encodings[0]
64
- else:
65
- raise ValueError("No faces detected in the image or unsupported image format.")
66
 
67
- # Find the most similar faces
68
  def find_matching_face(embedding, df):
69
- df['distance'] = df['embeddings'].apply(lambda x: np.linalg.norm(x - embedding))
70
- best_match = df.sort_values('distance').iloc[0]
71
- return best_match, best_match['distance']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
- # The main function to recognize face and return the most similar face from the dataset
74
  def recognize_face(input_image_path):
75
- try:
76
- # Get initiated time
77
- start_time = datetime.datetime.now()
78
-
79
- # Load the input image directly from the provided path
80
- input_image = Image.open(input_image_path)
81
-
82
- # Compute facial embeddings for the uploaded image
83
- embedding = get_embedding(input_image)
84
-
85
- # Find the most similar face in the dataset and the distance score
86
- most_similar_face, output_score = find_matching_face(embedding, df)
87
-
88
- # Normalize the distance and convert it to a similarity percentage
89
- output_percentage = max(0, (1 - (output_score / max_distance))) * 100
90
-
91
- # Categorise the similarity percentages
92
- if output_percentage > 55:
93
- output_similarity = "very high"
94
- elif output_percentage > 53:
95
- output_similarity = "high"
96
- elif output_percentage > 50:
97
- output_similarity = "medium"
98
- elif output_percentage > 48:
99
- output_similarity = "low"
100
- else:
101
- output_similarity = "very low"
102
-
103
- # Retrieve image using the file name from the dataset
104
- similar_face_image = get_image_with_auth(most_similar_face['file_name'])
105
-
106
- # Generate a unique filename and save the file in the expected Gradio directory
107
- unique_filename = str(uuid.uuid4()) + ".jpg"
108
- gradio_tmp_dir = "/tmp/gradio/"
109
- os.makedirs(gradio_tmp_dir, exist_ok=True)
110
- file_path = os.path.join(gradio_tmp_dir, unique_filename)
111
- similar_face_image.save(file_path)
112
-
113
- # Get name of the matching face
114
- output_name = f"{most_similar_face['text']}"
115
-
116
- # Get the similarity percentage
117
- output_percentage = f"{output_percentage:.2f}%"
118
-
119
- # Construct the URL using the unique filename
120
- output_image = file_path
121
-
122
- # Get completed time
123
- end_time = datetime.datetime.now()
124
-
125
- # YAML script showing input and outputs
126
- output_code = (f'''```yaml
127
- inputs:
128
- - time: "{start_time}"
129
- - image: "{input_image_path}"
130
- outputs:
131
- - time: "{end_time}"
132
- - image: "{most_similar_face['file_name']}"
133
- - name: "{output_name}"
134
- - similarity: "{output_similarity}"
135
- ```''')
136
-
137
- # print output code
138
- print(output_code)
139
-
140
- print(f"procentage: {output_percentage}")
141
-
142
- # Return the outputs
143
- return output_image, output_code
144
-
145
- except Exception as e:
146
- output_code = f'''error: "{e}"'''
147
- return None, output_code
148
 
149
- def update_code(output_code):
150
  return gr.update(visible=True)
151
 
152
- css = '''
153
- .gradio-container { max-width: 700px !important; }
154
- .source-selection { display: none !important; }
155
- .svelte-1pijsyv { border-radius: 0 !important; }
156
- .clear { max-width: 140px; }
157
- .submit { max-width: 240px; }
158
- '''
159
-
160
- # Gradio Blocks UI
161
- with gr.Blocks(css=css) as demo:
162
- with gr.Row(equal_height=False):
 
 
 
 
 
163
  with gr.Column():
164
- title = gr.HTML('''<h1><center>Face Match</center></h1>''', elem_classes="title")
165
- subtitle = gr.HTML('''<h3><center>Upload an Image to Identify Faces</center></h3>''', elem_classes="subtitle")
 
 
 
 
166
  with gr.Row():
167
- with gr.Column():
168
- with gr.Group():
169
- with gr.Row(equal_height=True):
170
- input_image = gr.Image(label="Input", show_label=False, type="filepath", interactive=True, elem_classes="image")
171
- output_image = gr.Image(label="Output", show_label=False, type="filepath", interactive=False, elem_classes="image")
172
- with gr.Row():
173
- output_code = gr.Markdown(visible=False, elem_classes="code")
174
- with gr.Row():
175
- clear = gr.ClearButton([input_image, output_image, output_code], elem_classes="clear")
176
- submit = gr.Button("Submit", variant="primary", elem_classes="submit")
177
- with gr.Row():
178
- examples = gr.Examples(["examples/0001.png", "examples/0002.png", "examples/0003.png", "examples/0004.png"], input_image)
179
-
180
-
181
- output_image.change(fn=update_code, inputs=output_code, outputs=output_code)
182
- submit.click(fn=recognize_face, inputs=input_image, outputs=[output_image, output_code])
183
-
184
- # Launch the Gradio UI
185
- demo.launch(server_name="0.0.0.0", server_port=7860, debug=True, show_api=False)
 
1
+ import os
 
 
 
 
 
2
  import numpy as np
3
  import pandas as pd
4
+ from PIL import Image
5
  from huggingface_hub import HfFileSystem, hf_hub_download
6
+ import face_recognition
7
+ import gradio as gr
8
+ from sklearn.metrics import euclidean_distances
 
 
 
 
 
 
 
9
 
10
# Settings read once from the process environment (each is None when unset).
HF_TOKEN = os.environ.get("HF_TOKEN")
HF_DATASET = os.environ.get('HF_DATASET')

# Shared Hugging Face filesystem client used to read files from the dataset repo.
fs = HfFileSystem()
 
16
 
17
# Dataset loading
def load_dataset():
    """Read the dataset's ``metadata.csv`` and decode its ``embeddings`` column.

    Each cell of ``embeddings`` is a bracketed, comma-separated string; the
    brackets are stripped and the numbers parsed into a numpy array.

    Returns:
        The metadata DataFrame with ``embeddings`` holding numpy arrays.
    """
    def parse_cell(cell):
        # "[0.1, 0.2, ...]" -> array([0.1, 0.2, ...])
        return np.fromstring(cell.strip('[]'), sep=',')

    metadata_path = f'datasets/{HF_DATASET}/metadata.csv'
    with fs.open(metadata_path, revision='main', token=True) as handle:
        frame = pd.read_csv(handle)
        frame['embeddings'] = frame['embeddings'].apply(parse_cell)
    return frame


# Loaded once at import time; face matching searches this frame.
df = load_dataset()
26
 
27
# Authenticated image download
def get_image_with_auth(file_name):
    """Download ``file_name`` from the dataset repo and open it with PIL.

    The download goes through ``hf_hub_download`` using ``HF_TOKEN``; the
    returned local path is then opened as an image.

    Returns:
        The PIL image opened from the downloaded file.
    """
    local_path = hf_hub_download(
        filename=file_name,
        repo_id=HF_DATASET,
        repo_type="dataset",
        token=HF_TOKEN,
    )
    picture = Image.open(local_path)
    return picture
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
# Face embedding extraction
def get_embedding(image):
    """Return the first face encoding found in ``image``, or ``None``.

    The image is converted to a numpy array and passed to
    ``face_recognition.face_encodings``; when that yields no encodings the
    function returns ``None`` instead of raising.
    """
    found = face_recognition.face_encodings(np.array(image))
    if not found:
        return None
    return found[0]
 
 
38
 
39
# Nearest-neighbour search by Euclidean distance
def find_matching_face(embedding, df):
    """Find the dataset row whose embedding is closest to ``embedding``.

    Distances are computed directly as ``||row - embedding||`` with numpy,
    which avoids the rounding error of the expanded dot-product formulation
    and needs no extra dependency for a single-query search.

    Args:
        embedding: Query face embedding (1-D array-like).
        df: DataFrame with an ``embeddings`` column of numpy arrays
            (assumed to all share the query's length; ``np.stack`` raises
            otherwise).

    Returns:
        ``(row, distance)``: the best-matching row as a Series and its
        Euclidean distance to the query.

    Raises:
        ValueError: If ``df`` has no rows to match against.
    """
    if df.empty:
        # Fail with a clear message instead of np.stack's "need at least one array".
        raise ValueError("Cannot match a face against an empty dataset.")
    embeddings_matrix = np.stack(df['embeddings'].to_numpy())
    query = np.asarray(embedding, dtype=float)
    distances = np.linalg.norm(embeddings_matrix - query, axis=1)
    min_index = int(np.argmin(distances))
    return df.iloc[min_index], distances[min_index]
46
+
47
# Match-quality bands: (exclusive upper distance bound, label), in ascending order.
_SIMILARITY_BANDS = (
    (0.3, "Very High Match"),
    (0.5, "High Match"),
    (0.7, "Moderate Match"),
    (1.0, "Low Match"),
)


def categorize_similarity(distance, *, bands=_SIMILARITY_BANDS, fallback="Very Low Match"):
    """Map a Euclidean distance to a human-readable match-quality label.

    Args:
        distance: Distance between the query and the matched embedding.
        bands: Ascending ``(upper_bound, label)`` pairs; the first band whose
            bound is strictly greater than ``distance`` supplies the label.
        fallback: Label used when ``distance`` falls below no bound (this
            includes NaN, which compares false against every bound).

    Returns:
        The label of the matching band, or ``fallback``.
    """
    for upper_bound, label in bands:
        if distance < upper_bound:
            return label
    return fallback
60
 
61
# Main function to process the face matching
def recognize_face(input_image_path):
    """Find the dataset face most similar to the uploaded image.

    Args:
        input_image_path: Filesystem path of the uploaded image, or a falsy
            value when nothing was uploaded.

    Returns:
        ``(image, message)``: the matched face image and a fenced YAML
        summary, or ``(None, reason)`` when no image was provided or no face
        was detected.
    """
    if not input_image_path:
        # Submit pressed with an empty input: Image.open(None) would crash.
        return None, "No image provided."

    # The context manager closes the file handle; convert() returns an
    # independent in-memory copy that stays valid afterwards.
    with Image.open(input_image_path) as opened:
        input_image = opened.convert('RGB')

    embedding = get_embedding(input_image)
    if embedding is None:
        return None, "No face detected."

    most_similar_face, distance = find_matching_face(embedding, df)
    match_quality = categorize_similarity(distance)
    similar_face_image = get_image_with_auth(most_similar_face['file_name'])

    matched_name = most_similar_face['text']
    # Similarity as a percentage, clamped so distances above 1 do not go negative.
    similarity = max(0.0, 100 * (1 - distance))
    info_message = "\n".join([
        "```yaml",
        "input:",
        f'- image: "{input_image_path}"',
        "output:",
        f'- name: "{matched_name}"',
        f'- quality: "{match_quality}"',
        f"- distance: {distance:.4f}",
        f"- similarity: {similarity:.2f}",
        "```",
        "",
    ])
    print(info_message)
    return similar_face_image, info_message
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
def update(output_info):
    """Return a Gradio update that makes the target component visible.

    ``output_info`` is received from the event wiring but its value is not used.
    """
    reveal = gr.update(visible=True)
    return reveal
85
 
86
# Gradio setup
CUSTOM_CSS = '''
.gradio-container { max-width: 700px !important; }
.source-selection { display: none !important; }
#clear { max-width: 140px; }
#submit { max-width: 240px; }
.svelte-1pijsyv { border-radius: 0 !important; }
.svelte-s6ybro { display: none !important; }
'''

with gr.Blocks(analytics_enabled=False, title="Face Match", css=CUSTOM_CSS) as demo:
    title = gr.HTML("<h1><center>Face Match</center></h1>")
    subtitle = gr.HTML("<h3><center>Upload an image, and the system will find the most similar face in our dataset.</center></h3>")
    with gr.Row():
        with gr.Column():
            with gr.Group():
                with gr.Row(equal_height=True):
                    input_image = gr.Image(type="filepath", show_label=False, interactive=True)
                    output_image = gr.Image(type="filepath", show_label=False, interactive=False, show_share_button=False, show_download_button=False)
                # NOTE(review): whether this Row sits inside the Group or beside it
                # could not be confirmed from the diff - verify the layout.
                with gr.Row():
                    output_info = gr.Markdown(visible=False)
            with gr.Row():
                clear = gr.ClearButton([input_image, output_image, output_info], elem_id="clear", elem_classes="button")
                submit = gr.Button("Submit", variant="primary", elem_id="submit", elem_classes="button")
            with gr.Row():
                examples = gr.Examples(["examples/0001.png", "examples/0002.png", "examples/0003.png", "examples/0004.png"], input_image)

    # Reveal the info panel after every submission rather than on
    # output_image.change: when no face is found the output image stays None,
    # so a change-based trigger would leave the message hidden.
    submit.click(
        fn=recognize_face,
        inputs=input_image,
        outputs=[output_image, output_info],
    ).then(fn=update, inputs=output_info, outputs=output_info)

# Launch
demo.launch(show_api=False)
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -2,4 +2,5 @@ gradio
2
  face_recognition
3
  numpy
4
  pillow
5
- pandas
 
 
2
  face_recognition
3
  numpy
4
  pillow
5
+ pandas
6
+ scikit-learn