File size: 12,552 Bytes
bf955de
e3b75a8
34f782e
 
cd434be
 
 
f47deed
 
 
 
 
 
e3b75a8
f47deed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b5c27c2
f47deed
 
 
 
 
 
34f782e
e3b75a8
34f782e
 
e3b75a8
 
 
 
bc3c0e3
e3b75a8
14a5102
e3b75a8
 
 
14a5102
e3b75a8
 
 
 
14a5102
e3b75a8
14a5102
e3b75a8
14a5102
e3b75a8
14a5102
e3b75a8
14a5102
e3b75a8
14a5102
e3b75a8
14a5102
e3b75a8
14a5102
 
fcd588e
 
 
 
 
 
 
 
 
 
 
 
e3b75a8
 
fcd588e
 
 
 
 
cb4e839
fcd588e
 
 
 
 
e3b75a8
 
 
 
 
 
 
14a5102
e3b75a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
005ffc6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d687a5f
 
 
005ffc6
 
1e6e6f3
 
e3b75a8
 
5f73400
e3b75a8
 
 
 
 
 
31e3ee7
e3b75a8
598a9af
e3b75a8
 
 
d687a5f
e3b75a8
 
 
 
747cf34
e3b75a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14a5102
e3b75a8
 
 
 
 
 
14a5102
 
e3b75a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2ff49b8
e3b75a8
 
 
 
 
 
 
5f73400
e3b75a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14a5102
29a7b7b
 
e3b75a8
14a5102
f47deed
e3b75a8
 
 
 
14a5102
e3b75a8
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
import streamlit as st

# x = st.slider("Select a value")
# st.write(x, "squared is", x * x)
# --- Landing text -----------------------------------------------------------
st.title('Welcome to the Labelbox custom classifier training application!')
st.header("In this module you will be able to add data to your instance if you don't already have it, and then use bulk classification to train a custom image classification model")
# The instruction previously read "enter your API Click the button below!",
# which dropped the words "key and".
st.subheader("If you don't have data in your org, enter your API key and click the button below! Otherwise, skip to section 2")
# -*- coding: utf-8 -*-
# NOTE(review): the bare string below is a leftover Colab export header. It is
# not the first statement of the module, so it is not a module docstring; with
# Streamlit "magic" enabled it may be rendered in the app -- confirm intent.
"""
Original file is located at
    https://colab.research.google.com/drive/1nOSff67KXhNgX_XSfnv3xnddobRoaK0d

"""

# Masked text box. The value is read as a module-level global by
# freedatatolb() and passed explicitly to train_and_inference().
api_key = st.text_input("Enter your api key:", type="password")

import labelbox
import labelpandas as lp
import os
import pandas as pd
from tensorflow.python.lib.io import file_io
import io
from pandas import read_csv

# read csv file from google cloud storage
def read_data(gcs_path):
    """Read a CSV file from Google Cloud Storage into a DataFrame.

    Args:
        gcs_path: Path handed to ``file_io.FileIO``; opened in text mode.

    Returns:
        The ``pandas.DataFrame`` parsed from the file's contents.
    """
    # The context manager closes the stream even when reading raises; the
    # previous version never closed the handle.
    with file_io.FileIO(gcs_path, mode='r') as file_stream:
        contents = file_stream.read()
    return read_csv(io.StringIO(contents))
def freedatatolb(amount_of_data):
    """Upload the first rows of the sample styles CSV to Labelbox.

    Builds a ``labelpandas`` client from the module-level ``api_key``, reads
    the CSV, drops the columns that are not uploaded, renames the remaining
    columns to the field names used for the upload, creates a dataset named
    after the CSV file and creates data rows from the leading rows.

    Args:
        amount_of_data: Number of leading rows to upload (passed to
            ``DataFrame.head``).

    Returns:
        Whatever ``client.create_data_rows_from_table`` returns.
    """
    lb = lp.Client(api_key)
    csv_url = 'https://storage.googleapis.com/solution_accelerator_datasets/images_styles.csv'

    unused_columns = ['id', 'season', 'usage', 'year', "gender", "masterCategory", "subCategory", "articleType", "baseColour"]
    frame = pd.read_csv(csv_url).drop(unused_columns, axis=1)

    # Upload field -> CSV column(s) that feed it.
    field_sources = {
        "row_data": ["link"],  # Column containing URL to asset (single)
        "global_key": ['filename'],  # Column containing globalkey value (single, unique)
        "external_id": ["productDisplayName"],  # Column containing external ID value (single)
        "metadata_string": [],  # Columns containing string metadata values (multiple)
        "metadata_number": [],  # Columns containing number metadata values (multiple)
        "metadata_datetime": [],  # Columns containing datetime metadata values (multiple, must be ISO 8601)
    }

    def target_name(field, column):
        # Metadata columns are renamed to "metadata///<type>///<column>";
        # every other column simply takes the field name.
        if not field.startswith('metadata'):
            return field
        parts = field.split('_')
        return f"{parts[0]}///{parts[1]}///{column}"

    rename_map = {
        column: target_name(field, column)
        for field, sources in field_sources.items()
        for column in sources
    }

    subset = frame.rename(columns=rename_map).head(amount_of_data)

    # The dataset is named after the last path component of the CSV URL.
    dataset_name = str(csv_url.split('/')[-1])
    dataset = lb.lb_client.create_dataset(name=dataset_name)

    return lb.create_data_rows_from_table(
        table=subset,
        dataset_id=dataset.uid,
        skip_duplicates=True,  # skip data rows whose global key is already in use
        verbose=True,  # print information about code execution
    )
# --- Section 1: optional sample-data upload ---------------------------------
# Slider bounds are 250..1000 rows.
data_amount = st.slider("choose amount of data to add to labelbox", 250, 1000)
if st.button("Add data to your Labelbox"):
    if not api_key:
        # freedatatolb() builds its client from the api_key entered above;
        # don't start an upload without one.
        st.warning("Please enter your API key first.")
    else:
        st.write(f"adding {data_amount} datarows to Labelbox instance")
        upload_result = freedatatolb(data_amount)
        st.write(upload_result)

st.title("SECTION 2")
st.title("Auto Image classifier training and inference: ImageNet Weights")

# -*- coding: utf-8 -*-
# NOTE(review): leftover Colab export header kept as-is. As a bare string
# statement it may be rendered by Streamlit "magic" -- confirm intent.
"""
Original file is located at
    https://colab.research.google.com/drive/1CSyAE9DhwGTl7bLaSoo7QSyMuoEqJpCj

"""

def train_and_inference(api_key, ontology_id, model_run_id):
    """Train an image classifier on a model run's labels and upload predictions.

    Steps, in order:
      1. Export the model run's labels.
      2. Download every 'training' / 'validation' image into
         ``dataset/<split>/<answer value>/`` ('test' rows are skipped).
      3. Train a MobileNetV2 (ImageNet weights, frozen base) with a new
         softmax head for ``EPOCHS`` epochs.
      4. Predict a class for every exported data row and upload the results
         to the model run as radio classifications.

    Args:
        api_key: Labelbox API key used to build the client.
        ontology_id: Ontology ID; the ``instructions`` of its first
            classification is used as the prediction's name.
        model_run_id: ID of the model run to export from and upload to.

    Returns:
        ``"Model Trained and inference ran successfully"`` when the prediction
        import reports no errors, otherwise the import's ``errors`` value.
        A short message string is returned when the label export yields
        nothing to work with.
    """
    # Heavy imports stay function-local, as in the original. The pydantic
    # version is written out before anything else is imported, so it is
    # still shown if a later import fails.
    import pydantic
    st.write(pydantic.__version__)

    import os
    import uuid
    from io import BytesIO
    from urllib.parse import unquote, urlparse

    import numpy as np
    import requests
    # NOTE(review): scipy is not referenced directly; kept from the original,
    # presumably because the image augmentation needs it -- confirm.
    import scipy  # noqa: F401
    from PIL import Image
    from labelbox import Client
    from labelbox.data.annotation_types import (
        ClassificationAnnotation,
        ClassificationAnswer,
        ImageData,
        Label,
        Radio,
    )
    from tensorflow.errors import InvalidArgumentError
    from tensorflow.keras.applications import MobileNetV2
    from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
    from tensorflow.keras.models import Model
    from tensorflow.keras.optimizers import Adam
    from tensorflow.keras.preprocessing import image
    from tensorflow.keras.preprocessing.image import ImageDataGenerator

    # NOTE(review): the bare string statements in this function are leftovers
    # of the Colab export and are kept verbatim; with Streamlit "magic"
    # enabled they may be rendered in the app -- confirm whether intended.
    """Connect to labelbox client
    Define Model Variables
    """

    client = Client(api_key)
    EPOCHS = 10
    REQUEST_TIMEOUT = 30  # seconds; keeps a stalled server from hanging the app

    """#Setup Training

    Export Classifications from Model Run
    """

    model_run = client.get_model_run(model_run_id)

    client.enable_experimental = True
    data_json = model_run.export_labels(download=True)
    print(data_json)
    if not data_json:
        # Nothing exported (None or empty): iterating would fail or leave
        # nothing to train on.
        return "Label export returned no data rows; nothing to train on."

    """Separate datarows into folders."""

    def download_and_save_image(url, destination_folder, filename):
        """Save ``url`` as ``destination_folder/filename``; return True on success."""
        try:
            # Decode the URL
            url = unquote(url)

            # Ensure destination directory exists
            os.makedirs(destination_folder, exist_ok=True)

            # The context manager releases the streamed connection.
            with requests.get(url, stream=True, timeout=REQUEST_TIMEOUT) as response:
                if response.status_code != 200:
                    return False
                file_path = os.path.join(destination_folder, filename)
                with open(file_path, 'wb') as file:
                    for chunk in response.iter_content(8192):
                        file.write(chunk)
            return True
        except Exception as e:
            # Best effort per image: report and carry on with the next one.
            st.write(f"An error occurred: {e}")
            return False

    BASE_DIR = 'dataset'
    labeldict = {}  # answer value -> answer title
    failed_downloads = 0

    for entry in data_json:
        data_split = entry['Data Split']
        if data_split not in ['training', 'validation']:  # we are skipping 'test' for now
            continue

        image_url = f"{entry['Labeled Data']}"
        answer = entry['Label']['classifications'][0]['answer']
        label = answer['value']
        labeldict[label] = answer['title']

        destination_folder = os.path.join(BASE_DIR, data_split, label)
        # Use only the URL *path* for the file name so a query string (for
        # example on a signed URL) does not end up after the image extension.
        filename = os.path.basename(urlparse(image_url).path)

        if not download_and_save_image(image_url, destination_folder, filename):
            failed_downloads += 1

    if failed_downloads:
        st.write(f"{failed_downloads} image(s) could not be downloaded and were left out of training")

    """#Train Model"""
    st.write(labeldict)

    TRAIN_DIR = 'dataset/training'
    VALIDATION_DIR = 'dataset/validation'
    IMG_HEIGHT, IMG_WIDTH = 224, 224  # default size for MobileNetV2
    BATCH_SIZE = 32

    train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest'
    )

    validation_datagen = ImageDataGenerator(rescale=1./255)

    train_ds = train_datagen.flow_from_directory(
        TRAIN_DIR,
        target_size=(IMG_HEIGHT, IMG_WIDTH),
        batch_size=BATCH_SIZE,
        class_mode='categorical'
    )

    validation_ds = validation_datagen.flow_from_directory(
        VALIDATION_DIR,
        target_size=(IMG_HEIGHT, IMG_WIDTH),
        batch_size=BATCH_SIZE,
        class_mode='categorical'
    )

    base_model = MobileNetV2(input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
                            include_top=False,
                            weights='imagenet')

    # Freeze the base model
    for layer in base_model.layers:
        layer.trainable = False

    # Create custom classification head
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    predictions = Dense(train_ds.num_classes, activation='softmax')(x)

    model = Model(inputs=base_model.input, outputs=predictions)

    model.compile(optimizer=Adam(learning_rate=0.0001),
                loss='categorical_crossentropy',
                metrics=['accuracy'])

    st.write("training")
    model.fit(
        train_ds,
        validation_data=validation_ds,
        epochs=EPOCHS
    )

    """Run Inference on Model run Datarows"""
    st.write('running Inference')

    # Class index -> directory name (the answer value); built once instead of
    # once per predicted image.
    index_to_label = {index: name for name, index in train_ds.class_indices.items()}

    def load_image_from_url(img_url, target_size=(224, 224)):
        """Fetch an image and return it as a (1, H, W, 3) array."""
        response = requests.get(img_url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        # Force three channels: grayscale, palette or RGBA images would
        # otherwise not match the model's 3-channel input.
        img = Image.open(BytesIO(response.content)).convert('RGB')
        img = img.resize(target_size)
        img_array = image.img_to_array(img)
        return np.expand_dims(img_array, axis=0)

    def make_prediction(img_url):
        """Return ``(predicted directory name, confidence)`` for one image URL."""
        # Same [0, 1] scaling as the generators' rescale=1./255.
        img_data = load_image_from_url(img_url) / 255.0

        scores = model.predict(img_data)[0]
        predicted_class = np.argmax(scores)

        # Plain float so the value does not depend on numpy scalar handling
        # downstream.
        return index_to_label[predicted_class], float(scores[predicted_class])

    ontology = client.get_ontology(ontology_id)
    st.write(ontology)
    # Same for every row, so looked up once.
    classification_name = ontology.classifications()[0].instructions

    label_list = []
    # Reuse the export fetched above instead of requesting a second one.
    for datarow in data_json:
        try:
            label, confidence = make_prediction(datarow['Labeled Data'])
        except (InvalidArgumentError, requests.RequestException, OSError) as e:
            # OSError also covers image files Pillow cannot decode.
            print(f"{type(e).__name__}: {e}. Skipping this data row.")
            continue  # Skip to the next datarow if an exception occurs

        # Directory names are the raw answer values, so try the exact key
        # first; the lower-cased lookup is kept as a fallback.
        title = labeldict.get(label, labeldict.get(label.lower()))
        if title is None:
            print(f"No answer title known for predicted class {label!r}. Skipping this data row.")
            continue

        prediction = ClassificationAnnotation(
            name=classification_name,
            value=Radio(
                answer=ClassificationAnswer(name=title, confidence=confidence)
            ),
        )
        label_list.append(
            Label(
                data=ImageData(uid=datarow['DataRow ID']),
                annotations=[prediction],
            )
        )

    prediction_import = model_run.add_predictions(
        name="prediction_upload_job"+str(uuid.uuid4()),
        predictions=label_list)

    prediction_import.wait_until_done()

    # Read the errors once and reuse the value.
    errors = prediction_import.errors
    st.write(errors == [])
    if errors == []:
        return "Model Trained and inference ran successfully"
    return errors

# --- Section 2 inputs and trigger -------------------------------------------
st.title("Enter Applicable IDs and keys below")

model_run_id = st.text_input("Enter your model run ID:")
ontology_id = st.text_input("Enter your ontology ID:")

if st.button("Train and run inference"):
    st.write('Starting Up...')
    # Every field must be filled in. Concatenating the three strings, as the
    # previous check did, is truthy as soon as any one of them is non-empty.
    if all((api_key, model_run_id, ontology_id)):
        result = train_and_inference(api_key, ontology_id, model_run_id)
        st.write(result)
    else:
        st.warning("Please enter all keys.")