WoodLB committed on
Commit f47deed · 1 Parent(s): e5e60bd

data ingestion + training

Files changed (1)
  1. app.py +61 -1
app.py CHANGED
@@ -2,7 +2,67 @@ import streamlit as st
 
 # x = st.slider("Select a value")
 # st.write(x, "squared is", x * x)
+ st.title("If you don't have data in your org, enter your API key and click the button below! Otherwise, skip to Section 2")
+ # -*- coding: utf-8 -*-
+ """
+ Original file is located at
+ https://colab.research.google.com/drive/1nOSff67KXhNgX_XSfnv3xnddobRoaK0d
+ """
+
+ api_key = st.text_input("Enter your api key:", type="password")
+
+ import labelbox
+ import labelpandas as lp
+ import os
+ import pandas as pd
+ from tensorflow.python.lib.io import file_io
+ import io
+ from pandas import read_csv
+
+ # Read a CSV file from Google Cloud Storage
+ def read_data(gcs_path):
+     file_stream = file_io.FileIO(gcs_path, mode='r')
+     csv_data = read_csv(io.StringIO(file_stream.read()))
+     return csv_data
+
+ def freedatatolb(amount_of_data):
+     client = lp.Client(api_key)
+     gcs_path = 'https://storage.googleapis.com/solution_accelerator_datasets/images_styles.csv'
+     df = pd.read_csv(gcs_path)
+     df = df.drop(['id', 'season', 'usage', 'year', 'gender', 'masterCategory', 'subCategory', 'articleType', 'baseColour'], axis=1)
+     fields = {
+         "row_data": ["link"],                   # Column containing the URL to the asset (single)
+         "global_key": ["filename"],             # Column containing the global key value (single, unique)
+         "external_id": ["productDisplayName"],  # Column containing the external ID value (single)
+         "metadata_string": [],                  # Columns containing string metadata values (multiple)
+         "metadata_number": [],                  # Columns containing number metadata values (multiple)
+         "metadata_datetime": []                 # Columns containing datetime metadata values (multiple, must be ISO 8601)
+     }
+     columns = {}
+
+     for field in fields.keys():
+         for name in fields[field]:
+             if field.startswith('metadata'):
+                 columns[name] = f"{field.split('_')[0]}///{field.split('_')[1]}///{name}"
+             else:
+                 columns[name] = field
+     new_df = df.rename(columns=columns)
+     testdf = new_df.head(amount_of_data)
+     dataset_id = client.lb_client.create_dataset(name=str(gcs_path.split('/')[-1])).uid
+     # dataset_id = client.lb_client.get_dataset("c4b7prd6207850000lljx2hr8").uid
+     results = client.create_data_rows_from_table(
+         table=testdf,
+         dataset_id=dataset_id,
+         skip_duplicates=True,  # If True, skip data rows whose global key is already in use
+         verbose=True,          # If True, print information about code execution
+     )
+     return results
+
+ data_amount = st.slider("Choose the amount of data to add to Labelbox", 100, 500)
+ if st.button("Add data to your Labelbox"):
+     st.write(f"Adding {data_amount} data rows to your Labelbox instance")
+     bing = freedatatolb(data_amount)
+     st.write(bing)
+
+ st.title("SECTION 2")
 st.title("Auto Image classifier training and inference: ImageNet Weights")
 
 # -*- coding: utf-8 -*-
@@ -284,7 +344,7 @@ def train_and_inference(api_key, ontology_id, model_run_id):
     return prediction_import.errors
 
 st.title("Enter Applicable IDs and keys below")
- api_key = st.text_input("Enter your api key:", type="password")
+
 model_run_id = st.text_input("Enter your model run ID:")
 ontology_id = st.text_input("Enter your ontology ID:")
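
For reference, here is a minimal standalone sketch of the column-renaming convention that `freedatatolb()` relies on: labelpandas maps reserved column names (`row_data`, `global_key`, `external_id`) directly, while metadata columns are renamed to a `metadata///<type>///<name>` pattern. The toy DataFrame and the `brand` metadata column below are hypothetical illustrations, not part of this commit.

```python
import pandas as pd

# Hypothetical miniature of the fashion CSV used above
df = pd.DataFrame({
    "link": ["https://example.com/img1.jpg"],
    "filename": ["img1.jpg"],
    "productDisplayName": ["Blue Shirt"],
    "brand": ["Acme"],  # hypothetical string-metadata column
})

fields = {
    "row_data": ["link"],
    "global_key": ["filename"],
    "external_id": ["productDisplayName"],
    "metadata_string": ["brand"],
}

# Same renaming loop as in the commit: reserved fields keep their field
# name; metadata fields become "metadata///<type>///<original name>"
columns = {}
for field, names in fields.items():
    for name in names:
        if field.startswith("metadata"):
            columns[name] = f"{field.split('_')[0]}///{field.split('_')[1]}///{name}"
        else:
            columns[name] = field

print(df.rename(columns=columns).columns.tolist())
# ['row_data', 'global_key', 'external_id', 'metadata///string///brand']
```

The renamed table is what `client.create_data_rows_from_table()` consumes, so any extra columns you want ingested as metadata only need to be listed under the matching `metadata_*` key.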