Spaces:
Runtime error
Runtime error
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import openpyxl
|
5 |
+
from sklearn.preprocessing import MinMaxScaler
|
6 |
+
from sklearn.feature_extraction.text import CountVectorizer
|
7 |
+
from sklearn.ensemble import RandomForestClassifier
|
8 |
+
import pickle
|
9 |
+
|
10 |
+
import warnings
|
11 |
+
warnings.filterwarnings('ignore')
|
12 |
+
|
13 |
+
|
14 |
+
# Streamlit page: upload a CSV, engineer features, predict a product type for
# every row with a pre-trained model, and offer the result as a download.
st.set_page_config(page_title='Product Type Predictor')
st.title('Detect Product Type')
st.subheader('Upload your CSV file')
uploaded_file = st.file_uploader('Choose a CSV file', type='csv')

if uploaded_file is not None:
    st.markdown('---')

    # Loading the data
    @st.cache_data
    def load_excel(file1):
        """Read the uploaded file into a DataFrame.

        Despite the name, the file is parsed as CSV (``pd.read_csv``).
        """
        return pd.read_csv(file1)

    data = load_excel(uploaded_file)
    st.subheader('Data Preview')
    st.dataframe(data.head(20))

    # Feature selection
    features = [
        'a_ApplicableMarkets',
        'Number of Unique Finished Packs in BOM',
        'Total Number of Finished Packs in BOM',
        'GMN',
        'Product_Description',
        'EA_GTIN',
        'CV_GTIN',
        'Product_Hierarchy_Code',
        'Product_Hierarchy_Units_Per_Pack_L8',
        'myPSR_Pack_Variant',
        'Stibo_Pack_variant',
    ]

    # Fail with a readable message instead of a raw KeyError traceback when
    # the upload does not have the expected schema.
    missing_columns = [col for col in features if col not in data.columns]
    if missing_columns:
        st.error(
            'The uploaded file is missing required columns: '
            + ', '.join(missing_columns)
        )
        st.stop()

    df = data[features]

    # Treat empty / whitespace-only cells as missing values.
    df = df.replace(r'^\s*$', np.nan, regex=True)
    df[['EA_GTIN', 'CV_GTIN']] = df[['EA_GTIN', 'CV_GTIN']].apply(pd.to_numeric)

    # The description must stay textual: filling it with the integer 0 (as the
    # generic fill below does for every other column) makes CountVectorizer
    # fail on rows with a blank description.
    df['Product_Description'] = df['Product_Description'].fillna('').astype(str)
    df = df.fillna(0)

    # One-hot encode the categorical columns.
    text_cols = [
        'a_ApplicableMarkets',
        'Product_Hierarchy_Units_Per_Pack_L8',
        'myPSR_Pack_Variant',
        'Stibo_Pack_variant',
    ]
    df = pd.get_dummies(data=df, columns=text_cols)

    # Bag-of-words encoding of the product description.
    # NOTE(review): the vectorizer (and the scaler below) are fitted on the
    # uploaded file itself, so the resulting features depend on the upload.
    # Presumably the model was trained with its own fitted transformers;
    # consider persisting and reusing those -- TODO confirm against training.
    v = CountVectorizer()
    text_vectors = v.fit_transform(df['Product_Description'])
    text_vectors_df = pd.DataFrame(
        text_vectors.toarray(),
        columns=v.get_feature_names_out(),
        index=df.index,  # keep rows aligned with df for the concat below
    )
    df_ext = pd.concat([df, text_vectors_df], axis=1)
    df = df_ext.drop("Product_Description", axis=1)

    scaler = MinMaxScaler()
    df[['GMN', 'EA_GTIN', 'CV_GTIN']] = scaler.fit_transform(
        df[['GMN', 'EA_GTIN', 'CV_GTIN']]
    )

    # NOTE: pickle can execute arbitrary code while loading; only deploy a
    # trusted 'rfc_model.pkl'. The context manager closes the file handle.
    with open('rfc_model.pkl', 'rb') as model_file:
        loaded_model = pickle.load(model_file)

    # The dummy / bag-of-words columns are derived from the uploaded data, so
    # they need not match the columns the model was fitted on. When the model
    # exposes its training feature names, align to them: unseen columns are
    # dropped and absent ones are added as all-zero.
    expected_columns = getattr(loaded_model, 'feature_names_in_', None)
    if expected_columns is not None:
        df = df.reindex(columns=expected_columns, fill_value=0)

    result = loaded_model.predict(df)
    data['Product_Type'] = result

    out = data.to_csv().encode('utf-8')
    st.download_button(
        label='DOWNLOAD RESULT',
        data=out,
        file_name='Product_Output.csv',
        mime='text/csv',  # 'csv' alone is not a valid MIME type
    )
|