# Spaces: Runtime error
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
from sklearn.feature_extraction.text import CountVectorizer | |
from sklearn.ensemble import RandomForestClassifier | |
import joblib | |
import warnings | |
# Silence every warning so library notices do not clutter the app.
# NOTE(review): this blanket filter also hides pandas / scikit-learn
# deprecation notices -- consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

# Page chrome and the CSV upload widget.
st.set_page_config(page_title='Product Type Predictor')
st.title('Detect Product Type')
st.subheader('Upload your CSV file')
uploaded_file = st.file_uploader('Choose a CSV file', type='csv')

# Nothing below can run without a file, so end this script run here.
# A guard clause (instead of nesting the rest of the script under
# `if uploaded_file is not None:`) keeps the remaining statements flat.
if uploaded_file is None:
    st.stop()

st.markdown('---')
# Loading the data
def load_excel(file1):
    """Read a CSV source into a DataFrame.

    Despite its name, this function parses CSV (via ``pandas.read_csv``),
    not Excel; the name is kept so existing callers keep working.

    Args:
        file1: A path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    return pd.read_csv(file1)
# Load the upload and show the first rows so the user can sanity-check it.
data = load_excel(uploaded_file)
st.subheader('Data Preview')
st.dataframe(data.head(20))

# Feature selection
features = [
    'a_ApplicableMarkets', 'Manufacturing Plant',
    'Number of Unique Finished Packs in BOM',
    'Total Number of Finished Packs in BOM', 'GMN', 'Product_Description',
    'EA_GTIN', 'CV_GTIN', 'Product_Hierarchy_Code',
    'Product_Hierarchy_Units_Per_Pack_L8', 'myPSR_Pack_Variant',
    'Stibo_Pack_variant',
]

# Report missing columns in the page instead of failing with a KeyError
# traceback on the selection below.
missing_columns = [name for name in features if name not in data.columns]
if missing_columns:
    st.error('The uploaded file is missing required columns: '
             + ', '.join(missing_columns))
    st.stop()

# Work on a copy so the edits below never write into a view of `data`.
df = data[features].copy()

# Normalise the known spelling variants of two category labels.
df['Manufacturing Plant'] = df['Manufacturing Plant'].replace(
    {'Commerical Plant': 'Commercial Plant'})
df['Stibo_Pack_variant'] = df['Stibo_Pack_variant'].replace(
    {'Migration Open Stock': 'Migration OpenStock'})

# Missing values become 0 in every column.
df = df.fillna(0)

# GTINs are validated digit by digit later on, so keep them as text.
df['EA_GTIN'] = df['EA_GTIN'].astype(str)
df['CV_GTIN'] = df['CV_GTIN'].astype(str)
def GTIN_validity(x):
    """Check a GTIN's trailing check digit with the GS1 mod-10 scheme.

    The value is converted to text first. A trailing ``.0`` (left behind
    when the value went through a float) is removed before validation;
    values without that suffix are used unchanged.

    Args:
        x: The GTIN as a string or number, e.g. ``"4006381333931.0"``.

    Returns:
        bool: ``True`` when the last digit equals the check digit computed
        from the preceding digits. ``False`` for a mismatch, for the
        ``0`` / ``0.0`` missing-value placeholder, and for any value that
        is not a run of at least two ASCII digits.
    """
    gtin = str(x).strip()
    # Only drop a genuine float suffix; never chop real digits.
    if gtin.endswith('.0'):
        gtin = gtin[:-2]

    # Need at least one payload digit plus the check digit, digits only.
    if len(gtin) < 2 or not (gtin.isascii() and gtin.isdigit()):
        return False

    *payload, check_digit = (int(char) for char in gtin)

    # Weights alternate 3, 1, 3, ... starting from the digit right next to
    # the check digit, hence the reversed walk over the payload.
    weighted_sum = sum(
        digit * 3 if position % 2 == 0 else digit
        for position, digit in enumerate(reversed(payload))
    )

    # Distance from the weighted sum up to the next multiple of ten.
    expected_check_digit = (10 - weighted_sum % 10) % 10
    return check_digit == expected_check_digit
# Flag each GTIN column with the outcome of its check-digit validation.
df['EA_GTIN_valid'] = df['EA_GTIN'].map(GTIN_validity)
df['CV_GTIN_valid'] = df['CV_GTIN'].map(GTIN_validity)

# One-hot encode the categorical columns.
text_cols = ['a_ApplicableMarkets', 'Manufacturing Plant',
             'Product_Hierarchy_Units_Per_Pack_L8', 'myPSR_Pack_Variant',
             'Stibo_Pack_variant']
df = pd.get_dummies(data=df, columns=text_cols)

# Bag-of-words counts for the free-text description. Cast to str first:
# missing descriptions may have been filled with the integer 0, which
# CountVectorizer cannot tokenise.
v = CountVectorizer()
text_vectors = v.fit_transform(df['Product_Description'].astype(str))
text_vectors_df = pd.DataFrame(
    text_vectors.toarray(),
    columns=v.get_feature_names_out(),
    index=df.index,  # keep rows aligned with df for the concat below
)
df_ext = pd.concat([df, text_vectors_df], axis=1)
df = df_ext.drop(['GMN', 'Product_Description', 'EA_GTIN', 'CV_GTIN'], axis=1)

# Load the trained model; the context manager closes the file handle.
# NOTE: joblib.load unpickles its input -- only load model files you trust.
with open('rfc_model_grid.sav', 'rb') as model_file:
    loaded_model = joblib.load(model_file)

# The dummy and token columns above are derived from the uploaded file, so
# they can differ from the columns the model was fitted on. When the model
# records its training columns, align to them: extra columns are dropped
# and absent ones are filled with 0.
# NOTE(review): assumes the model was fitted on a DataFrame so that
# `feature_names_in_` exists; without it the frame is passed through as is.
expected_columns = getattr(loaded_model, 'feature_names_in_', None)
if expected_columns is not None:
    df = df.reindex(columns=list(expected_columns), fill_value=0)

result = loaded_model.predict(df)
data['Product_Type_Predicted'] = result

# Offer the original rows plus the prediction column as a CSV download.
out = data.to_csv().encode('utf-8')
st.download_button(label='DOWNLOAD RESULT', data=out,
                   file_name='Product_Type_Output.csv', mime='text/csv')