"""Streamlit app that predicts a product type for every row of an uploaded CSV.

The user uploads a CSV file, the script derives a feature matrix from a fixed
set of columns, runs a pickled classifier loaded from ``rfc_model.pkl`` on it
and offers the input data, extended with a ``Product_Type`` column, as a CSV
download.
"""

import pickle
import warnings

import numpy as np
import openpyxl
import pandas as pd
import streamlit as st
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import MinMaxScaler

warnings.filterwarnings('ignore')

# Kept ahead of every other Streamlit call, as in the original script.
st.set_page_config(page_title='Product Type Predictor')

# Path of the pickled classifier, resolved against the working directory.
MODEL_PATH = 'rfc_model.pkl'

# Columns of the uploaded file that feed the model.
FEATURE_COLUMNS = [
    'a_ApplicableMarkets',
    'Number of Unique Finished Packs in BOM',
    'Total Number of Finished Packs in BOM',
    'GMN',
    'Product_Description',
    'EA_GTIN',
    'CV_GTIN',
    'Product_Hierarchy_Code',
    'Product_Hierarchy_Units_Per_Pack_L8',
    'myPSR_Pack_Variant',
    'Stibo_Pack_variant',
]
# Columns converted to numbers before missing values are filled.
GTIN_COLUMNS = ['EA_GTIN', 'CV_GTIN']
# Columns rescaled to the [0, 1] range.
SCALED_COLUMNS = ['GMN', 'EA_GTIN', 'CV_GTIN']
# Columns that are one-hot encoded.
CATEGORICAL_COLUMNS = [
    'a_ApplicableMarkets',
    'Product_Hierarchy_Units_Per_Pack_L8',
    'myPSR_Pack_Variant',
    'Stibo_Pack_variant',
]
# Free-text column turned into bag-of-words token counts.
TEXT_COLUMN = 'Product_Description'


@st.cache_data
def load_csv(file1):
    """Read the uploaded CSV file into a DataFrame (cached by Streamlit)."""
    return pd.read_csv(file1)


def build_feature_matrix(data):
    """Turn the raw upload into the numeric feature matrix used for prediction.

    Args:
        data: DataFrame containing at least every column in FEATURE_COLUMNS.

    Returns:
        A new DataFrame with one-hot encoded categorical columns, token-count
        columns for the product description and min-max scaled numeric
        columns. ``data`` itself is not modified.
    """
    # Work on a copy so the assignments below never write into a slice of
    # the caller's frame.
    df = data[FEATURE_COLUMNS].copy()

    # Blank / whitespace-only cells count as missing values.
    df = df.replace(r'^\s*$', np.nan, regex=True)
    # Unparseable GTINs become NaN and are then filled like blanks instead
    # of aborting the whole run.
    df[GTIN_COLUMNS] = df[GTIN_COLUMNS].apply(pd.to_numeric, errors='coerce')
    df = df.fillna(0)

    df = pd.get_dummies(data=df, columns=CATEGORICAL_COLUMNS)

    # Missing descriptions were filled with the integer 0 above; the
    # vectorizer only accepts strings, so cast before tokenising.
    descriptions = df[TEXT_COLUMN].astype(str)
    vectorizer = CountVectorizer()
    try:
        token_counts = vectorizer.fit_transform(descriptions)
    except ValueError:
        # Raised when no description yields a token (empty vocabulary);
        # continue with zero token columns.
        token_counts_df = pd.DataFrame(index=df.index)
    else:
        token_counts_df = pd.DataFrame(
            token_counts.toarray(),
            columns=vectorizer.get_feature_names_out(),
            index=df.index,  # keep rows aligned for the concat below
        )

    df = pd.concat([df, token_counts_df], axis=1).drop(TEXT_COLUMN, axis=1)

    # NOTE(review): the scaler is fitted on the uploaded file, so the scaled
    # values depend on this file's min/max rather than on the training
    # data's. Persisting the fitted scaler alongside the model would remove
    # that dependency -- confirm how the model was trained.
    df[SCALED_COLUMNS] = MinMaxScaler().fit_transform(df[SCALED_COLUMNS])
    return df


def align_to_model(features, model):
    """Reorder/pad ``features`` to the columns the model was fitted with.

    The one-hot and token columns are derived from the uploaded file, so they
    can differ from the columns seen during training. When the model exposes
    ``feature_names_in_``, columns it does not know are dropped and columns
    it expects but the upload lacks are added as zeros. Models without that
    attribute receive the matrix unchanged.
    """
    expected_columns = getattr(model, 'feature_names_in_', None)
    if expected_columns is None:
        return features
    return features.reindex(columns=list(expected_columns), fill_value=0)


st.title('Detect Product Type')
st.subheader('Upload your CSV file')
uploaded_file = st.file_uploader('Choose a CSV file', type='csv')

if uploaded_file is not None:
    st.markdown('---')

    # Loading the data
    data = load_csv(uploaded_file)
    st.subheader('Data Preview')
    st.dataframe(data.head(20))

    # Fail with a readable message instead of a KeyError traceback.
    missing_columns = [col for col in FEATURE_COLUMNS if col not in data.columns]
    if missing_columns:
        st.error(
            'The uploaded file is missing required columns: '
            + ', '.join(missing_columns)
        )
        st.stop()
    if data.empty:
        st.warning('The uploaded file contains no rows.')
        st.stop()

    # Feature selection and preprocessing
    feature_matrix = build_feature_matrix(data)

    # The model file ships with the app; never unpickle untrusted files.
    with open(MODEL_PATH, 'rb') as model_file:
        loaded_model = pickle.load(model_file)

    result = loaded_model.predict(align_to_model(feature_matrix, loaded_model))
    data['Product_Type'] = result

    out = data.to_csv().encode('utf-8')
    st.download_button(
        label='DOWNLOAD RESULT',
        data=out,
        file_name='Product_Output.csv',
        mime='text/csv',
    )