nurindahpratiwi committed on
Commit
8f104ab
1 Parent(s): fc524a4

update file

Browse files
Files changed (4) hide show
  1. app.py +145 -132
  2. app_.py +144 -0
  3. app_3.py +0 -167
  4. requirements.txt +6 -11
app.py CHANGED
@@ -1,144 +1,157 @@
1
- import pandas as pd
2
- from transformers import pipeline
3
  import streamlit as st
4
- import datetime
5
- from huggingface_hub import hf_hub_download
6
  import joblib
 
 
 
 
 
7
 
8
- REPO_ID = "AlbieCofie/predict-customer-churn"
9
 
10
- num_imputer = joblib.load(
11
- hf_hub_download(repo_id=REPO_ID, filename="numerical_imputer.joblib")
12
- )
13
 
14
- cat_imputer = joblib.load(
15
- hf_hub_download(repo_id=REPO_ID, filename="categorical_imputer.joblib")
16
- )
 
17
 
18
- encoder = joblib.load(
19
- hf_hub_download(repo_id=REPO_ID, filename="encoder.joblib")
 
 
 
20
  )
21
 
 
 
 
22
  scaler = joblib.load(
23
- hf_hub_download(repo_id=REPO_ID, filename="scaler.joblib")
24
  )
25
 
26
- model = joblib.load(
27
- hf_hub_download(repo_id=REPO_ID, filename="Final_model.joblib")
28
- )
 
 
 
 
 
 
 
 
 
29
 
30
- # Create a function that applies the ML pipeline and makes predictions
31
- def predict(gender,SeniorCitizen,Partner,Dependents, tenure, PhoneService,MultipleLines,
32
- InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,
33
- Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges):
34
-
35
-
36
-
37
- # Create a dataframe with the input data
38
- input_df = pd.DataFrame({
39
- 'gender': [gender],
40
- 'SeniorCitizen': [SeniorCitizen],
41
- 'Partner': [Partner],
42
- 'Dependents': [Dependents],
43
- 'tenure': [tenure],
44
- 'PhoneService': [PhoneService],
45
- 'MultipleLines': [MultipleLines],
46
- 'InternetService': [InternetService],
47
- 'OnlineSecurity': [OnlineSecurity],
48
- 'OnlineBackup': [OnlineBackup],
49
- 'DeviceProtection': [DeviceProtection],
50
- 'TechSupport': [TechSupport],
51
- 'StreamingTV': [StreamingTV],
52
- 'StreamingMovies': [StreamingMovies],
53
- 'Contract': [Contract],
54
- 'PaperlessBilling': [PaperlessBilling],
55
- 'PaymentMethod': [PaymentMethod],
56
- 'MonthlyCharges': [MonthlyCharges],
57
- 'TotalCharges': [TotalCharges]
58
- })
59
-
60
- # Selecting categorical and numerical columns separately
61
- cat_columns = [col for col in input_df.columns if input_df[col].dtype == 'object']
62
- num_columns = [col for col in input_df.columns if input_df[col].dtype != 'object']
63
-
64
- # Apply the imputers on the input data
65
- input_df_imputed_cat = cat_imputer.transform(input_df[cat_columns])
66
- input_df_imputed_num = num_imputer.transform(input_df[num_columns])
67
-
68
- # Encode the categorical columns
69
- input_encoded_df = pd.DataFrame(encoder.transform(input_df_imputed_cat).toarray(),
70
- columns=encoder.get_feature_names_out(cat_columns))
71
-
72
- # Scale the numerical columns
73
- input_df_scaled = scaler.transform(input_df_imputed_num)
74
- input_scaled_df = pd.DataFrame(input_df_scaled , columns = num_columns)
75
-
76
-
77
- #joining the cat encoded and num scaled
78
- final_df = pd.concat([input_encoded_df, input_scaled_df], axis=1)
79
-
80
- final_df = final_df.reindex(columns=['SeniorCitizen','tenure','MonthlyCharges','TotalCharges',
81
- 'gender_Female','gender_Male','Partner_No','Partner_Yes','Dependents_No','Dependents_Yes','PhoneService_No',
82
- 'PhoneService_Yes','MultipleLines_No','MultipleLines_Yes','InternetService_DSL','InternetService_Fiber optic',
83
- 'InternetService_No','OnlineSecurity_No','OnlineSecurity_Yes','OnlineBackup_No','OnlineBackup_Yes','DeviceProtection_No',
84
- 'DeviceProtection_Yes','TechSupport_No','TechSupport_Yes','StreamingTV_No','StreamingTV_Yes','StreamingMovies_No',
85
- 'StreamingMovies_Yes','Contract_Month-to-month','Contract_One year','Contract_Two year','PaperlessBilling_No',
86
- 'PaperlessBilling_Yes','PaymentMethod_Bank transfer (automatic)','PaymentMethod_Credit card (automatic)','PaymentMethod_Electronic check',
87
- 'PaymentMethod_Mailed check'])
88
-
89
- # Make predictions using the model
90
- predictions = model.predict(final_df)[0]
91
- #prediction = model.predict(final_df)[0]
92
-
93
- # Make predictions using the model
94
- #predictions = model.predict(final_df)
95
-
96
- # Convert the numpy array to an integer
97
- #prediction_label = int(predictions.item())
98
-
99
- prediction_label = "Beware!!! This customer is likely to Churn" if predictions.item() == "Yes" else "This customer is Not likely churn"
100
-
101
-
102
- return prediction_label
103
-
104
- #return predictions
105
-
106
-
107
- if 'clicked' not in st.session_state:
108
- st.session_state.clicked = False
109
-
110
- def click_button():
111
- st.session_state.clicked = True
112
-
113
-
114
- st.title("CUSTOMER CHURN PREDICTION APP")
115
-
116
- with st.form(key="customer-information"):
117
- st.markdown("This app predicts whether a customer will leave your company or not. Enter the details of the customer below to see the result")
118
- gender = st.radio('Select your gender', ('male', 'female'))
119
- SeniorCitizen = st.radio("Are you a Seniorcitizen; No=0 and Yes=1", ('0', '1'))
120
- Partner = st.radio('Do you have Partner', ('Yes', 'No'))
121
- Dependents = st.selectbox('Do you have any Dependents?', ('No', 'Yes'))
122
- tenure = st.number_input('Lenght of tenure (no. of months with Telco)', min_value=0, max_value=90, value=1, step=1)
123
- PhoneService = st.radio('Do you have PhoneService? ', ('No', 'Yes'))
124
- MultipleLines = st.radio('Do you have MultipleLines', ('No', 'Yes'))
125
- InternetService = st.radio('Do you have InternetService', ('DSL', 'Fiber optic', 'No'))
126
- OnlineSecurity = st.radio('Do you have OnlineSecurity?', ('No', 'Yes'))
127
- OnlineBackup = st.radio('Do you have OnlineBackup?', ('No', 'Yes'))
128
- DeviceProtection = st.radio('Do you have DeviceProtection?', ('No', 'Yes'))
129
- TechSupport = st.radio('Do you have TechSupport?', ('No', 'Yes'))
130
- StreamingTV = st.radio('Do you have StreamingTV?', ('No', 'Yes'))
131
- StreamingMovies = st.radio('Do you have StreamingMovies?', ('No', 'Yes'))
132
- Contract = st.selectbox('which Contract do you use?', ('Month-to-month', 'One year', 'Two year'))
133
- PaperlessBilling = st.radio('Do you prefer PaperlessBilling?', ('Yes', 'No'))
134
- PaymentMethod = st.selectbox('Which PaymentMethod do you prefer?', ('Electronic check', 'Mailed check', 'Bank transfer (automatic)',
135
- 'Credit card (automatic)'))
136
- MonthlyCharges = st.number_input("Enter monthly charges (the range should between 0-120)")
137
- TotalCharges = st.number_input("Enter total charges (the range should between 0-10.000)")
138
- st.form_submit_button('Predict', on_click=click_button)
139
-
140
- if st.session_state.clicked:
141
- # The message and nested widget will remain on the page
142
- predict(gender,SeniorCitizen,Partner,Dependents, tenure, PhoneService,MultipleLines,
143
- InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,
144
- Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges)
 
 
 
1
  import streamlit as st
2
+ import numpy as np
 
3
  import joblib
4
+ from sklearn.preprocessing import StandardScaler
5
+ import pandas as pd
6
+ import matplotlib.pyplot as plt
7
+ import seaborn as sns
8
+ from huggingface_hub import hf_hub_download
9
 
 
10
 
11
# Hugging Face Hub repository that hosts the reference data and the fitted artifacts.
REPO_ID = "flokabukie/Credit_Card_Fraud_Detection"

# Load the non-anomaly reference data. The filename variable is rebound to the
# local path returned by hf_hub_download, exactly as before.
non_anomaly_csv_filename = 'non_anomaly_data.csv'
non_anomaly_csv_filename = hf_hub_download(repo_id=REPO_ID, filename=non_anomaly_csv_filename)
non_anomaly_df = pd.read_csv(non_anomaly_csv_filename)

# NOTE(security): joblib.load unpickles the downloaded files, which can execute
# arbitrary code. Only point REPO_ID at a repository you trust.

# Load the Isolation Forest model
model_filename = "IsolationForest.joblib"
isolation_forest = joblib.load(
    hf_hub_download(repo_id=REPO_ID, filename=model_filename)
)

# Load the StandardScaler
scaler_filename = "StandardScaler.joblib"
scaler = joblib.load(
    hf_hub_download(repo_id=REPO_ID, filename=scaler_filename)
)

st.title("Anomaly Detection App with Isolation Forest")

# Sidebar inputs: one slider per feature fed to the scaler/model.
st.sidebar.title("Input Feature Values")
transaction_dollar_amount = st.sidebar.slider("Transaction Dollar Amount", min_value=0.0, max_value=10000.0)
longitude = st.sidebar.slider("Longitude (Long)", min_value=-180.0, max_value=180.0)
latitude = st.sidebar.slider("Latitude (Lat)", min_value=-90.0, max_value=90.0)
credit_card_limit = st.sidebar.slider("Credit Card Limit", min_value=0, max_value=50000)
year = st.sidebar.slider("Year", min_value=2000, max_value=2030)
month = st.sidebar.slider("Month", min_value=1, max_value=12)
day = st.sidebar.slider("Day", min_value=1, max_value=31)

submitted = st.sidebar.button("Submit")

if submitted:
    # Single-row frame for the user's input.
    # NOTE(review): column names/order presumably match what the scaler was
    # fitted on - confirm against the training notebook.
    input_data = {
        'transaction_dollar_amount': transaction_dollar_amount,
        'Long': longitude,
        'Lat': latitude,
        'credit_card_limit': credit_card_limit,
        'year': year,
        'month': month,
        'day': day
    }
    selected_columns = pd.DataFrame([input_data])

    # Standardize the input data using the loaded StandardScaler
    selected_columns_scaled = scaler.transform(selected_columns)

    # Score the reference (non-anomaly) dataset and the single input row.
    non_anomaly_scores = isolation_forest.decision_function(scaler.transform(non_anomaly_df))
    your_anomaly_score = isolation_forest.decision_function(selected_columns_scaled)[0]

    # Accepted score range: min/max of the reference scores widened by a margin.
    # NOTE(review): a margin of 0.5 looks large relative to the usual range of
    # IsolationForest.decision_function, so the thresholds may rarely be
    # crossed - confirm this is intended.
    margin = 0.5
    min_threshold = np.min(non_anomaly_scores) - margin
    max_threshold = np.max(non_anomaly_scores) + margin

    # Computed once and reused by every section below.
    is_anomaly = your_anomaly_score < min_threshold or your_anomaly_score > max_threshold

    # Report the anomaly status
    st.subheader("Anomaly Classification")
    if is_anomaly:
        st.write("Prediction Result: 🚨 Anomaly Detected!")
    else:
        st.write("Prediction Result: ✅ Not Anomaly")

    # Histogram of the reference scores with the input's score marked on it.
    # An explicit Figure is handed to st.pyplot instead of the pyplot module.
    hist_fig, hist_ax = plt.subplots(figsize=(8, 5))
    sns.histplot(non_anomaly_scores, kde=True, color='gray', label='Non-Anomaly Score Distribution', ax=hist_ax)
    hist_ax.axvline(x=your_anomaly_score, color='blue', linestyle='dashed', label='Your Data Point')
    hist_ax.set_xlabel('Anomaly Score')
    hist_ax.set_ylabel('Frequency')
    hist_ax.set_title('Anomaly Score Distribution and Your Data Point')
    hist_ax.legend()
    st.pyplot(hist_fig)
    # Close the figure so reruns of the script do not accumulate open figures.
    plt.close(hist_fig)

    # Explain the results
    st.write("The input data point has been classified as an anomaly." if is_anomaly
             else "The input data point is not classified as an anomaly.")
    st.write("The anomaly score is:", your_anomaly_score)
    st.write("The threshold for anomaly detection is:", min_threshold, "to", max_threshold)

    # Scatter plot of longitude/latitude: reference data plus the input point.
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.scatterplot(data=non_anomaly_df, x='Long', y='Lat', color='lightgrey', label='Normal 🏙️', ax=ax)
    if is_anomaly:
        ax.scatter(selected_columns['Long'], selected_columns['Lat'], color='red', label='Suspicious 🚩', s=100, marker='x')
    else:
        ax.scatter(selected_columns['Long'], selected_columns['Lat'], color='green', label='Valid ✅', s=100, marker='o')

    ax.set_xlabel("Longitude")
    ax.set_ylabel("Latitude")
    ax.set_title("Location Plot: Anomaly Detection 🗺️")
    ax.legend()
    ax.grid(True)

    # Show the scatter plot in Streamlit
    st.subheader("Location Plot: Anomaly Detection 🗺️")
    st.pyplot(fig)
    plt.close(fig)

    # Repeat the classification next to the location plot.
    st.subheader("Anomaly Classification")
    if is_anomaly:
        st.write("Prediction Result: 🚨 Anomaly Detected!")
    else:
        st.write("Prediction Result: Not Anomaly")

    # Explain how to read the location plot.
    st.write("The location plot visualizes the anomaly detection result based on longitude and latitude.")
    if is_anomaly:
        st.write("The input data point is marked as Suspicious 🚩 due to its anomaly score.")
        st.write("The red 'x' marker indicates a suspicious location.")
    else:
        st.write("The input data point is marked as Valid due to its anomaly score.")
        st.write("The green 'o' marker indicates a valid location.")
 
 
app_.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from transformers import pipeline
3
+ import streamlit as st
4
+ import datetime
5
+ from huggingface_hub import hf_hub_download
6
+ import joblib
7
+
8
# Hugging Face Hub repository that stores the fitted preprocessing objects and model.
REPO_ID = "AlbieCofie/predict-customer-churn"


def _load_artifact(filename):
    """Download *filename* from REPO_ID on the Hub and load it with joblib."""
    local_path = hf_hub_download(repo_id=REPO_ID, filename=filename)
    return joblib.load(local_path)


# Artifacts are fetched in the same order as before; module-level names are unchanged.
num_imputer = _load_artifact("numerical_imputer.joblib")
cat_imputer = _load_artifact("categorical_imputer.joblib")
encoder = _load_artifact("encoder.joblib")
scaler = _load_artifact("scaler.joblib")
model = _load_artifact("Final_model.joblib")
29
+
30
+ # Create a function that applies the ML pipeline and makes predictions
31
def predict(gender,SeniorCitizen,Partner,Dependents, tenure, PhoneService,MultipleLines,
            InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,
            Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges):
    """Run one customer record through the preprocessing pipeline and the churn model.

    The inputs are assembled into a single-row DataFrame, split into categorical
    (object dtype) and numerical columns, imputed, one-hot encoded / scaled with
    the module-level ``cat_imputer``, ``num_imputer``, ``encoder`` and ``scaler``,
    re-ordered into the column layout listed below, and passed to ``model``.

    Args:
        gender: Customer gender; normalised to capitalised form ("Male"/"Female")
            to match the encoder's ``gender_Female``/``gender_Male`` columns.
        SeniorCitizen: 0/1 flag; accepted as int or numeric string and cast to
            int so it is handled as a numerical column.
        tenure, MonthlyCharges, TotalCharges: Numerical features.
        Partner, Dependents, PhoneService, MultipleLines, InternetService,
        OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport, StreamingTV,
        StreamingMovies, Contract, PaperlessBilling, PaymentMethod: Categorical
            features given as strings.

    Returns:
        str: A human-readable message saying whether the customer is likely to churn.
    """
    # Column layout the final frame is re-indexed to (numerical first, then one-hot).
    expected_columns = [
        'SeniorCitizen','tenure','MonthlyCharges','TotalCharges',
        'gender_Female','gender_Male','Partner_No','Partner_Yes','Dependents_No','Dependents_Yes','PhoneService_No',
        'PhoneService_Yes','MultipleLines_No','MultipleLines_Yes','InternetService_DSL','InternetService_Fiber optic',
        'InternetService_No','OnlineSecurity_No','OnlineSecurity_Yes','OnlineBackup_No','OnlineBackup_Yes','DeviceProtection_No',
        'DeviceProtection_Yes','TechSupport_No','TechSupport_Yes','StreamingTV_No','StreamingTV_Yes','StreamingMovies_No',
        'StreamingMovies_Yes','Contract_Month-to-month','Contract_One year','Contract_Two year','PaperlessBilling_No',
        'PaperlessBilling_Yes','PaymentMethod_Bank transfer (automatic)','PaymentMethod_Credit card (automatic)','PaymentMethod_Electronic check',
        'PaymentMethod_Mailed check',
    ]

    # Create a dataframe with the input data
    input_df = pd.DataFrame({
        # The form historically sent lower-case values; the encoder columns are capitalised.
        'gender': [str(gender).capitalize()],
        # The form sends '0'/'1' strings; left as a string this column would be
        # classed as categorical and drop out of the numerical columns.
        'SeniorCitizen': [int(SeniorCitizen)],
        'Partner': [Partner],
        'Dependents': [Dependents],
        'tenure': [tenure],
        'PhoneService': [PhoneService],
        'MultipleLines': [MultipleLines],
        'InternetService': [InternetService],
        'OnlineSecurity': [OnlineSecurity],
        'OnlineBackup': [OnlineBackup],
        'DeviceProtection': [DeviceProtection],
        'TechSupport': [TechSupport],
        'StreamingTV': [StreamingTV],
        'StreamingMovies': [StreamingMovies],
        'Contract': [Contract],
        'PaperlessBilling': [PaperlessBilling],
        'PaymentMethod': [PaymentMethod],
        'MonthlyCharges': [MonthlyCharges],
        'TotalCharges': [TotalCharges]
    })

    # Selecting categorical and numerical columns separately
    cat_columns = [col for col in input_df.columns if input_df[col].dtype == 'object']
    num_columns = [col for col in input_df.columns if input_df[col].dtype != 'object']

    # Apply the imputers on the input data
    input_df_imputed_cat = cat_imputer.transform(input_df[cat_columns])
    input_df_imputed_num = num_imputer.transform(input_df[num_columns])

    # Encode the categorical columns
    input_encoded_df = pd.DataFrame(encoder.transform(input_df_imputed_cat).toarray(),
                                    columns=encoder.get_feature_names_out(cat_columns))

    # Scale the numerical columns
    input_df_scaled = scaler.transform(input_df_imputed_num)
    input_scaled_df = pd.DataFrame(input_df_scaled, columns=num_columns)

    # Join the encoded categorical and scaled numerical parts, then fix the column order.
    # NOTE(review): reindex fills any column missing from the concat with NaN
    # rather than raising - confirm the encoder emits every name listed above.
    final_df = pd.concat([input_encoded_df, input_scaled_df], axis=1)
    final_df = final_df.reindex(columns=expected_columns)

    # Make predictions using the model
    prediction = model.predict(final_df)[0]
    # Unwrap NumPy scalars; a plain Python value is used as-is.
    prediction_value = prediction.item() if hasattr(prediction, "item") else prediction

    if prediction_value == "Yes":
        prediction_label = "Beware!!! This customer is likely to Churn"
    else:
        prediction_label = "This customer is Not likely churn"

    return prediction_label
105
+
106
+
107
# Remember across Streamlit reruns whether the Predict button has been pressed.
if 'clicked' not in st.session_state:
    st.session_state.clicked = False


def click_button():
    """Form-submit callback: record in session state that Predict was pressed."""
    st.session_state.clicked = True


st.title("CUSTOMER CHURN PREDICTION APP")

with st.form(key="customer-information"):
    st.markdown("This app predicts whether a customer will leave your company or not. Enter the details of the customer below to see the result")
    # Option values match the encoder's categories (gender_Female / gender_Male).
    gender = st.radio('Select your gender', ('Male', 'Female'))
    # The radio returns '0'/'1' as strings; convert so the flag is numerical downstream.
    SeniorCitizen = int(st.radio("Are you a Seniorcitizen; No=0 and Yes=1", ('0', '1')))
    Partner = st.radio('Do you have Partner', ('Yes', 'No'))
    Dependents = st.selectbox('Do you have any Dependents?', ('No', 'Yes'))
    tenure = st.number_input('Lenght of tenure (no. of months with Telco)', min_value=0, max_value=90, value=1, step=1)
    PhoneService = st.radio('Do you have PhoneService? ', ('No', 'Yes'))
    MultipleLines = st.radio('Do you have MultipleLines', ('No', 'Yes'))
    InternetService = st.radio('Do you have InternetService', ('DSL', 'Fiber optic', 'No'))
    OnlineSecurity = st.radio('Do you have OnlineSecurity?', ('No', 'Yes'))
    OnlineBackup = st.radio('Do you have OnlineBackup?', ('No', 'Yes'))
    DeviceProtection = st.radio('Do you have DeviceProtection?', ('No', 'Yes'))
    TechSupport = st.radio('Do you have TechSupport?', ('No', 'Yes'))
    StreamingTV = st.radio('Do you have StreamingTV?', ('No', 'Yes'))
    StreamingMovies = st.radio('Do you have StreamingMovies?', ('No', 'Yes'))
    Contract = st.selectbox('which Contract do you use?', ('Month-to-month', 'One year', 'Two year'))
    PaperlessBilling = st.radio('Do you prefer PaperlessBilling?', ('Yes', 'No'))
    PaymentMethod = st.selectbox('Which PaymentMethod do you prefer?', ('Electronic check', 'Mailed check', 'Bank transfer (automatic)',
                                                                        'Credit card (automatic)'))
    MonthlyCharges = st.number_input("Enter monthly charges (the range should between 0-120)")
    TotalCharges = st.number_input("Enter total charges (the range should between 0-10.000)")
    st.form_submit_button('Predict', on_click=click_button)

if st.session_state.clicked:
    # Once Predict has been pressed the result stays on the page across reruns.
    prediction_label = predict(gender,SeniorCitizen,Partner,Dependents, tenure, PhoneService,MultipleLines,
                               InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,
                               Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges)
    # Previously the returned message was discarded; show it to the user.
    st.write(prediction_label)
app_3.py DELETED
@@ -1,167 +0,0 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import joblib
4
- import matplotlib.pyplot as plt
5
- from huggingface_hub import hf_hub_download
6
- import time
7
- import base64
8
- from transformers import AutoTokenizer
9
-
10
- # Load the pre-trained numerical imputer, scaler, and model using joblib
11
-
12
- REPO_ID = "AlbieCofie/predict-customer-churn"
13
-
14
- num_imputer = joblib.load(
15
- hf_hub_download(repo_id=REPO_ID, filename="numerical_imputer.joblib")
16
- )
17
-
18
- scaler = joblib.load(
19
- hf_hub_download(repo_id=REPO_ID, filename="scaler.joblib")
20
- )
21
-
22
- model = joblib.load(
23
- hf_hub_download(repo_id=REPO_ID, filename="Final_model.joblib")
24
- )
25
-
26
- # Define a function to preprocess the input data
27
- def preprocess_input_data(input_data):
28
- input_data_df = pd.DataFrame(input_data, columns=['PRG', 'PL', 'PR', 'SK', 'TS', 'M11', 'BD2', 'Age', 'Insurance'])
29
- num_columns = input_data_df.select_dtypes(include='number').columns
30
-
31
- input_data_imputed_num = num_imputer.transform(input_data_df[num_columns])
32
- input_scaled_df = pd.DataFrame(scaler.transform(input_data_imputed_num), columns=num_columns)
33
-
34
- return input_scaled_df
35
-
36
-
37
- # Define a function to make the sepsis prediction
38
- def predict_sepsis(input_data):
39
- input_scaled_df = preprocess_input_data(input_data)
40
- prediction = model.predict(input_scaled_df)[0]
41
- probabilities = model.predict_proba(input_scaled_df)[0]
42
- sepsis_status = "Positive" if prediction == 1 else "Negative"
43
-
44
- status_icon = "✔" if prediction == 1 else "✘" # Red 'X' icon for positive sepsis prediction, green checkmark icon for negative sepsis prediction
45
- sepsis_explanation = "Sepsis is a life-threatening condition caused by an infection. A positive prediction suggests that the patient might be exhibiting sepsis symptoms and requires immediate medical attention." if prediction == 1 else "Sepsis is a life-threatening condition caused by an infection. A negative prediction suggests that the patient is not currently exhibiting sepsis symptoms."
46
-
47
- output_df = pd.DataFrame(input_data, columns=['PRG', 'PL', 'PR', 'SK', 'TS', 'M11', 'BD2', 'Age', 'Insurance'])
48
- output_df['Prediction'] = sepsis_status
49
- output_df['Negative Probability'] = probabilities[0]
50
- output_df['Positive Probability'] = probabilities[1]
51
-
52
- return output_df, probabilities, status_icon, sepsis_explanation
53
-
54
- # Create a Streamlit app
55
- def main():
56
- st.title('Sepsis Prediction App')
57
-
58
- st.image("Strealit_.jpg")
59
-
60
- # How to use
61
- st.sidebar.title('How to Use')
62
- st.sidebar.markdown('1. Adjust the input parameters on the left sidebar.')
63
- st.sidebar.markdown('2. Click the "Predict" button to initiate the prediction.')
64
- st.sidebar.markdown('3. The app will simulate a prediction process with a progress bar.')
65
- st.sidebar.markdown('4. Once the prediction is complete, the results will be displayed below.')
66
-
67
-
68
- st.sidebar.title('Input Parameters')
69
-
70
- # Input parameter explanations
71
- st.sidebar.markdown('**PRG:** Plasma Glucose')
72
- PRG = st.sidebar.number_input('PRG', value=0.0)
73
-
74
- st.sidebar.markdown('**PL:** Blood Work Result 1')
75
- PL = st.sidebar.number_input('PL', value=0.0)
76
-
77
- st.sidebar.markdown('**PR:** Blood Pressure Measured')
78
- PR = st.sidebar.number_input('PR', value=0.0)
79
-
80
- st.sidebar.markdown('**SK:** Blood Work Result 2')
81
- SK = st.sidebar.number_input('SK', value=0.0)
82
-
83
- st.sidebar.markdown('**TS:** Blood Work Result 3')
84
- TS = st.sidebar.number_input('TS', value=0.0)
85
-
86
- st.sidebar.markdown('**M11:** BMI')
87
- M11 = st.sidebar.number_input('M11', value=0.0)
88
-
89
- st.sidebar.markdown('**BD2:** Blood Work Result 4')
90
- BD2 = st.sidebar.number_input('BD2', value=0.0)
91
-
92
- st.sidebar.markdown('**Age:** What is the Age of the Patient: ')
93
- Age = st.sidebar.number_input('Age', value=0.0)
94
-
95
- st.sidebar.markdown('**Insurance:** Does the patient have Insurance?')
96
- insurance_options = {0: 'NO', 1: 'YES'}
97
- Insurance = st.sidebar.radio('Insurance', list(insurance_options.keys()), format_func=lambda x: insurance_options[x])
98
-
99
-
100
- input_data = [[PRG, PL, PR, SK, TS, M11, BD2, Age, Insurance]]
101
-
102
- if st.sidebar.button('Predict'):
103
- with st.spinner("Predicting..."):
104
- # Simulate a long-running process
105
- progress_bar = st.progress(0)
106
- step = 20 # A big step will reduce the execution time
107
- for i in range(0, 100, step):
108
- time.sleep(0.1)
109
- progress_bar.progress(i + step)
110
-
111
- output_df, probabilities, status_icon, sepsis_explanation = predict_sepsis(input_data)
112
-
113
- st.subheader('Prediction Result')
114
- prediction_text = "Positive" if status_icon == "✔" else "Negative"
115
- st.markdown(f"Prediction: **{prediction_text}**")
116
- st.markdown(f"{status_icon} {sepsis_explanation}")
117
- st.write(output_df)
118
-
119
- # Add a download button for output_df
120
- csv = output_df.to_csv(index=False)
121
- b64 = base64.b64encode(csv.encode()).decode()
122
- href = f'<a href="data:file/csv;base64,{b64}" download="output.csv">Download Output CSV</a>'
123
- st.markdown(href, unsafe_allow_html=True)
124
-
125
-
126
- # Plot the probabilities
127
- fig, ax = plt.subplots()
128
- ax.bar(['Negative', 'Positive'], probabilities)
129
- ax.set_xlabel('Sepsis Status')
130
- ax.set_ylabel('Probability')
131
- ax.set_title('Sepsis Prediction Probabilities')
132
- st.pyplot(fig)
133
-
134
- # Print feature importance
135
- if hasattr(model, 'coef_'):
136
- feature_importances = model.coef_[0]
137
- feature_names = ['PRG', 'PL', 'PR', 'SK', 'TS', 'M11', 'BD2', 'Age', 'Insurance']
138
-
139
- importance_df = pd.DataFrame({'Feature': feature_names, 'Importance': feature_importances})
140
- importance_df = importance_df.sort_values('Importance', ascending=False)
141
-
142
- st.subheader('Feature Importance')
143
- fig, ax = plt.subplots()
144
- bars = ax.bar(importance_df['Feature'], importance_df['Importance'])
145
- ax.set_xlabel('Feature')
146
- ax.set_ylabel('Importance')
147
- ax.set_title('Feature Importance')
148
- ax.tick_params(axis='x', rotation=45)
149
-
150
- # Add data labels to the bars
151
- for bar in bars:
152
- height = bar.get_height()
153
- ax.annotate(f'{height:.2f}', xy=(bar.get_x() + bar.get_width() / 2, height),
154
- xytext=(0, 3), # 3 points vertical offset
155
- textcoords="offset points",
156
- ha='center', va='bottom')
157
- st.pyplot(fig)
158
-
159
- else:
160
- st.write('Feature importance is not available for this model.')
161
-
162
- #st.subheader('Sepsis Explanation')
163
- #st.markdown(f"{status_icon} {sepsis_explanation}")
164
-
165
-
166
- if __name__ == '__main__':
167
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,15 +1,10 @@
1
- joblib==1.2.0
 
 
 
 
2
  matplotlib==3.7.1
3
  matplotlib-inline==0.1.6
4
- numpy==1.24.2
5
- pandas==1.5.3
6
- scikit-learn==1.2.2
7
- scipy==1.10.0
8
  seaborn==0.12.2
9
- streamlit==1.20.0
10
- fastapi==0.95.1
11
- uvicorn==0.22.0
12
- pydantic==1.10.7
13
- transformers==4.24.0
14
- tokenizers== 0.13.2
15
  huggingface_hub==0.11
 
1
+ streamlit==1.25.0
2
+ numpy==1.23.5
3
+ scikit-learn==1.2.2
4
+ sklearn-pandas==2.2.0
5
+ joblib==1.3.2
6
  matplotlib==3.7.1
7
  matplotlib-inline==0.1.6
8
+ matplotlib-venn==0.11.9
 
 
 
9
  seaborn==0.12.2
 
 
 
 
 
 
10
  huggingface_hub==0.11