nurindahpratiwi committed on
Commit
7962c69
1 Parent(s): 8f104ab
Files changed (3) hide show
  1. app.py +60 -148
  2. app_2.py +15 -0
  3. requirements.txt +5 -9
app.py CHANGED
@@ -1,157 +1,69 @@
1
- import streamlit as st
2
- import numpy as np
3
  import joblib
4
- from sklearn.preprocessing import StandardScaler
5
  import pandas as pd
6
- import matplotlib.pyplot as plt
7
- import seaborn as sns
8
  from huggingface_hub import hf_hub_download
9
 
 
10
 
11
- REPO_ID = "flokabukie/Credit_Card_Fraud_Detection"
12
-
13
- # Load the non-anomaly data
14
- non_anomaly_csv_filename = 'non_anomaly_data.csv'
15
- non_anomaly_csv_filename = hf_hub_download(repo_id=REPO_ID, filename=non_anomaly_csv_filename)
16
- non_anomaly_df = pd.read_csv(non_anomaly_csv_filename)
17
-
18
- # Load the Isolation Forest model
19
- model_filename = "IsolationForest.joblib"
20
-
21
- isolation_forest = joblib.load(
22
- hf_hub_download(repo_id=REPO_ID, filename=model_filename)
23
  )
24
 
25
- # Load the StandardScaler
26
- scaler_filename = "StandardScaler.joblib"
27
-
28
- scaler = joblib.load(
29
- hf_hub_download(repo_id=REPO_ID, filename=scaler_filename)
30
  )
31
 
32
- st.title("Anomaly Detection App with Isolation Forest")
33
-
34
- st.sidebar.title("Input Feature Values")
35
- transaction_dollar_amount = st.sidebar.slider("Transaction Dollar Amount", min_value=0.0, max_value=10000.0)
36
- longitude = st.sidebar.slider("Longitude (Long)", min_value=-180.0, max_value=180.0)
37
- latitude = st.sidebar.slider("Latitude (Lat)", min_value=-90.0, max_value=90.0)
38
- credit_card_limit = st.sidebar.slider("Credit Card Limit", min_value=0, max_value=50000)
39
- year = st.sidebar.slider("Year", min_value=2000, max_value=2030)
40
- month = st.sidebar.slider("Month", min_value=1, max_value=12)
41
- day = st.sidebar.slider("Day", min_value=1, max_value=31)
42
-
43
- submitted = st.sidebar.button("Submit")
44
-
45
- if submitted:
46
- input_data = {
47
- 'transaction_dollar_amount': transaction_dollar_amount,
48
- 'Long': longitude,
49
- 'Lat': latitude,
50
- 'credit_card_limit': credit_card_limit,
51
- 'year': year,
52
- 'month': month,
53
- 'day': day
54
- }
55
-
56
- selected_columns = pd.DataFrame([input_data])
57
-
58
- # Standardize the input data using the loaded StandardScaler
59
- selected_columns_scaled = scaler.transform(selected_columns)
60
-
61
- # Apply Isolation Forest for anomaly detection on the non-anomaly dataset
62
- non_anomaly_scores = isolation_forest.decision_function(scaler.transform(non_anomaly_df))
63
-
64
- # Apply Isolation Forest for anomaly detection on your single input data
65
- your_anomaly_score = isolation_forest.decision_function(selected_columns_scaled)[0]
66
-
67
-
68
-
69
- # Calculate the minimum and maximum anomaly scores from non-anomaly data
70
- min_non_anomaly_score = np.min(non_anomaly_scores)
71
- max_non_anomaly_score = np.max(non_anomaly_scores)
72
-
73
- # Add a margin of error for the range
74
- margin = 0.5
75
- min_threshold = min_non_anomaly_score - margin
76
- max_threshold = max_non_anomaly_score + margin
77
-
78
- # Determine if the input data point is an anomaly based on the score
79
- #is_anomaly = your_anomaly_score >= np.percentile(non_anomaly_scores, 95)
80
-
81
- # Determine if the input data point is an anomaly based on the score
82
- is_anomaly = your_anomaly_score < min_threshold or your_anomaly_score > max_threshold
83
-
84
-
85
- # Print the anomaly status
86
- st.subheader("Anomaly Classification")
87
- if is_anomaly:
88
- st.write("Prediction Result: 🚨 Anomaly Detected!")
89
- else:
90
- st.write("Prediction Result: ✅ Not Anomaly")
91
-
92
- # Create a bar plot to visualize the anomaly score distribution and your data point's score
93
- plt.figure(figsize=(8, 5))
94
-
95
- # Plot the distribution of anomaly scores from the non-anomaly dataset
96
- sns.histplot(non_anomaly_scores, kde=True, color='gray', label='Non-Anomaly Score Distribution')
97
-
98
- # Plot your data point's anomaly score
99
- plt.axvline(x=your_anomaly_score, color='blue', linestyle='dashed', label='Your Data Point')
100
-
101
- # Set labels and title
102
- plt.xlabel('Anomaly Score')
103
- plt.ylabel('Frequency')
104
- plt.title('Anomaly Score Distribution and Your Data Point')
105
- plt.legend()
106
- #plt.grid(True)
107
-
108
- # Display the histogram plot
109
- st.pyplot(plt)
110
-
111
-
112
- # Explain the results
113
- st.write("The input data point has been classified as an anomaly." if is_anomaly
114
- else "The input data point is not classified as an anomaly.")
115
- st.write("The anomaly score is:", your_anomaly_score)
116
- st.write("The threshold for anomaly detection is:", min_threshold, "to", max_threshold)
117
-
118
- # Create a scatter plot for longitude and latitude
119
- fig, ax = plt.subplots(figsize=(10, 8))
120
-
121
- # Plot non-anomaly data
122
- sns.scatterplot(data=non_anomaly_df, x='Long', y='Lat', color='lightgrey', label='Normal 🏙️', ax=ax)
123
-
124
- # Plot input data
125
- if is_anomaly:
126
- ax.scatter(selected_columns['Long'], selected_columns['Lat'], color='red', label='Suspicious 🚩', s=100, marker='x')
127
- anomaly_marker = 'Suspicious 🚩'
128
- else:
129
- ax.scatter(selected_columns['Long'], selected_columns['Lat'], color='green', label='Valid ✅', s=100, marker='o')
130
- anomaly_marker = 'Valid ✅'
131
-
132
- ax.set_xlabel("Longitude")
133
- ax.set_ylabel("Latitude")
134
- ax.set_title("Location Plot: Anomaly Detection 🗺️")
135
- ax.legend()
136
- ax.grid(True)
137
-
138
- # Show the scatter plot in Streamlit
139
- st.subheader("Location Plot: Anomaly Detection 🗺️")
140
- st.pyplot(fig)
141
-
142
- # Explanation based on the anomaly classification
143
- st.subheader("Anomaly Classification")
144
- if your_anomaly_score < min_threshold or your_anomaly_score > max_threshold:
145
- st.write("Prediction Result: 🚨 Anomaly Detected!")
146
- else:
147
- st.write("Prediction Result: ✅ Not Anomaly")
148
-
149
- # Explain the results
150
- # Explain the results
151
- st.write("The location plot visualizes the anomaly detection result based on longitude and latitude.")
152
- if your_anomaly_score < min_threshold or your_anomaly_score > max_threshold:
153
- st.write("The input data point is marked as Suspicious 🚩 due to its anomaly score.")
154
- st.write("The red 'x' marker indicates a suspicious location.")
155
- else:
156
- st.write("The input data point is marked as Valid ✅ due to its anomaly score.")
157
- st.write("The green 'o' marker indicates a valid location.")
 
 
 
1
  import joblib
 
2
  import pandas as pd
3
+ import streamlit as st
 
4
  from huggingface_hub import hf_hub_download
5
 
6
+ REPO_ID = "chanyaphas/creditc"
7
 
8
+ model = joblib.load(
9
+ hf_hub_download(repo_id=REPO_ID, filename="model.joblib")
 
 
 
 
 
 
 
 
 
 
10
  )
11
 
12
+ unique_values = joblib.load(
13
+ hf_hub_download(repo_id=REPO_ID, filename="unique_values.joblib")
 
 
 
14
  )
15
 
16
+ EDU_DICT = {'Lower secondary': 1,
17
+ 'Secondary / secondary special': 2,
18
+ 'Academic degree': 3,
19
+ 'Incomplete higher': 4,
20
+ 'Higher education' : 5
21
+ }
22
+
23
+ def main():
24
+ st.title("Credit Card Approval Prediction")
25
+
26
+ with st.form("questionaire"):
27
+
28
+ Gender = st.selectbox('Gender', unique_values['CODE_GENDER'])
29
+ Own_car = st.selectbox('Own_car', unique_values['FLAG_OWN_CAR'])
30
+ Property = st.selectbox('Property', unique_values['FLAG_OWN_REALTY'])
31
+ Income_type = st.selectbox('Income_type', unique_values['NAME_INCOME_TYPE'])
32
+ Marital_status = st.selectbox('Marital_status', unique_values['NAME_FAMILY_STATUS'])
33
+ Housing_type = st.selectbox('Housing_type', unique_values['NAME_HOUSING_TYPE'])
34
+ Education = st.selectbox('Education', unique_values['NAME_EDUCATION_TYPE'])
35
+
36
+ Income = st.slider('Income', min_value=27000, max_value=1575000)
37
+ Children = st.slider('Children', min_value=0, max_value=19)
38
+ Day_Employed = st.slider('Day_Employed', min_value=0, max_value=3)
39
+ Flag_Mobile = st.slider('Flag_Mobile', min_value=0, max_value=1)
40
+ Flag_work_phone = st.slider('Flag_work_phone', min_value=0, max_value=1)
41
+ Flag_Phone = st.slider('Flag_Phone', min_value=0, max_value=1)
42
+ Flag_Email = st.slider('Flag_Email', min_value=0, max_value=1)
43
+ Family_mem = st.slider('Family_mem', min_value=1, max_value=20)
44
+
45
+ clicked = st.form_submit_button("Result")
46
+ if clicked:
47
+ result = model.predict(pd.DataFrame({
48
+ "CODE_GENDER": [Gender],
49
+ "FLAG_OWN_CAR": [Own_car],
50
+ "FLAG_OWN_REALTY": [Property],
51
+ "CNT_CHILDREN": [Children],
52
+ "AMT_INCOME_TOTAL": [Income],
53
+ "NAME_INCOME_TYPE": [Income_type],
54
+ "NAME_EDUCATION_TYPE": [EDU_DICT[Education]],
55
+ "NAME_FAMILY_STATUS": [Marital_status],
56
+ "NAME_HOUSING_TYPE": [Housing_type],
57
+ "DAYS_EMPLOYED": [Day_Employed],
58
+ "FLAG_MOBIL": [Flag_Mobile],
59
+ "FLAG_WORK_PHONE": [Flag_work_phone],
60
+ "FLAG_PHONE": [Flag_Phone],
61
+ "FLAG_EMAIL": [Flag_Email],
62
+ "CNT_FAM_MEMBERS": [Family_mem]}))
63
+
64
+ result = 'Pass' if result[0] == 1 else 'Did not Pass'
65
+
66
+ st.success('Credit Card approval prediction results is {}'.format(result))
67
+
68
+ if __name__ == '__main__':
69
+ main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app_2.py CHANGED
@@ -19,6 +19,21 @@ model = joblib.load(
19
  hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
20
  )
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  # Add a title and subtitle
24
  st.write("<center><h1>Sales Prediction App</h1></center>", unsafe_allow_html=True)
 
19
  hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
20
  )
21
 
22
+ cat_imputer = joblib.load(
23
+ hf_hub_download(repo_id=REPO_ID, filename="categorical_imputer.joblib")
24
+ )
25
+
26
+ encoder = joblib.load(
27
+ hf_hub_download(repo_id=REPO_ID, filename="encoder.joblib")
28
+ )
29
+
30
+ scaler = joblib.load(
31
+ hf_hub_download(repo_id=REPO_ID, filename="scaler.joblib")
32
+ )
33
+
34
+ dt_model = joblib.load(
35
+ hf_hub_download(repo_id=REPO_ID, filename="Final_model.joblib")
36
+ )
37
 
38
  # Add a title and subtitle
39
  st.write("<center><h1>Sales Prediction App</h1></center>", unsafe_allow_html=True)
requirements.txt CHANGED
@@ -1,10 +1,6 @@
1
- streamlit==1.25.0
2
- numpy==1.23.5
3
  scikit-learn==1.2.2
4
- sklearn-pandas==2.2.0
5
- joblib==1.3.2
6
- matplotlib==3.7.1
7
- matplotlib-inline==0.1.6
8
- matplotlib-venn==0.11.9
9
- seaborn==0.12.2
10
- huggingface_hub==0.11
 
1
+ joblib
2
+ pandas
3
  scikit-learn==1.2.2
4
+ xgboost==1.7.6
5
+ altair<5
6
+ huggingface_hub