Gieorgie committed on
Commit
0c7270f
1 Parent(s): dad36ba

Upload 7 files

Browse files
Files changed (8) hide show
  1. .gitattributes +1 -0
  2. Main_Data.csv +0 -0
  3. app.py +11 -0
  4. eda.py +218 -0
  5. employee.jpg +3 -0
  6. model.pkl +3 -0
  7. prediction.py +71 -0
  8. requirements.txt +8 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ employee.jpg filter=lfs diff=lfs merge=lfs -text
Main_Data.csv ADDED
The diff for this file is too large to render. See raw diff
 
app.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import eda
2
+ import prediction
3
+ import streamlit as st
4
+
5
+
6
# Sidebar navigation: the user picks which page of the app to render.
page = st.sidebar.selectbox('Pilih Halaman: ', ('EDA', 'Prediction'))

# Anything other than 'EDA' falls through to the prediction page.
render_page = eda.run if page == 'EDA' else prediction.run
render_page()
eda.py ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import seaborn as sns
4
+ import matplotlib.pyplot as plt
5
+ import plotly.express as px
6
+ from PIL import Image
7
+
8
+
9
# Widen the page layout and open the sidebar by default.
# NOTE(review): these module-level Streamlit calls execute when this module is
# first imported (app.py imports it); Python caches imports, so they may not
# run again on later Streamlit reruns -- confirm, and consider moving them
# into the entry script.
st.set_page_config(
    page_title='Employee Attrition Prediction',
    layout='wide',
    initial_sidebar_state='expanded',
)

# Raw CSS injected into the page.
# NOTE(review): `background: "5160549.jpg"` is a quoted string rather than a
# url(...) value, and no file of that name is part of this commit -- this rule
# presumably has no visible effect; verify.
_PAGE_CSS = """<style>.reportview-container {background: "5160549.jpg"}.sidebar .sidebar-content {background: "5160549.jpg"}</style>"""
st.markdown(_PAGE_CSS, unsafe_allow_html=True)
18
+
19
+
20
+
21
# Shared palette; in every chart the first colour is drawn for the first
# Attrition group and the second colour for the second group.
_ATTRITION_COLORS = ['#0072C6', '#BFBFBF']

# (section heading, unique widget key, selectable columns) for each
# categorical breakdown rendered by run().
_CATEGORICAL_SECTIONS = (
    (
        '### Data Demografi Karyawan',
        'eda_demografi',
        ('Gender', 'Education', 'MaritalStatus', 'Department'),
    ),
    (
        '### Data Survey Karyawan',
        'eda_survey',
        ('EnvironmentSatisfaction', 'JobSatisfaction', 'WorkLifeBalance'),
    ),
    (
        '### Data Performa Karyawan',
        'eda_performa',
        ('JobInvolvement', 'PerformanceRating', 'BusinessTravel', 'JobLevel', 'JobRole'),
    ),
)

# Numeric columns offered in the histogram section.
_NUMERIC_COLUMNS = (
    'Age', 'DistanceFromHome', 'MonthlyIncome', 'NumCompaniesWorked',
    'PercentSalaryHike', 'TotalWorkingYears', 'YearsAtCompany',
    'YearsSinceLastPromotion', 'YearsWithCurrManager',
)


def _show_figure(fig):
    """Render *fig* on the Streamlit page, then close it.

    pyplot keeps a reference to every figure until it is closed explicitly.
    Closing right after rendering keeps figures from piling up each time
    run() is executed again.
    """
    st.pyplot(fig)
    plt.close(fig)


def _plot_attrition_pie(df):
    """Show a pie chart of the value counts of the 'Attrition' column."""
    target_counts = df['Attrition'].value_counts()

    # Legend entries carry the absolute count of each value.
    legend_labels = [
        f'Attrition {value} - {count}' for value, count in target_counts.items()
    ]

    fig = plt.figure(figsize=(10, 5))
    # Draw in the right half of the figure, as the original layout did.
    ax = fig.add_subplot(1, 2, 2)
    target_counts.plot(
        kind='pie',
        autopct='%1.1f%%',
        shadow=True,
        labels=None,
        colors=_ATTRITION_COLORS,
        ax=ax,
    )
    ax.set_title('Employee Attrition')
    ax.legend(legend_labels, loc='upper right', bbox_to_anchor=(1.3, 1))

    _show_figure(fig)


def _plot_attrition_by_category(df, column):
    """Show a stacked bar chart of row counts per *column* value, split by Attrition.

    Columns with more than three distinct values are drawn horizontally.
    """
    counts = df.groupby([column, 'Attrition']).size().unstack(fill_value=0)
    horizontal = len(counts.index) > 3

    fig, ax = plt.subplots(figsize=(15, 5))
    counts.plot(
        kind='barh' if horizontal else 'bar',
        stacked=True,
        color=_ATTRITION_COLORS,
        ax=ax,
    )

    if horizontal:
        ax.set_xlabel('Jumlah Karyawan')
        ax.set_ylabel(column)
    else:
        ax.set_ylabel('Jumlah Karyawan')
        ax.set_xlabel(column)
        ax.set_xticklabels(counts.index, rotation=0)

    ax.set_title(f'Distribusi Attrition Berdasarkan {column}')
    # NOTE(review): the fixed legend labels assume the unstacked Attrition
    # columns come out in (No, Yes) order -- confirm against the data.
    ax.legend(title='Attrition', labels=['Tidak', 'Ya'])

    # Annotate every bar segment with its count; the count is the bar's
    # width when horizontal and its height when vertical.
    for container in ax.containers:
        sizes = [
            bar.get_width() if horizontal else bar.get_height()
            for bar in container
        ]
        ax.bar_label(
            container,
            labels=[f'{int(size)}' for size in sizes],
            label_type='center',
            padding=2,
        )

    _show_figure(fig)


def _plot_numeric_histogram(df, column):
    """Show overlaid histograms of *column* for Attrition == 'No' and 'Yes'."""
    attrition_no = df[df['Attrition'] == 'No'][column]
    attrition_yes = df[df['Attrition'] == 'Yes'][column]

    fig, ax = plt.subplots(figsize=(15, 5))
    sns.histplot(attrition_no, color=_ATTRITION_COLORS[0], label='No',
                 kde=False, bins=30, ax=ax)
    sns.histplot(attrition_yes, color=_ATTRITION_COLORS[1], label='Yes',
                 kde=False, bins=30, ax=ax)

    ax.set_title(f'Histogram Distribusi {column} Berdasarkan Attrition')
    ax.set_xlabel(column)
    ax.set_ylabel('Jumlah Karyawan')
    ax.legend(title='Attrition')
    fig.tight_layout()

    _show_figure(fig)


def run():
    """Render the exploratory-data-analysis page.

    Shows the header image, the first rows of the dataset, a pie chart of the
    Attrition distribution, three categorical breakdowns and one numeric
    histogram, each driven by a select box.

    Reads 'employee.jpg' and 'Main_Data.csv' from the current working
    directory; a missing file raises FileNotFoundError.
    """
    # Title and sub header.
    st.title('Employee Attrition Prediction')
    st.subheader('Employee Attrition Prediction EDA')

    # Header image; the context manager releases the file handle once the
    # image has been handed to Streamlit.
    with Image.open('employee.jpg') as image:
        st.markdown('---')
        st.image(image, caption=' "" ')

    # Page description followed by a horizontal rule.
    st.write('### Halaman ini berisi Eksplorasi Data ')
    st.markdown('---')

    # Preview of the dataset. The file name must match the committed file
    # ('Main_Data.csv') exactly, because lookups are case-sensitive on Linux.
    st.write('### Employee Attrition Data')
    df = pd.read_csv('Main_Data.csv')
    st.dataframe(df.head(5))
    st.markdown('***')

    # Overall Attrition distribution.
    st.write('### Attrition Distribution')
    _plot_attrition_pie(df)
    st.markdown('---')

    # Categorical breakdowns. The select boxes share one label, so each gets
    # an explicit key to keep the widgets unambiguous.
    for heading, widget_key, columns in _CATEGORICAL_SECTIONS:
        st.write(heading)
        pilihan = st.selectbox('Pilih Kolom : ', columns, key=widget_key)
        _plot_attrition_by_category(df, pilihan)
        st.markdown('---')

    # Numeric distributions.
    st.write('### Data Numerical')
    pilihan = st.selectbox('Pilih Kolom : ', _NUMERIC_COLUMNS, key='eda_numerical')
    _plot_numeric_histogram(df, pilihan)


# Allow the page to be executed directly as a script.
if __name__ == '__main__':
    run()
employee.jpg ADDED

Git LFS Details

  • SHA256: a1dcc459f8072f5645ac67a2a98d0fc1cd98a1de116ad6aecc9c73c4c620c8eb
  • Pointer size: 133 Bytes
  • Size of remote file: 12.7 MB
model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:323c9b55b492141bda434df631e0cc483a0726af23265c9ebd0829a6393cb651
3
+ size 263218
prediction.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import pickle
4
+
5
# Location of the pickled model, resolved against the working directory.
model_path = "model.pkl"

# Deserialize the model once, when this module is imported.
# NOTE: unpickling can execute arbitrary code -- only load a model.pkl that
# comes from a trusted source.
with open(model_path, 'rb') as model_file:
    model = pickle.load(model_file)
11
+
12
+ def run():
13
+ st.title('Prediksi Pengunduran Diri Karyawan')
14
+
15
+ # Formulir untuk pengisian data
16
+ with st.form('form_employee_attrition'):
17
+ # Kolom input sesuai dengan keterangan yang Anda berikan
18
+ business_travel = st.selectbox('Business Travel', ['Travel_Rarely', 'Travel_Frequently', 'Non-Travel'])
19
+ department = st.selectbox('Department', ['Sales', 'Research & Development', 'Human Resources'])
20
+ education_field = st.selectbox('Education Field', ['Life Sciences', 'Other', 'Medical', 'Marketing', 'Technical Degree', 'Human Resources'])
21
+ job_role = st.selectbox('Job Role', ['Healthcare Representative', 'Research Scientist', 'Sales Executive', 'Human Resources', 'Research Director', 'Laboratory Technician', 'Manufacturing Director', 'Sales Representative', 'Manager'])
22
+ marital_status = st.selectbox('Marital Status', ['Married', 'Single', 'Divorced'])
23
+ training_times_last_year = st.selectbox('Training Times Last Year', [0, 1, 2, 3, 4, 5, 6])
24
+ job_involvement = st.selectbox('Job Involvement', [1, 2, 3, 4], format_func=lambda x: {1: 'Low', 2: 'Medium', 3: 'High', 4: 'Very High'}[x])
25
+ environment_satisfaction = st.selectbox('Environment Satisfaction', [1, 2, 3, 4], format_func=lambda x: {1: 'Low', 2: 'Medium', 3: 'High', 4: 'Very High'}[x])
26
+ job_satisfaction = st.selectbox('Job Satisfaction', [1, 2, 3, 4], format_func=lambda x: {1: 'Low', 2: 'Medium', 3: 'High', 4: 'Very High'}[x])
27
+ work_life_balance = st.selectbox('Work Life Balance', [1, 2, 3, 4], format_func=lambda x: {1: 'Bad', 2: 'Good', 3: 'Better', 4: 'Best'}[x])
28
+ age = st.slider('Age', min_value=18, max_value=60)
29
+ percent_salary_hike = st.slider('Percent Salary Hike', min_value=11, max_value=25)
30
+ total_working_years = st.slider('Total Working Years', min_value=0, max_value=40)
31
+ years_at_company = st.slider('Years At Company', min_value=0, max_value=40)
32
+ years_since_last_promotion = st.slider('Years Since Last Promotion', min_value=0, max_value=15)
33
+ years_with_curr_manager = st.slider('Years With Current Manager', min_value=0, max_value=17)
34
+
35
+ # Tombol untuk melakukan prediksi
36
+ submitted = st.form_submit_button('Prediksi')
37
+
38
+ # Menyusun data input menjadi DataFrame
39
+ data = {
40
+ 'BusinessTravel': business_travel,
41
+ 'Department': department,
42
+ 'EducationField': education_field,
43
+ 'JobRole': job_role,
44
+ 'MaritalStatus': marital_status,
45
+ 'TrainingTimesLastYear': training_times_last_year,
46
+ 'JobInvolvement': job_involvement,
47
+ 'EnvironmentSatisfaction': environment_satisfaction,
48
+ 'JobSatisfaction': job_satisfaction,
49
+ 'WorkLifeBalance': work_life_balance,
50
+ 'Age': age,
51
+ 'PercentSalaryHike': percent_salary_hike,
52
+ 'TotalWorkingYears': total_working_years,
53
+ 'YearsAtCompany': years_at_company,
54
+ 'YearsSinceLastPromotion': years_since_last_promotion,
55
+ 'YearsWithCurrManager': years_with_curr_manager
56
+ }
57
+
58
+ features = pd.DataFrame(data, index=[0])
59
+
60
+ # Menampilkan fitur input pengguna
61
+ st.write("## Fitur Input Pengguna")
62
+ st.write(features)
63
+
64
+ # Melakukan prediksi jika tombol prediksi ditekan
65
+ if submitted:
66
+ prediction = model.predict(features)
67
+ st.subheader('Hasil Prediksi')
68
+ st.write('Pengunduran Diri Karyawan:', 'Ya' if prediction[0] == 1 else 'Tidak')
69
+
70
+ if __name__ == '__main__':
71
+ run()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ seaborn
4
+ matplotlib
5
+ numpy
6
+ scikit-learn==1.2.2
7
+ Pillow
8
+ plotly