LawalAfeez committed on
Commit
144cad2
1 Parent(s): c1471b7
Files changed (5) hide show
  1. app.py +9 -0
  2. dev salary.ipynb +0 -0
  3. explore_page.py +66 -0
  4. predict_page.py +42 -0
  5. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from predict_page import show_predict_page
3
+ from explore_page import show_explore_page
4
+
5
# Sidebar selector that decides which of the two pages is rendered.
new_page = st.sidebar.selectbox("Predict or Explore", ("Predict", "Explore"))

# Anything other than "Predict" renders the explore page.
render_page = show_predict_page if new_page == "Predict" else show_explore_page
render_page()
dev salary.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
explore_page.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+
5
def join_country(cat, cutoff):
    """Map each category to itself, or to ``"others"`` when it is too rare.

    Args:
        cat: Object exposing ``items()`` that yields ``(category, count)``
            pairs, such as the Series returned by ``value_counts()`` or a
            plain dict.
        cutoff: Minimum count a category needs in order to keep its own name.

    Returns:
        dict: One entry per category in ``cat``; the value is the category
        itself when its count is >= ``cutoff``, otherwise ``"others"``.
    """
    # Walk the (label, count) pairs directly rather than looking each label
    # up again with ``cat[label]``; this also lets plain mappings be passed.
    return {
        name: (name if count >= cutoff else "others")
        for name, count in cat.items()
    }
14
+
15
def clean_experience(x):
    """Convert a years-of-experience answer into a number.

    The two textual answers map to fixed values ("More than 50 years" -> 50,
    "Less than 1 year" -> 0.5); every other answer is converted with
    ``float``.
    """
    textual_answers = {
        "More than 50 years": 50,
        "Less than 1 year": 0.5,
    }
    if x in textual_answers:
        return textual_answers[x]
    return float(x)
21
+
22
def clean_education(x):
    """Collapse a raw education answer into one of four coarse labels.

    Matching is by substring and the rules are tried in order; an answer
    that matches none of them is labelled "Less than a Bachelors".
    """
    # (substrings, label) pairs; the first pair with a matching substring wins.
    rules = (
        (("Bachelor’s degree",), "Bachelor’s degree"),
        (("Master’s degree",), "Master’s degree"),
        (("Professional degree", "Other doctoral"), "Post grad"),
    )
    for needles, label in rules:
        if any(needle in x for needle in needles):
            return label
    return "Less than a Bachelors"
30
@st.cache
def load_data():
    """Read ``survey.csv`` and return the cleaned salary table.

    Steps, in order:
      1. keep the Country, EdLevel, YearsCodePro, Employment and
         ConvertedComp columns, renaming ConvertedComp to ``Salary``;
      2. drop rows with a missing value in any of those columns;
      3. keep only "Employed full-time" rows, then drop ``Employment``;
      4. relabel countries with fewer than 400 remaining rows as "others";
      5. keep rows with 10000 < Salary <= 250000 whose country is not
         "others";
      6. normalise ``YearsCodePro`` and ``EdLevel`` with
         ``clean_experience`` / ``clean_education``.

    Returns:
        pandas.DataFrame with columns Country, EdLevel, YearsCodePro, Salary.
    """
    wanted = ["Country", "EdLevel", "YearsCodePro", "Employment", "ConvertedComp"]
    frame = pd.read_csv("survey.csv")[wanted]
    frame = frame.rename(columns={"ConvertedComp": "Salary"})

    # dropna() already removes rows with a missing Salary, so no separate
    # null filter on that column is needed.
    frame = frame.dropna()

    frame = frame[frame["Employment"] == "Employed full-time"]
    frame = frame.drop(columns="Employment")

    # Country counts are taken before the salary filter below.
    country_map = join_country(frame["Country"].value_counts(), 400)
    frame["Country"] = frame["Country"].map(country_map)

    keep = (
        (frame["Salary"] <= 250000)
        & (frame["Salary"] > 10000)
        & (frame["Country"] != "others")
    )
    # Explicit copy: the column assignments below would otherwise target a
    # filtered slice and trigger pandas' SettingWithCopyWarning.
    frame = frame[keep].copy()

    frame["YearsCodePro"] = frame["YearsCodePro"].apply(clean_experience)
    frame["EdLevel"] = frame["EdLevel"].apply(clean_education)
    return frame
48
# Cleaned survey table, loaded when this module is imported; read by
# show_explore_page.
data = load_data()
49
def show_explore_page():
    """Render the explore page: three charts built from the module-level ``data``.

    Draws, in order, a pie chart of the number of rows per country, a bar
    chart of mean salary per country and a line chart of mean salary per
    value of ``YearsCodePro``.
    """
    st.title("Explore Software Engineer Average Salary")
    st.write("""### Stack Overflow Developer Salary""")

    rows_per_country = data["Country"].value_counts()
    fig, ax = plt.subplots()
    ax.pie(
        rows_per_country,
        labels=rows_per_country.index,
        shadow=True,
        startangle=90,
    )
    ax.axis("equal")  # equal aspect ratio so the pie is drawn as a circle
    st.write("""#### Number Of Data From Each Country""")
    st.pyplot(fig)
    # Figures made through pyplot stay registered until closed; without this
    # every call of this function would leave another open figure behind.
    plt.close(fig)

    st.write("""#### Mean Salary Base On The Country""")
    salary_by_country = (
        data.groupby("Country")["Salary"].mean().sort_values(ascending=True)
    )
    st.bar_chart(salary_by_country)

    st.write("""#### Mean Salary Base On The Experience""")
    salary_by_experience = (
        data.groupby("YearsCodePro")["Salary"].mean().sort_values(ascending=True)
    )
    st.line_chart(salary_by_experience)
66
+
predict_page.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pickle
3
+ import numpy as np
4
+
5
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``, closing the file afterwards."""
    # NOTE(security): unpickling can execute arbitrary code, so these files
    # must only ever be artifacts produced by this project.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Objects used by show_predict_page, loaded once when the module is imported.
model = _load_pickle("model.pkl")
trans_count = _load_pickle("trans_count.pkl")  # applied to the country column
trans_edu = _load_pickle("trans_edu.pkl")  # applied to the education column

# Writes the installed Streamlit version to stdout at import time.
print(st.__version__)
9
+
10
def show_predict_page():
    """Render the prediction form and, once the button is pressed, the estimate.

    The page collects a country, an education level and years of experience.
    When "Calculate Salary" is clicked, the country and education choices are
    passed through ``trans_count`` / ``trans_edu``, combined with the
    experience value into a single float feature row, and handed to
    ``model.predict``; the first prediction is displayed.
    """
    st.title("Software Developer Salary Prediction")
    st.write("## we need some information to predict salary")

    country_options = (
        "United States",
        "India",
        "United Kingdom",
        "Germany",
        "Canada",
        "Brazil",
        "France",
        "Spain",
        "Australia",
        "Netherlands",
        "Poland",
        "Italy",
        "Russian Federation",
        "Sweden",
    )
    education_options = (
        "Bachelor’s degree",
        "Master’s degree",
        "Post grad",
        "Less than a Bachelors",
    )

    country = st.selectbox("Country", country_options)
    education = st.selectbox("Education Level", education_options)
    experience = st.slider("Year of EXperience", 0, 50, 3)

    # Nothing further to draw until the user asks for a prediction.
    if not st.button("Calculate Salary"):
        return

    # Encode the two categorical answers, then assemble one numeric row.
    country_code = trans_count.transform(np.array([country]))[0]
    education_code = trans_edu.transform(np.array([education]))[0]
    features = np.array([[country_code, education_code, experience]], dtype=float)

    salary = model.predict(features)
    st.subheader(f"The estimated average salary per year is ${salary[0]:.2f}")
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ matplotlib==3.5.2
2
+ numpy==1.22.3
3
+ pandas==1.4.2
4
+ streamlit==1.9.0