Spaces:
Runtime error
Runtime error
Commit
•
144cad2
1
Parent(s):
c1471b7
app
Browse files- app.py +9 -0
- dev salary.ipynb +0 -0
- explore_page.py +66 -0
- predict_page.py +42 -0
- requirements.txt +4 -0
app.py
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from predict_page import show_predict_page
|
3 |
+
from explore_page import show_explore_page
|
4 |
+
|
5 |
+
# Sidebar switch: pick which of the two pages to render on this run.
new_page = st.sidebar.selectbox("Predict or Explore", ("Predict", "Explore"))
render_page = show_predict_page if new_page == "Predict" else show_explore_page
render_page()
|
dev salary.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
explore_page.py
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import matplotlib.pyplot as plt
|
4 |
+
|
5 |
+
def join_country(cat, cutoff):
    """Map each category label to itself or to "others", by frequency.

    Args:
        cat: Label -> count mapping, e.g. the Series returned by
            ``value_counts()``. Anything exposing ``.items()`` works.
        cutoff: Minimum count a label needs to be kept under its own name.

    Returns:
        dict: Every label in ``cat`` mapped to itself when its count is at
        least ``cutoff``, otherwise to the string "others".
    """
    # Walk (label, count) pairs directly instead of re-indexing cat[label]
    # for every label.
    return {
        label: label if count >= cutoff else "others"
        for label, count in cat.items()
    }
|
14 |
+
|
15 |
+
def clean_experience(x):
    """Convert a ``YearsCodePro`` answer into a number of years.

    Args:
        x: Either one of the two textual answers handled below, or any
            value ``float()`` accepts.

    Returns:
        float: 50.0 for "More than 50 years", 0.5 for "Less than 1 year",
        otherwise ``float(x)``.

    Raises:
        ValueError: If ``x`` is a string that is neither a known textual
            answer nor parseable by ``float()``.
    """
    # Always hand back a float so the cleaned column has a single type.
    if x == "More than 50 years":
        return 50.0
    if x == "Less than 1 year":
        return 0.5
    return float(x)
|
21 |
+
|
22 |
+
def clean_education(x):
    """Collapse a detailed ``EdLevel`` answer into one of four labels.

    The checks are substring matches applied in order: Bachelor's, then
    Master's, then professional/doctoral ("Post grad"). Anything else is
    reported as "Less than a Bachelors".

    Args:
        x: The raw education-level string.

    Returns:
        str: "Bachelor’s degree", "Master’s degree", "Post grad" or
        "Less than a Bachelors".
    """
    # For these two levels the matched text doubles as the output label.
    for degree in ("Bachelor’s degree", "Master’s degree"):
        if degree in x:
            return degree
    if any(marker in x for marker in ("Professional degree", "Other doctoral")):
        return "Post grad"
    return "Less than a Bachelors"
|
30 |
+
@st.cache
def load_data():
    """Read ``survey.csv`` and return the cleaned salary data frame.

    Steps, in order:
      1. Keep Country, EdLevel, YearsCodePro, Employment and ConvertedComp,
         renaming ConvertedComp to Salary.
      2. Drop rows with a missing value in any kept column.
      3. Keep only "Employed full-time" rows, then drop the Employment column.
      4. Relabel countries with fewer than 400 remaining rows as "others".
      5. Keep rows with 10000 < Salary <= 250000 whose country is not "others".
      6. Clean YearsCodePro and EdLevel with ``clean_experience`` and
         ``clean_education``.

    Returns:
        pandas.DataFrame with columns Country, EdLevel, YearsCodePro, Salary.
    """
    survey = pd.read_csv("survey.csv")
    wanted = ["Country", "EdLevel", "YearsCodePro", "Employment", "ConvertedComp"]
    frame = survey.loc[:, wanted].rename({"ConvertedComp": "Salary"}, axis=1)

    has_salary = ~frame["Salary"].isnull()
    frame = frame[has_salary].dropna()

    full_time = frame["Employment"] == "Employed full-time"
    frame = frame[full_time].drop("Employment", axis=1)

    # Country frequencies are counted before the salary range filter below.
    country_counts = frame["Country"].value_counts()
    frame["Country"] = frame["Country"].map(join_country(country_counts, 400))

    salary = frame["Salary"]
    keep = (salary <= 250000) & (salary > 10000) & (frame["Country"] != "others")
    frame = frame[keep]

    frame["YearsCodePro"] = frame["YearsCodePro"].apply(clean_experience)
    frame["EdLevel"] = frame["EdLevel"].apply(clean_education)
    return frame
|
48 |
+
# Cleaned survey frame, built when this module is imported; show_explore_page reads it.
data=load_data()
|
49 |
+
def show_explore_page():
    """Render the "Explore" page from the module-level ``data`` frame.

    Draws three charts: a pie chart of row counts per country, a bar chart
    of mean salary per country, and a line chart of mean salary per value
    of YearsCodePro.
    """
    st.title("Explore Software Engineer Average Salary")
    st.write("""### Stack Overflow Developer Salary""")

    dataneeded = data["Country"].value_counts()
    fig, ax = plt.subplots()
    ax.pie(dataneeded, labels=dataneeded.index, shadow=True, startangle=90)
    ax.axis("equal")
    st.write("""#### Number Of Data From Each Country""")
    st.pyplot(fig)
    # Figures made through pyplot stay registered until closed; without this
    # every call of this function would keep one more figure alive.
    plt.close(fig)

    st.write("""#### Mean Salary Base On The Country""")
    salary_by_country = (
        data.groupby(["Country"])["Salary"].mean().sort_values(ascending=True)
    )
    st.bar_chart(salary_by_country)

    st.write("""#### Mean Salary Base On The Experience""")
    salary_by_experience = (
        data.groupby(["YearsCodePro"])["Salary"].mean().sort_values(ascending=True)
    )
    st.line_chart(salary_by_experience)
|
66 |
+
|
predict_page.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pickle
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
+
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``, closing the file."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Artifacts used by show_predict_page, loaded once at import time.
# NOTE(review): unpickling needs whichever library defined these objects to be
# installed; requirements.txt in this commit pins only matplotlib, numpy,
# pandas and streamlit - confirm nothing is missing.
# NOTE: pickle can execute arbitrary code; only load files shipped with the app.
model = _load_pickle("model.pkl")
trans_count = _load_pickle("trans_count.pkl")
trans_edu = _load_pickle("trans_edu.pkl")
print(st.__version__)
|
9 |
+
|
10 |
+
def show_predict_page():
    """Render the "Predict" page and show a salary estimate on request.

    Collects a country, an education level and years of experience from
    Streamlit widgets. When the "Calculate Salary" button is pressed, the
    two categorical answers are encoded with the module-level
    ``trans_count`` / ``trans_edu`` transformers and the resulting
    three-feature row is passed to ``model.predict``.
    """
    st.title("Software Developer Salary Prediction")
    st.write("""## we need some information to predict salary""")

    countries = (
        "United States",
        "India",
        "United Kingdom",
        "Germany",
        "Canada",
        "Brazil",
        "France",
        "Spain",
        "Australia",
        "Netherlands",
        "Poland",
        "Italy",
        "Russian Federation",
        "Sweden",
    )
    education_levels = (
        "Bachelor’s degree",
        "Master’s degree",
        "Post grad",
        "Less than a Bachelors",
    )

    country = st.selectbox("Country", countries)
    education = st.selectbox("Education Level", education_levels)
    experience = st.slider("Year of EXperience", 0, 50, 3)
    ok = st.button("Calculate Salary")

    if not ok:
        return

    # Encode each categorical answer on its own and build the float row
    # directly. Writing the codes back into a mixed string array would store
    # them as fixed-width text, which can truncate longer values.
    # presumably both transformers return one numeric code per input value;
    # verify against how the .pkl files were produced.
    country_code = trans_count.transform(np.array([country]))[0]
    education_code = trans_edu.transform(np.array([education]))[0]
    features = np.array([[country_code, education_code, experience]], dtype=float)

    salary = model.predict(features)
    st.subheader(f"The estimated average salary per year is ${salary[0]:.2f}")
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
matplotlib==3.5.2
|
2 |
+
numpy==1.22.3
|
3 |
+
pandas==1.4.2
|
4 |
+
streamlit==1.9.0
|