MuhFaridanSutariya committed on
Commit
1400c09
1 Parent(s): 5623b18

first init

Files changed (2)
  1. app.py +111 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,111 @@
+ import streamlit as st
+ from selenium import webdriver
+ from selenium.webdriver.chrome.options import Options
+ from bs4 import BeautifulSoup
+ import pandas as pd
+ import time
+
+ # Instantiate global variables
+ df = pd.DataFrame(columns=["Title", "Location", "Company", "Link", "Description"])
+
+ # Get user input
+ inputJobTitle = st.text_input("Enter Job Title:")
+ inputJobLocation = st.text_input("Enter Job Location:")
+ totalPages = st.number_input("Enter Total Pages:", min_value=1, value=1)
+ submit_button = st.button("Submit")
+
+ def scrapeJobDescription(url):
+     # Open the job posting in its own headless browser and extract the description text
+     options = Options()
+     options.add_argument("--window-size=1920,1080")
+     options.add_argument("--headless=new")
+     driver = webdriver.Chrome(options=options)
+     driver.get(url)
+     html = driver.page_source
+     driver.quit()  # close the browser once the HTML is captured, so instances don't pile up
+     soup = BeautifulSoup(html, "html.parser")
+     try:
+         jobDescription = soup.find(
+             "div", class_="show-more-less-html__markup"
+         ).text.strip()
+         return jobDescription
+     except AttributeError:
+         # find() returned None: the page has no description block
+         return ""
+
+ def scrapeLinkedin():
+     global df
+     global inputJobTitle
+     global inputJobLocation
+     counter = 0
+     pageCounter = 1
+     options = Options()
+     options.add_argument("--window-size=1920,1080")
+     options.add_argument("--headless=new")
+     driver = webdriver.Chrome(options=options)
+
+     while pageCounter <= totalPages:
+         try:
+             # LinkedIn's guest job search paginates in steps of 25 results
+             driver.get(
+                 f"https://www.linkedin.com/jobs/search/?&keywords={inputJobTitle}&location={inputJobLocation}&refresh=true&start={counter}"
+             )
+
+             html = driver.page_source
+             soup = BeautifulSoup(html, "html.parser")
+
+             ulElement = soup.find("ul", class_="jobs-search__results-list")
+             liElements = ulElement.find_all("li")
+
+             for item in liElements:
+                 jobTitle = item.find(
+                     "h3", class_="base-search-card__title"
+                 ).text.strip()
+                 jobLocation = item.find(
+                     "span", class_="job-search-card__location"
+                 ).text.strip()
+                 jobCompany = item.find(
+                     "h4", class_="base-search-card__subtitle"
+                 ).text.strip()
+                 jobLink = item.find_all("a")[0]["href"]
+
+                 jobDescription = scrapeJobDescription(jobLink)
+
+                 if jobTitle and jobLocation and jobCompany and jobLink:
+                     df = pd.concat(
+                         [
+                             df,
+                             pd.DataFrame(
+                                 {
+                                     "Title": [jobTitle],
+                                     "Location": [jobLocation],
+                                     "Company": [jobCompany],
+                                     "Link": [jobLink],
+                                     "Description": [jobDescription],
+                                 }
+                             ),
+                         ]
+                     )
+
+             counter += 25
+             pageCounter += 1
+         except Exception:
+             # Stop paginating if the results list is missing or the page fails to load
+             break
+
+     driver.quit()
+
+ def convert_df(df):
+     # Encode the DataFrame as UTF-8 CSV bytes for st.download_button
+     return df.to_csv(index=False).encode('utf-8')
+
+
+ if submit_button:
+     with st.spinner("Operation in progress. Please wait..."):
+         scrapeLinkedin()
+         time.sleep(1)
+     st.write(df)
+
+     csv = convert_df(df)
+     st.download_button(
+         "Press to Download",
+         csv,
+         "file.csv",
+         "text/csv",
+         key='download-csv'
+     )
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ pandas
+ streamlit
+ selenium
+ bs4
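
A quick way to try the app locally, assuming Chrome is installed (Selenium 4.6+ can resolve a matching chromedriver automatically via Selenium Manager; on older Selenium versions a chromedriver must be on PATH):

    pip install -r requirements.txt
    streamlit run app.py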