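# Streamlit app that scrapes LinkedIn's public job search results with Selenium
# and BeautifulSoup. Launch it with: streamlit run <this_file>.py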
import streamlit as st
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import pandas as pd
import time

# Global DataFrame that accumulates one row per scraped job posting
df = pd.DataFrame(columns=["Title", "Location", "Company", "Link", "Description"])

# Get user input
inputJobTitle = st.text_input("Enter Job Title:")
inputJobLocation = st.text_input("Enter Job Location:")
totalPages = st.number_input("Enter Total Pages:", min_value=1, value=1)
submit_button = st.button("Submit")

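# Open a single job posting in a headless browser and extract the full
# description text from its "show more" container.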
def scrapeJobDescription(url):
    options = Options()
    options.add_argument("--window-size=1920,1080")
    options.add_argument("--headless=new")
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        html = driver.page_source
    finally:
        # Always release the browser, even if the page fails to load
        driver.quit()
    soup = BeautifulSoup(html, "html.parser")
    descriptionDiv = soup.find("div", class_="show-more-less-html__markup")
    if descriptionDiv is None:
        # The description container was not found on the page
        return ""
    return descriptionDiv.text.strip()

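# Page through the public LinkedIn job search results, scrape each card's
# title, location, company, and link, fetch the full description, and
# append every complete row to the global DataFrame.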
def scrapeLinkedin():
    global df
    counter = 0
    pageCounter = 1
    options = Options()
    options.add_argument("--headless=new")
    driver = webdriver.Chrome(options=options)

    while pageCounter <= totalPages:
        try:
            driver.get(
                f"https://www.linkedin.com/jobs/search/?&keywords={inputJobTitle}&location={inputJobLocation}&refresh=true&start={counter}"
            )

            html = driver.page_source
            soup = BeautifulSoup(html, "html.parser")

            ulElement = soup.find("ul", class_="jobs-search__results-list")
            liElements = ulElement.find_all("li")

            for item in liElements:
                jobTitle = item.find(
                    "h3", class_="base-search-card__title"
                ).text.strip()
                jobLocation = item.find(
                    "span", class_="job-search-card__location"
                ).text.strip()
                jobCompany = item.find(
                    "h4", class_="base-search-card__subtitle"
                ).text.strip()
                jobLink = item.find_all("a")[0]["href"]

                jobDescription = scrapeJobDescription(jobLink)

                if jobTitle and jobLocation and jobCompany and jobLink:
                    df = pd.concat(
                        [
                            df,
                            pd.DataFrame(
                                {
                                    "Title": [jobTitle],
                                    "Location": [jobLocation],
                                    "Company": [jobCompany],
                                    "Link": [jobLink],
                                    "Description": [jobDescription],
                                }
                            ),
                        ],
                        ignore_index=True,
                    )

            # Each results page holds 25 listings, so advance the start offset by 25
            counter += 25
            pageCounter += 1
        except Exception:
            # Stop paging if the results list is missing or a page fails to load
            break

    driver.quit()

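# Serialize the results DataFrame to UTF-8 CSV bytes for the download button.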
def convert_df(df):
    return df.to_csv(index=False).encode("utf-8")


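# Run the scraper when the user clicks Submit, display the results table,
# and offer the collected data as a CSV download.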
if submit_button:
    with st.spinner("Operation in progress. Please wait..."):
        scrapeLinkedin()
        time.sleep(1)
    st.write(df)

    csv = convert_df(df)
    st.download_button(
        "Press to Download",
        csv,
        "file.csv",
        "text/csv",
        key="download-csv",
    )