import time
from urllib.parse import quote_plus

import pandas as pd
import streamlit as st
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Global DataFrame that accumulates one row per scraped job posting
df = pd.DataFrame(columns=["Title", "Location", "Company", "Link", "Description"])

# Get user input
inputJobTitle = st.text_input("Enter Job Title:")
inputJobLocation = st.text_input("Enter Job Location:")
totalPages = st.number_input("Enter Total Pages:", min_value=1, value=1)
submit_button = st.button("Submit")


def scrapeJobDescription(url):
    """Open a single job posting in a headless browser and return its description text."""
    options = Options()
    options.add_argument("--window-size=1920,1080")
    options.add_argument("--headless=new")
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        soup = BeautifulSoup(driver.page_source, "html.parser")
        return soup.find("div", class_="show-more-less-html__markup").text.strip()
    except AttributeError:
        # The description container was not found on the page
        return ""
    finally:
        # Always release the browser, even when parsing fails
        driver.quit()


def scrapeLinkedin():
    global df
    counter = 0
    pageCounter = 1

    options = Options()
    options.add_argument("--headless=new")
    driver = webdriver.Chrome(options=options)

    while pageCounter <= totalPages:
        try:
            # URL-encode the user input so spaces and special characters survive
            driver.get(
                "https://www.linkedin.com/jobs/search/"
                f"?&keywords={quote_plus(inputJobTitle)}"
                f"&location={quote_plus(inputJobLocation)}"
                f"&refresh=true&start={counter}"
            )
            soup = BeautifulSoup(driver.page_source, "html.parser")

            ulElement = soup.find("ul", class_="jobs-search__results-list")
            liElements = ulElement.find_all("li")

            for item in liElements:
                jobTitle = item.find("h3", class_="base-search-card__title").text.strip()
                jobLocation = item.find("span", class_="job-search-card__location").text.strip()
                jobCompany = item.find("h4", class_="base-search-card__subtitle").text.strip()
                jobLink = item.find_all("a")[0]["href"]
                jobDescription = scrapeJobDescription(jobLink)

                if jobTitle and jobLocation and jobCompany and jobLink:
                    df = pd.concat(
                        [
                            df,
                            pd.DataFrame(
                                {
                                    "Title": [jobTitle],
                                    "Location": [jobLocation],
                                    "Company": [jobCompany],
                                    "Link": [jobLink],
                                    "Description": [jobDescription],
                                }
                            ),
                        ],
                        ignore_index=True,
                    )

            # The guest job search paginates in steps of 25 results
            counter += 25
            pageCounter += 1
        except Exception:
            # Stop paginating if the results list is missing or a card cannot be parsed
            break

    driver.quit()


def convert_df(df):
    return df.to_csv(index=False).encode("utf-8")


if submit_button:
    with st.spinner("Operation in progress. Please wait..."):
        scrapeLinkedin()
        time.sleep(1)

    st.write(df)

    csv = convert_df(df)
    st.download_button(
        "Press to Download",
        csv,
        "file.csv",
        "text/csv",
        key="download-csv",
    )
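A minimal way to run the script, as a sketch: assuming it is saved as app.py (the filename is an assumption, not part of the original), launch it with Streamlit from the command line. Chrome must be installed; with Selenium 4.6+ a matching chromedriver is fetched automatically by Selenium Manager, otherwise chromedriver needs to be on the PATH.

pip install streamlit selenium beautifulsoup4 pandas
streamlit run app.py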