File size: 1,661 Bytes
9c3e55b
30ccb18
9c3e55b
 
 
 
354c974
9c3e55b
 
 
 
 
 
354c974
9c3e55b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191d49c
9c3e55b
 
 
 
 
 
 
 
 
 
 
 
 
69ae922
3edd63d
 
c5d8c2e
3edd63d
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import http.client
# from config import *
import json
import os
from datetime import datetime

# RapidAPI credential read from the environment; None when RAPID_API_KEY is
# unset, in which case the API request below will fail authentication.
api_key = os.getenv('RAPID_API_KEY')

def scrape_jobs():
    """Fetch recent machine-learning job postings from the RapidAPI
    LinkedIn job-search endpoint.

    Returns:
        The JSON-decoded response body (as returned by the API; presumably
        a list of job dicts — confirmed only by how callers iterate it).

    Raises:
        json.JSONDecodeError: if the response body is not valid JSON.
        OSError: on network failure.
    """
    conn = http.client.HTTPSConnection("linkedin-job-search-api.p.rapidapi.com")
    try:
        headers = {
            'x-rapidapi-key': api_key,
            'x-rapidapi-host': "linkedin-job-search-api.p.rapidapi.com"
        }
        # 7-day active-jobs feed, title-filtered to "machine learning",
        # requesting plain-text descriptions.
        conn.request("GET", "/active-jb-7d?title_filter=machine%20learning&description_type=text", headers=headers)
        res = conn.getresponse()
        data = res.read()
    finally:
        # Fix: the original never closed the connection (socket leak).
        conn.close()
    return json.loads(data.decode("utf-8"))

def extract_job_descriptions(jobs):
    """Save each job's plain-text description to its own file.

    Files are written to job-postings/<DD-MM-YYYY>/<idx>.txt, where <idx>
    is the 1-based position of the job within *jobs*.

    Args:
        jobs: iterable of job dicts; entries with a 'description_text'
            value are saved, all others are skipped with a notice.
    """
    # Date-stamped folder (DD-MM-YYYY) groups each day's scrape together.
    current_date = datetime.now().strftime('%d-%m-%Y')
    folder_path = os.path.join("job-postings", current_date)
    print(f"Creating folder at: {folder_path}")
    os.makedirs(folder_path, exist_ok=True)

    for idx, job in enumerate(jobs, start=1):
        # .get() replaces the `'key' in job.keys()` anti-idiom and also
        # guards against a present-but-None description, which would have
        # crashed file.write() in the original.
        jd = job.get('description_text')
        if jd is not None:
            print(jd)

            # Save the job description to a text file.
            file_path = os.path.join(folder_path, f"{idx}.txt")
            with open(file_path, "w", encoding="utf-8") as file:
                file.write(jd)
            print(f"Job {idx} saved")
        else:
            print("Job description not available")
if __name__ == "__main__":
    # Fix: guard the live scrape so merely importing this module no longer
    # fires an HTTP request and writes files as a side effect.
    jobs = scrape_jobs()
    extract_job_descriptions(jobs)

# current_date = datetime.now().strftime('%d-%m-%Y')
# folder_path = os.path.join("job-postings", current_date)
# print(f"Creating folder at: {folder_path}")
# os.makedirs(folder_path, exist_ok=True)