Spaces:

Robzy
/

jobbert_knowledge_extraction

Running

jobbert_knowledge_extraction / trend_graph.py

Aqsa-K

embedding and graphs added

f8da2f0 about 2 months ago

2.23 kB

	import os
	import pandas as pd
	import matplotlib.pyplot as plt
	from collections import Counter

	# Path to the folder with date-wise subfolders
	base_folder = "./tags"

	# Directory to save the plots
	output_folder = "./plots"
	os.makedirs(output_folder, exist_ok=True)

	# Step 1: Initialize data structure to store skill counts
	date_skill_counts = {}

	# Step 2: Loop through the date folders
	for date_folder in sorted(os.listdir(base_folder)):
	folder_path = os.path.join(base_folder, date_folder)
	if os.path.isdir(folder_path):
	# Initialize skill counter for the date
	skill_counter = Counter()

	# Loop through all files in the date folder
	for file_name in os.listdir(folder_path):
	file_path = os.path.join(folder_path, file_name)
	if file_name.endswith(".txt"):
	with open(file_path, "r", encoding="utf-8") as file:
	# Read skills from the file
	skills = file.read().strip().splitlines()
	skill_counter.update(skills)

	# Save counts for the date
	date_skill_counts[date_folder] = skill_counter

	# Step 3: Aggregate the data into a DataFrame
	all_dates = sorted(date_skill_counts.keys())
	all_skills = set(skill for counts in date_skill_counts.values() for skill in counts)
	data = {skill: [date_skill_counts[date].get(skill, 0) for date in all_dates] for skill in all_skills}
	df = pd.DataFrame(data, index=all_dates)

	print(df)

	# Step 4: Identify the top 3 skills
	total_counts = df.sum(axis=0)
	top_skills = total_counts.nlargest(3).index

	# Step 5: Plot and save separate graphs for the top 3 skills
	for skill in top_skills:
	plt.figure(figsize=(8, 5))
	plt.plot(df.index, df[skill], marker="o", label=skill)

	# Add labels and legend
	plt.title(f"Trend of {skill} Over Time")
	plt.xlabel("Date")
	plt.ylabel("Count")
	plt.xticks(rotation=45)
	plt.legend(title="Skill")
	plt.grid()
	plt.tight_layout()

	# Save the plot
	plot_path = os.path.join(output_folder, f"{skill}_trend.png")
	plt.savefig(plot_path, format="png", dpi=300)
	print(f"Saved plot for {skill} at {plot_path}")

	# Show the plot
	plt.show()