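# Repository page header (not part of the script):
# author Aqsa-K, commit f8da2f0 "embedding and graphs added",
# file size 2.23 kB (viewer links: raw / history / blame).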
import os
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter
# Input root: the folder that contains the date-wise subfolders.
base_folder = "./tags"

# Output location for the generated plots; created up front so later saves
# do not fail on a missing directory (no error if it already exists).
output_folder = "./plots"
os.makedirs(name=output_folder, exist_ok=True)
# Step 1: Map each date folder name to a Counter of the skills found in it.
date_skill_counts = {}

# Step 2: Walk the date folders in sorted (name) order.
for date_folder in sorted(os.listdir(base_folder)):
    folder_path = os.path.join(base_folder, date_folder)
    if not os.path.isdir(folder_path):
        # Plain files sitting next to the date folders are not dates.
        continue

    # Fresh counter per date so counts never carry over between folders.
    skill_counter = Counter()

    # Only the .txt files of the date folder are read; one skill per line.
    for file_name in os.listdir(folder_path):
        if not file_name.endswith(".txt"):
            continue
        file_path = os.path.join(folder_path, file_name)
        with open(file_path, "r", encoding="utf-8") as file:
            raw_lines = file.read().splitlines()
        # Strip every line individually and drop blanks: stripping only the
        # whole text would count empty lines as a "" skill and treat padded
        # lines ("Python ") as a different skill from "Python".
        stripped = (line.strip() for line in raw_lines)
        skill_counter.update(skill for skill in stripped if skill)

    # Save counts for the date
    date_skill_counts[date_folder] = skill_counter
# Step 3: Aggregate the data into a DataFrame with one row per date and one
# column per skill; a skill that does not occur on a date gets a count of 0.
all_dates = sorted(date_skill_counts)
all_skills = {skill for counts in date_skill_counts.values() for skill in counts}

# Build the columns in sorted skill order. Iterating the set directly yields
# an order that can change from run to run for strings, which would make the
# printed table -- and any "first wins" tie-breaking done on the columns
# later -- non-reproducible.
data = {
    skill: [date_skill_counts[date].get(skill, 0) for date in all_dates]
    for skill in sorted(all_skills)
}
df = pd.DataFrame(data, index=all_dates)
print(df)
# Step 4: Rank the skills and keep the three most frequent ones.
# Summing down the rows gives each skill's total count over all dates.
total_counts = df.sum(axis="index")
top_skills = total_counts.nlargest(n=3).index
# Step 5: Plot and save a separate trend graph for each of the top skills.

# Characters that act as path separators or are rejected in file names on
# common platforms. Skill names are free text and may contain them
# (e.g. "CI/CD"), which would otherwise point the save path at a
# non-existent subdirectory of the output folder.
_unsafe_filename_chars = set('<>:"/\\|?*')

for skill in top_skills:
    fig = plt.figure(figsize=(8, 5))
    plt.plot(df.index, df[skill], marker="o", label=skill)

    # Add labels and legend
    plt.title(f"Trend of {skill} Over Time")
    plt.xlabel("Date")
    plt.ylabel("Count")
    plt.xticks(rotation=45)
    plt.legend(title="Skill")
    plt.grid()
    plt.tight_layout()

    # Save the plot under a sanitized file name so it always resolves to a
    # single file directly inside output_folder; the title and the log
    # message keep the original skill text.
    safe_name = "".join(
        "_" if ch in _unsafe_filename_chars or ord(ch) < 32 else ch
        for ch in str(skill)
    )
    plot_path = os.path.join(output_folder, f"{safe_name}_trend.png")
    plt.savefig(plot_path, format="png", dpi=300)
    print(f"Saved plot for {skill} at {plot_path}")

    # Show the plot, then close the figure so finished figures are not kept
    # alive by pyplot for the rest of the run.
    plt.show()
    plt.close(fig)