import os
import re
from collections import Counter

import matplotlib.pyplot as plt
import pandas as pd


# Root directory that is scanned for sub-folders of ``.txt`` tag files.
base_folder = "./tags"

# Directory the trend plots are written to. It is created up front
# (a no-op when it already exists) so saving a figure cannot fail on a
# missing folder.
output_folder = "./plots"
os.makedirs(output_folder, exist_ok=True)

def _count_skills(folder_path):
    """Return a Counter of the skills listed in the ``.txt`` files of a folder.

    Every non-blank line of every ``.txt`` file directly inside
    *folder_path* counts as one occurrence of the skill named on that
    line. Files with any other extension are ignored.
    """
    counts = Counter()
    for file_name in os.listdir(folder_path):
        if not file_name.endswith(".txt"):
            continue
        file_path = os.path.join(folder_path, file_name)
        with open(file_path, "r", encoding="utf-8") as file:
            lines = file.read().splitlines()
        # Strip each line individually and drop blanks so that empty lines
        # and stray surrounding whitespace do not show up as separate skills.
        stripped = (line.strip() for line in lines)
        counts.update(skill for skill in stripped if skill)
    return counts


# Maps each sub-folder name of ``base_folder`` to the Counter of skills found
# in that sub-folder. Entries of ``base_folder`` that are not directories are
# skipped.
date_skill_counts = {}
for date_folder in sorted(os.listdir(base_folder)):
    folder_path = os.path.join(base_folder, date_folder)
    if os.path.isdir(folder_path):
        date_skill_counts[date_folder] = _count_skills(folder_path)

# Folder names in ascending order; they become the DataFrame index.
all_dates = sorted(date_skill_counts)

# Every skill that appears under at least one date.
all_skills = {skill for counts in date_skill_counts.values() for skill in counts}

# One column per skill, one row per date; a skill that is absent on a date
# gets a count of 0. Columns are built in sorted order because the iteration
# order of a set of strings can differ between interpreter runs, which would
# otherwise make the column order unstable.
data = {
    skill: [date_skill_counts[date].get(skill, 0) for date in all_dates]
    for skill in sorted(all_skills)
}
df = pd.DataFrame(data, index=all_dates)

print(df)

# Total number of occurrences of each skill (column) across all dates (rows).
total_counts = df.sum(axis=0)

# Labels of the three skills with the highest totals. ``nlargest`` resolves
# ties by position, so the totals are ordered by label first to make the
# selection independent of the DataFrame's column order.
top_skills = total_counts.sort_index().nlargest(3).index

# Path separators and characters that are not allowed in file names on common
# platforms; a skill name such as "CI/CD" must not be used verbatim as a file
# name.
_UNSAFE_FILENAME_CHARS = re.compile(r'[<>:"/\\|?*\x00-\x1f]')

# Draw one line chart per top skill (count per date) and save it as a PNG.
for skill in top_skills:
    plt.figure(figsize=(8, 5))
    plt.plot(df.index, df[skill], marker="o", label=skill)

    plt.title(f"Trend of {skill} Over Time")
    plt.xlabel("Date")
    plt.ylabel("Count")
    plt.xticks(rotation=45)
    plt.legend(title="Skill")
    plt.grid()
    plt.tight_layout()

    # Skill names without unsafe characters keep their original file name.
    safe_name = _UNSAFE_FILENAME_CHARS.sub("_", str(skill))
    plot_path = os.path.join(output_folder, f"{safe_name}_trend.png")
    plt.savefig(plot_path, format="png", dpi=300)
    print(f"Saved plot for {skill} at {plot_path}")

# NOTE(review): the original indentation was lost, so it is unclear whether
# this call belonged inside the loop. Placed after the loop it displays all
# figures together; the saved files are the same either way.
plt.show()