File size: 2,228 Bytes
f8da2f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import os
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter

# Path to the folder with date-wise subfolders
base_folder = "./tags"

# Directory to save the plots
output_folder = "./plots"
os.makedirs(output_folder, exist_ok=True)


def _count_skills_in_folder(folder_path):
    """Count skill occurrences across the ``.txt`` files directly inside *folder_path*.

    Every non-blank line of a file is treated as one skill mention. Each line
    is stripped of surrounding whitespace so that "Python" and "Python " are
    tallied together, and blank lines are ignored instead of being counted as
    an empty-string skill.

    Args:
        folder_path: Directory holding the tag files for a single date.

    Returns:
        A ``Counter`` mapping skill name to its number of occurrences.
    """
    counts = Counter()
    for file_name in os.listdir(folder_path):
        file_path = os.path.join(folder_path, file_name)
        # Skip other file types, and entries that only look like text files
        # (e.g. a directory whose name happens to end in ".txt").
        if not file_name.endswith(".txt") or not os.path.isfile(file_path):
            continue
        # "utf-8-sig" decodes plain UTF-8 unchanged but drops a leading BOM,
        # which would otherwise become part of the first skill name.
        with open(file_path, "r", encoding="utf-8-sig") as tag_file:
            stripped_lines = (line.strip() for line in tag_file.read().splitlines())
            counts.update(skill for skill in stripped_lines if skill)
    return counts


# Step 1: Initialize data structure to store skill counts
# Maps each date folder name to the Counter of skills found in that folder.
date_skill_counts = {}

# Step 2: Loop through the date folders
# Sorted so the dates are processed (and inserted) in name order.
for date_folder in sorted(os.listdir(base_folder)):
    folder_path = os.path.join(base_folder, date_folder)
    # Only subdirectories represent dates; loose files in base_folder are ignored.
    if os.path.isdir(folder_path):
        date_skill_counts[date_folder] = _count_skills_in_folder(folder_path)

# Step 3: Aggregate the data into a DataFrame
# Rows are the date folder names, columns are skills, cells are counts.
all_dates = sorted(date_skill_counts)
# Sort the skill names: iterating a set of strings yields a different order
# from run to run (hash randomization), which would shuffle the DataFrame
# columns and change which skills win a tie in nlargest() below.
all_skills = sorted({skill for counts in date_skill_counts.values() for skill in counts})
data = {
    # A skill that does not appear on a given date contributes 0 for that date.
    skill: [date_skill_counts[date].get(skill, 0) for date in all_dates]
    for skill in all_skills
}
df = pd.DataFrame(data, index=all_dates)

print(df)

# Step 4: Identify the top 3 skills
# Sum each column over all dates, then keep the labels of the 3 largest totals.
total_counts = df.sum(axis=0)
top_skills = total_counts.nlargest(3).index

# Step 5: Plot and save separate graphs for the top 3 skills

# Path separators and characters that Windows rejects in file names. A skill
# such as "CI/CD" would otherwise make savefig() point into a subdirectory
# that does not exist. Only the file name is sanitized; titles and legend
# entries keep the original skill text.
unsafe_filename_chars = str.maketrans({ch: "_" for ch in '<>:"/\\|?*'})

for skill in top_skills:
    fig = plt.figure(figsize=(8, 5))
    plt.plot(df.index, df[skill], marker="o", label=skill)

    # Add labels and legend
    plt.title(f"Trend of {skill} Over Time")
    plt.xlabel("Date")
    plt.ylabel("Count")
    plt.xticks(rotation=45)
    plt.legend(title="Skill")
    # Pass True explicitly: a bare grid() call toggles the current state
    # instead of switching the grid on.
    plt.grid(True)
    plt.tight_layout()

    # Save the plot
    safe_name = skill.translate(unsafe_filename_chars)
    plot_path = os.path.join(output_folder, f"{safe_name}_trend.png")
    plt.savefig(plot_path, format="png", dpi=300)
    print(f"Saved plot for {skill} at {plot_path}")

    # Show the plot
    plt.show()
    # Release the figure so figures do not pile up in memory across iterations.
    plt.close(fig)