|
import streamlit as st
import pandas as pd
from github import Github
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import re
import datetime
|
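# Expected .streamlit/secrets.toml layout (assumed; values are placeholders —
# g.get_repo() takes full "owner/name" identifiers):
#   ACCESS_TOKEN = "ghp_..."
#   REPO_NAME = "owner/repo-one,owner/repo-two"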
g = Github(st.secrets["ACCESS_TOKEN"])

repos = st.secrets["REPO_NAME"].split(",")
repos = [g.get_repo(repo) for repo in repos]
|
@st.cache_data
def fetch_data():
    """Fetch every issue (open and closed) from the configured repositories."""
    issues_data = []

    for repo in repos:
        issues = repo.get_issues(state="all")

        for issue in issues:
            issues_data.append(
                {
                    'Issue': f"{issue.number} - {issue.title}",
                    'State': issue.state,
                    'Created at': issue.created_at,
                    'Closed at': issue.closed_at,
                    'Last update': issue.updated_at,
                    'Labels': [label.name for label in issue.labels],
                    'Reactions': issue.reactions['total_count'],
                    'Comments': issue.comments,
                    'URL': issue.html_url,
                    'Repository': repo.name,
                }
            )

    return pd.DataFrame(issues_data)
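# st.cache_data memoizes fetch_data for as long as the cache lives; pass a
# ttl (e.g. @st.cache_data(ttl=3600)) to re-fetch periodically if desired.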
|
def save_data(df):
    # orient="records" never includes the index, so no index flag is needed.
    df.to_json("issues.json", orient="records", indent=4)


@st.cache_data
def load_data():
    try:
        df = pd.read_json(
            "issues.json",
            convert_dates=["Created at", "Closed at", "Last update"],
            date_unit="ms",
        )
    except (FileNotFoundError, ValueError):
        # No local snapshot yet (or it is unreadable): fetch from the API.
        df = fetch_data()
        save_data(df)
    return df
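# The JSON snapshot stores timestamps at millisecond precision, which is why
# load_data parses the three date columns with date_unit="ms".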
|
st.title("GitHub Issues Dashboard")
status = st.status(label="Loading data...", state="running")

df = load_data()

today = datetime.date.today()

st.header("Issue activity metrics")

col1, col2, col3 = st.columns(3)

# Counts are indexed by the 'State' values, so the 'open'/'closed' lookups
# below assume at least one issue of each state exists.
state_counts = df['State'].value_counts()
open_issues = df.loc[df['State'] == 'open']
# Copy the slice so adding 'Time to Close' avoids a SettingWithCopyWarning.
closed_issues = df.loc[df['State'] == 'closed'].copy()
closed_issues['Time to Close'] = closed_issues['Closed at'] - closed_issues['Created at']

with col1:
    st.metric(label="Open issues", value=state_counts['open'])

with col2:
    st.metric(label="Closed issues", value=state_counts['closed'])

with col3:
    average_time_to_close = closed_issues['Time to Close'].mean().days
    st.metric(label="Avg. days to close", value=average_time_to_close)
|
|
st.subheader("Latest updates")
col1, col2 = st.columns(2)
with col1:
    last_update_date = st.date_input("Last updated after:", value=today - datetime.timedelta(days=7), format="DD-MM-YYYY")
    last_update_date = datetime.datetime.combine(last_update_date, datetime.datetime.min.time())
with col2:
    updated_issues = open_issues[open_issues["Last update"] > last_update_date]
    st.metric("Results:", updated_issues.shape[0])

st.dataframe(
    updated_issues[["Issue", "Labels", "Repository", "Last update", "URL"]].sort_values(by="Last update", ascending=False),
    hide_index=True,
    column_config={
        "Issue": st.column_config.TextColumn("Issue", width="large"),
        "Labels": st.column_config.ListColumn("Labels", width="large"),
        "Last update": st.column_config.DatetimeColumn("Last update", width="medium"),
        "URL": st.column_config.LinkColumn("🔗", display_text="🔗", width="small"),
    },
)
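# Note: recent PyGithub versions return timezone-aware (UTC) timestamps; if
# the "Last update" comparisons raise a naive/aware TypeError, localize the
# date inputs with tzinfo=datetime.timezone.utc before comparing.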
|
st.subheader("Stale issues? 🕸️")
col1, col2 = st.columns(2)
with col1:
    not_updated_since = st.date_input("Not updated since:", value=today - datetime.timedelta(days=90), format="DD-MM-YYYY")
    not_updated_since = datetime.datetime.combine(not_updated_since, datetime.datetime.min.time())
with col2:
    stale_issues = open_issues[open_issues["Last update"] < not_updated_since]
    st.metric("Results:", stale_issues.shape[0])
st.dataframe(
    stale_issues[["Issue", "Labels", "Repository", "Last update", "URL"]].sort_values(by="Last update", ascending=True),
    hide_index=True,
    column_config={
        "Issue": st.column_config.TextColumn("Issue", width="large"),
        "Labels": st.column_config.ListColumn("Labels", width="large"),
        "Last update": st.column_config.DatetimeColumn("Last update", width="medium"),
        "URL": st.column_config.LinkColumn("🔗", display_text="🔗", width="small"),
    },
)
|
st.header("Issue classification")
col1, col2 = st.columns(2)

with col1:
    st.subheader("Top ten labels")
    # Count label occurrences per repository (explode flattens the label lists).
    label_counts = (
        open_issues.groupby("Repository")
        .apply(lambda x: x.explode("Labels").value_counts("Labels").to_frame().reset_index())
        .reset_index()
    )

    def generate_labels_link(labels, repos):
        # Build a GitHub issue-search URL for each (label, repository) pair.
        links = []
        for label, repo in zip(labels, repos):
            label = label.replace(" ", "+")
            links.append(f"https://github.com/argilla-io/{repo}/issues?q=is:open+is:issue+label:%22{label}%22")
        return links

    label_counts['Link'] = generate_labels_link(label_counts['Labels'], label_counts['Repository'])

    # Sort globally by count so head(10) really is the top ten labels.
    st.dataframe(
        label_counts[["Labels", "Repository", "count", "Link"]].sort_values("count", ascending=False).head(10),
        hide_index=True,
        column_config={
            "Labels": st.column_config.TextColumn("Labels"),
            "count": st.column_config.NumberColumn("Count"),
            "Link": st.column_config.LinkColumn("Link", display_text="🔗"),
        },
    )
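    # Note: the links above hardcode the argilla-io organization; storing
    # repo.full_name in fetch_data would make them organization-agnostic.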
|
with col2:
    st.subheader("Cloud of words ☁️")
    # Join all issue titles and strip bracketed prefixes such as "[BUG]".
    titles = " ".join(open_issues["Issue"])
    titles = re.sub(r'\[.*?\]', '', titles)
    wordcloud = WordCloud(width=800, height=400, background_color="black").generate(titles)
    fig = plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation="bilinear")
    plt.axis("off")
    st.pyplot(fig, use_container_width=True)
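    # Close the figure after rendering so figures don't accumulate across
    # Streamlit reruns.
    plt.close(fig)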
|
st.header("Community engagement")

st.subheader("Top engaging issues 💬")
engagement_df = open_issues[["Issue", "Repository", "Reactions", "Comments", "URL"]].sort_values(by=["Reactions", "Comments"], ascending=False).head(10)
st.dataframe(
    engagement_df,
    hide_index=True,
    column_config={
        "Issue": st.column_config.TextColumn("Issue", width="large"),
        "Reactions": st.column_config.NumberColumn("Reactions", format="%d 👍", width="small"),
        "Comments": st.column_config.NumberColumn("Comments", format="%d 💬", width="small"),
        "URL": st.column_config.LinkColumn("🔗", display_text="🔗", width="small"),
    },
)
|
|
# Re-fetch and compare with the data currently shown; if something changed,
# persist it so a page refresh picks up the new snapshot.
status.update(label="Checking for updated data...", state="running")
updated_data = fetch_data()
if df.equals(updated_data):
    status.update(label="Data is up to date!", state="complete")
else:
    save_data(updated_data)
    status.update(label="Refresh for updated data!", state="complete")
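# Note: fetch_data is cached by st.cache_data, so this check only hits the
# GitHub API again once the cache expires or is cleared (e.g. via a ttl).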
|
|