File size: 7,863 Bytes
1e08812 0dca33a fe06679 99e4eea 0b2c5d2 1e08812 b1dec7c 9b1266b 0dca33a 58bba8c b1dec7c 0dca33a b1dec7c 9b1266b b1dec7c 0dca33a 086fa02 b1dec7c 086fa02 58bba8c b1dec7c 9b1266b 086fa02 0b2c5d2 b1dec7c ef22b70 b1dec7c ef22b70 e8df381 b1dec7c 99e4eea b1dec7c 99e4eea b1dec7c e8df381 99e4eea ef22b70 b1dec7c 0b2c5d2 58bba8c 086fa02 0b2c5d2 086fa02 0b2c5d2 cc871a7 842bf87 b410ebc 58bba8c cc871a7 58bba8c cc871a7 b1dec7c 0b2c5d2 58bba8c 086fa02 0b2c5d2 086fa02 0b2c5d2 ef22b70 842bf87 ef22b70 58bba8c ef22b70 58bba8c ef22b70 b410ebc cc871a7 b410ebc 9b1266b b410ebc 9b1266b b410ebc 9b1266b b410ebc 9b1266b b410ebc 9b1266b b410ebc 842bf87 9b1266b b410ebc 842bf87 b410ebc fe06679 99e4eea 58bba8c fe06679 58bba8c b1dec7c b410ebc 842bf87 b1dec7c 58bba8c b1dec7c 58bba8c b1dec7c 086fa02 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 |
import streamlit as st
import pandas as pd
from github import Github
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import re
import datetime
# GitHub client authenticated with the token stored in the Streamlit secrets.
g = Github(st.secrets["ACCESS_TOKEN"])
# REPO_NAME holds a comma-separated list of repository identifiers
# (presumably "owner/name" -- verify against the deployed secrets).
# Surrounding whitespace and empty entries (e.g. "a, b" or a trailing comma)
# are dropped so they are not handed to the API as bogus repository names.
repos = [
    g.get_repo(repo_name.strip())
    for repo_name in st.secrets["REPO_NAME"].split(",")
    if repo_name.strip()
]
@st.cache_data
def fetch_data():
    """Collect the issues of every configured repository into a DataFrame.

    Iterates over the module-level ``repos`` list and requests the issues in
    any state. Pull requests are skipped so that the dashboard only counts
    real issues.

    Returns:
        pandas.DataFrame: one row per issue with the columns ``Issue``,
        ``State``, ``Created at``, ``Closed at``, ``Last update``, ``Labels``,
        ``Reactions``, ``Comments``, ``URL`` and ``Repository``. The frame has
        no columns at all when no issue was found.
    """
    issues_data = []
    for repo in repos:
        for issue in repo.get_issues(state="all"):
            # The GitHub issues endpoint also lists pull requests. Their web
            # URL ends in ".../pull/<number>", which lets us tell them apart
            # from the listing payload alone, without an extra API request
            # per item.
            if issue.html_url.split("/")[-2:-1] == ["pull"]:
                continue
            issues_data.append(
                {
                    'Issue': f"{issue.number} - {issue.title}",
                    'State': issue.state,
                    'Created at': issue.created_at,
                    'Closed at': issue.closed_at,
                    'Last update': issue.updated_at,
                    'Labels': [label.name for label in issue.labels],
                    'Reactions': issue.reactions['total_count'],
                    'Comments': issue.comments,
                    'URL': issue.html_url,
                    'Repository': repo.name,
                }
            )
    return pd.DataFrame(issues_data)
# def save_data(df):
# df.to_json("issues.json", orient="records", indent=4, index=False)
# @st.cache_data
# def load_data():
# try:
# df = pd.read_json("issues.json", convert_dates=["Created at", "Closed at", "Last update"], date_unit="ms")
# except:
# df = fetch_data()
# save_data(df)
# return df
st.title("GitHub Issues Dashboard")
with st.status(label="Loading data...", state="running") as status:
    try:
        df = fetch_data()
    except Exception:
        # Do not leave the widget spinning forever when the fetch fails; the
        # exception is re-raised so Streamlit still reports the real error.
        status.update(label="Failed to load data", state="error")
        raise
    status.update(label="Data loaded!", state="complete")
if df.empty:
    # Without rows the frame has no columns, so every section below would
    # fail on its first column lookup. Stop the script run cleanly instead.
    st.warning("No issues were found in the configured repositories.")
    st.stop()
# Reference date used for the default values of the date pickers below.
today = datetime.date.today()
# Section 1: Issue activity metrics
st.header("Issue activity metrics")
col1, col2, col3 = st.columns(3)
state_counts = df['State'].value_counts()
open_issues = df.loc[df['State'] == 'open']
# Work on an explicit copy: adding a column to a slice of ``df`` triggers
# pandas' SettingWithCopyWarning.
closed_issues = df.loc[df['State'] == 'closed'].copy()
# Normalising both columns to UTC keeps the subtraction valid even when one
# of them was inferred as a plain object column (e.g. no closed issue at all).
closed_issues['Time to Close'] = (
    pd.to_datetime(closed_issues['Closed at'], utc=True)
    - pd.to_datetime(closed_issues['Created at'], utc=True)
)
with col1:
    # ``.get`` avoids a KeyError when no issue is in that state; ``int``
    # converts the numpy integer into a plain Python number for the widget.
    st.metric(label="Open issues", value=int(state_counts.get('open', 0)))
with col2:
    st.metric(label="Closed issues", value=int(state_counts.get('closed', 0)))
with col3:
    mean_time_to_close = closed_issues['Time to Close'].mean()
    # The mean of an empty column is NaT; pass None then, rather than NaN.
    average_time_to_close = None if pd.isna(mean_time_to_close) else mean_time_to_close.days
    st.metric(label="Avg. days to close", value=average_time_to_close)
# TODO Plot: number of open vs closed issues by date
# st.subheader("Latest bugs π")
# bug_issues = open_issues[open_issues["Labels"].apply(lambda labels: "type: bug" in labels)]
# bug_issues = bug_issues[["Issue","Labels","Created at","URL"]]
# st.dataframe(
# bug_issues.sort_values(by="Created at", ascending=False),
# hide_index=True,
# column_config={
# "Issue": st.column_config.TextColumn("Issue", width=400),
# "Labels": st.column_config.TextColumn("Labels"),
# "Created at": st.column_config.DatetimeColumn("Created at"),
# "URL": st.column_config.LinkColumn("π", display_text="π")
# }
# )
def _last_update_naive(issues):
    """Return the "Last update" column of *issues* as timezone-naive timestamps.

    Stripping the timezone makes the values comparable with the naive
    datetimes built from the date pickers.
    """
    return pd.to_datetime(issues["Last update"]).dt.tz_localize(None)


def _show_issues_by_last_update(issues, ascending):
    """Render *issues* as a table sorted by "Last update".

    Args:
        issues: DataFrame holding at least the columns "URL", "Issue",
            "Labels", "Repository" and "Last update".
        ascending: True to list the oldest update first, False for the newest.
    """
    st.dataframe(
        issues[["URL", "Issue", "Labels", "Repository", "Last update"]].sort_values(by="Last update", ascending=ascending),
        hide_index=True,
        column_config={
            "Issue": st.column_config.TextColumn("Issue", width="large"),
            "Labels": st.column_config.ListColumn("Labels", width="large"),
            "Last update": st.column_config.DatetimeColumn("Last update", width="medium"),
            "URL": st.column_config.LinkColumn("π", display_text="π", width="small"),
        },
    )


st.subheader("Latest updates π")
col1, col2 = st.columns(2)
with col1:
    last_update_date = st.date_input("Last updated after:", value=today - datetime.timedelta(days=7), format="DD-MM-YYYY")
    # Turn the picked date into a datetime at midnight for the comparison.
    last_update_date = datetime.datetime.combine(last_update_date, datetime.datetime.min.time())
with col2:
    updated_issues = open_issues[_last_update_naive(open_issues) > pd.to_datetime(last_update_date)]
    st.metric("Results:", updated_issues.shape[0])
_show_issues_by_last_update(updated_issues, ascending=False)

st.subheader("Stale issues? πΈοΈ")
col1, col2 = st.columns(2)
with col1:
    not_updated_since = st.date_input("Not updated since:", value=today - datetime.timedelta(days=90), format="DD-MM-YYYY")
    not_updated_since = datetime.datetime.combine(not_updated_since, datetime.datetime.min.time())
with col2:
    stale_issues = open_issues[_last_update_naive(open_issues) < pd.to_datetime(not_updated_since)]
    st.metric("Results:", stale_issues.shape[0])
_show_issues_by_last_update(stale_issues, ascending=True)
# Section 2: Issue classification
st.header("Issue classification")
col1, col2 = st.columns(2)


def _count_labels(repo_issues):
    """Count how often each label occurs among the issues of one repository."""
    one_label_per_row = repo_issues.explode("Labels")
    return one_label_per_row.value_counts("Labels").to_frame().reset_index()


## Dataframe: Number of open issues by label.
with col1:
    st.subheader("Top ten labels π")
    # One block of label counts per repository; resetting the index turns
    # the repository name back into a regular column.
    per_repo_counts = open_issues.groupby("Repository").apply(_count_labels)
    label_counts = per_repo_counts.reset_index()
def generate_labels_link(labels, repos, org="argilla-io"):
    """Build GitHub search URLs listing the open issues that carry a label.

    Args:
        labels: iterable of label names.
        repos: iterable of repository names, paired element-wise with *labels*.
        org: GitHub organisation (or user) that owns the repositories.

    Returns:
        list[str]: one URL per (label, repository) pair, in input order.
    """
    # Imported locally so the helper does not depend on the file's import block.
    from urllib.parse import quote_plus

    # quote_plus percent-encodes everything that is unsafe inside a query
    # string (spaces become "+"), so labels containing "&", "#", "+" or
    # quotes no longer produce a broken link.
    return [
        f"https://github.com/{org}/{repo}/issues?q=is:open+is:issue+label:%22{quote_plus(label)}%22"
        for label, repo in zip(labels, repos)
    ]
# Re-enter the first column so the table renders below its subheader.
with col1:
    # One search link per (label, repository) row.
    label_counts['Link'] = generate_labels_link(label_counts['Labels'], label_counts['Repository'])
    # The counts are grouped by repository, so the rows must be ranked by
    # count across all repositories before keeping the first ten; otherwise
    # the table only shows the labels of the first repository.
    top_labels = label_counts.sort_values(by="count", ascending=False).head(10)
    st.dataframe(
        top_labels[["Link", "Labels", "Repository", "count"]],
        hide_index=True,
        column_config={
            "Labels": st.column_config.TextColumn("Labels"),
            "count": st.column_config.NumberColumn("Count"),
            "Link": st.column_config.LinkColumn("π", display_text="π"),
        },
    )
## Cloud of words: Issue titles
with col2:
    st.subheader("Cloud of words βοΈ")
    titles = " ".join(open_issues["Issue"])
    # Remove bracketed fragments (e.g. "[BUG]") from the joined titles.
    titles = re.sub(r'\[.*?\]', '', titles)
    try:
        wordcloud = WordCloud(width=800, height=400, background_color="black").generate(titles)
    except ValueError:
        # WordCloud refuses to build a cloud when the text holds no usable word.
        wordcloud = None
    if wordcloud is None:
        st.info("Not enough open issue titles to build a word cloud.")
    else:
        # Draw on an explicit figure instead of the global pyplot state and
        # close it afterwards, so figures do not pile up across reruns.
        fig, ax = plt.subplots(figsize=(10, 5))
        ax.imshow(wordcloud, interpolation="bilinear")
        ax.axis("off")
        st.pyplot(fig, use_container_width=True)
        plt.close(fig)
# # Community engagement
st.header("Community engagement")
# ## Dataframe: Latest issues open by the community
# ## Dataframe: issues sorted by number of comments
st.subheader("Top engaging issues π¬")
# Columns shown in the table, in display order.
engagement_columns = ["URL", "Issue", "Repository", "Created at", "Reactions", "Comments"]
# Rank the open issues by reactions first and comments second, highest first,
# and keep the ten best.
ranked_issues = open_issues[engagement_columns].sort_values(by=["Reactions", "Comments"], ascending=False)
engagement_df = ranked_issues.head(10)
engagement_column_config = {
    "Issue": st.column_config.TextColumn("Issue", width="large"),
    "Reactions": st.column_config.NumberColumn("Reactions", format="%d π", width="small"),
    "Comments": st.column_config.NumberColumn("Comments", format="%d π¬", width="small"),
    "URL": st.column_config.LinkColumn("π", display_text="π", width="small"),
}
st.dataframe(engagement_df, hide_index=True, column_config=engagement_column_config)
# ## Cloud of words: Comments??
# ## Dataframe: Contributor leaderboard.
# # Issue dependencies
# st.header("Issue dependencies")
# ## Map: dependencies between issues. Network of issue mentions.x
# status.update(label="Checking for updated data...", state="running")
# updated_data = fetch_data()
# if df.equals(updated_data):
# status.update(label="Data is up to date!", state="complete")
# else:
# save_data(updated_data)
# status.update(label="Refresh for updated data!", state="complete")
|