rosacastillo
committed on
Commit
·
5698f53
1
Parent(s):
df9801c
adding invalid markets graphs
Browse files- app.py +46 -5
- data/fpmmTrades.parquet +2 -2
- notebooks/invalid_markets.ipynb +0 -0
- tabs/invalid_markets.py +59 -0
app.py
CHANGED
@@ -25,6 +25,14 @@ from tabs.tool_accuracy import (
|
|
25 |
plot_tools_accuracy_graph,
|
26 |
plot_tools_weighted_accuracy_graph,
|
27 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
from tabs.error import (
|
29 |
get_error_data,
|
30 |
get_error_data_overall,
|
@@ -86,11 +94,18 @@ def get_last_one_month_data():
|
|
86 |
|
87 |
def get_all_data():
|
88 |
"""
|
89 |
-
Get all data from the tools.parquet, tools_accuracy and
|
90 |
"""
|
91 |
logger.info("Getting all data")
|
92 |
con = duckdb.connect(":memory:")
|
93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
# Query to fetch tools accuracy data
|
95 |
query3 = f"""
|
96 |
SELECT *
|
@@ -115,14 +130,14 @@ def get_all_data():
|
|
115 |
|
116 |
con.close()
|
117 |
|
118 |
-
return df1, df2, df3
|
119 |
|
120 |
|
121 |
def prepare_data():
|
122 |
"""
|
123 |
Prepare the data for the dashboard
|
124 |
"""
|
125 |
-
tools_df, trades_df, tools_accuracy_info = get_all_data()
|
126 |
|
127 |
tools_df["request_time"] = pd.to_datetime(tools_df["request_time"])
|
128 |
trades_df["creation_timestamp"] = pd.to_datetime(trades_df["creation_timestamp"])
|
@@ -133,10 +148,15 @@ def prepare_data():
|
|
133 |
print("weighted accuracy info")
|
134 |
print(tools_accuracy_info.head())
|
135 |
|
136 |
-
|
|
|
|
|
|
|
|
|
|
|
137 |
|
138 |
|
139 |
-
tools_df, trades_df, tools_accuracy_info = prepare_data()
|
140 |
|
141 |
|
142 |
demo = gr.Blocks()
|
@@ -279,6 +299,27 @@ with demo:
|
|
279 |
with gr.Row():
|
280 |
plot_tools_weighted_accuracy_graph(tools_accuracy_info)
|
281 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
282 |
with gr.TabItem("🏥 Tool Error Dashboard"):
|
283 |
with gr.Row():
|
284 |
gr.Markdown("# All tools errors")
|
|
|
25 |
plot_tools_accuracy_graph,
|
26 |
plot_tools_weighted_accuracy_graph,
|
27 |
)
|
28 |
+
|
29 |
+
from tabs.invalid_markets import (
|
30 |
+
plot_daily_dist_invalid_trades,
|
31 |
+
plot_ratio_invalid_trades_per_market,
|
32 |
+
plot_top_invalid_markets,
|
33 |
+
plot_daily_nr_invalid_markets,
|
34 |
+
)
|
35 |
+
|
36 |
from tabs.error import (
|
37 |
get_error_data,
|
38 |
get_error_data_overall,
|
|
|
94 |
|
95 |
def get_all_data():
|
96 |
"""
|
97 |
+
Get all data from the tools.parquet, tools_accuracy and trades parquet files
|
98 |
"""
|
99 |
logger.info("Getting all data")
|
100 |
con = duckdb.connect(":memory:")
|
101 |
|
102 |
+
# Query to fetch invalid trades data
|
103 |
+
query4 = f"""
|
104 |
+
SELECT *
|
105 |
+
FROM read_parquet('./data/invalid_trades.parquet')
|
106 |
+
"""
|
107 |
+
df4 = con.execute(query4).fetchdf()
|
108 |
+
|
109 |
# Query to fetch tools accuracy data
|
110 |
query3 = f"""
|
111 |
SELECT *
|
|
|
130 |
|
131 |
con.close()
|
132 |
|
133 |
+
return df1, df2, df3, df4
|
134 |
|
135 |
|
136 |
def prepare_data():
|
137 |
"""
|
138 |
Prepare the data for the dashboard
|
139 |
"""
|
140 |
+
tools_df, trades_df, tools_accuracy_info, invalid_trades = get_all_data()
|
141 |
|
142 |
tools_df["request_time"] = pd.to_datetime(tools_df["request_time"])
|
143 |
trades_df["creation_timestamp"] = pd.to_datetime(trades_df["creation_timestamp"])
|
|
|
148 |
print("weighted accuracy info")
|
149 |
print(tools_accuracy_info.head())
|
150 |
|
151 |
+
invalid_trades["creation_timestamp"] = pd.to_datetime(
|
152 |
+
invalid_trades["creation_timestamp"]
|
153 |
+
)
|
154 |
+
invalid_trades["creation_date"] = invalid_trades["creation_timestamp"].dt.date
|
155 |
+
|
156 |
+
return tools_df, trades_df, tools_accuracy_info, invalid_trades
|
157 |
|
158 |
|
159 |
+
tools_df, trades_df, tools_accuracy_info, invalid_trades = prepare_data()
|
160 |
|
161 |
|
162 |
demo = gr.Blocks()
|
|
|
299 |
with gr.Row():
|
300 |
plot_tools_weighted_accuracy_graph(tools_accuracy_info)
|
301 |
|
302 |
+
with gr.TabItem("⛔ Invalid Markets Dashboard"):
|
303 |
+
with gr.Row():
|
304 |
+
gr.Markdown("# Daily distribution of invalid trades")
|
305 |
+
with gr.Row():
|
306 |
+
plot_daily_dist_invalid_trades(invalid_trades)
|
307 |
+
|
308 |
+
with gr.Row():
|
309 |
+
gr.Markdown("# Ratio of invalid trades per market")
|
310 |
+
with gr.Row():
|
311 |
+
plot_ratio_invalid_trades_per_market(invalid_trades)
|
312 |
+
|
313 |
+
with gr.Row():
|
314 |
+
gr.Markdown("# Top markets with invalid trades")
|
315 |
+
with gr.Row():
|
316 |
+
plot_top_invalid_markets(invalid_trades)
|
317 |
+
|
318 |
+
with gr.Row():
|
319 |
+
gr.Markdown("# Daily distribution of invalid markets")
|
320 |
+
with gr.Row():
|
321 |
+
plot_daily_nr_invalid_markets(invalid_trades)
|
322 |
+
|
323 |
with gr.TabItem("🏥 Tool Error Dashboard"):
|
324 |
with gr.Row():
|
325 |
gr.Markdown("# All tools errors")
|
data/fpmmTrades.parquet
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33d2a861848b7f5cd5ccd562355a16cc1ac6ee7ed41ae910d3e837290356b89c
|
3 |
+
size 1372727
|
notebooks/invalid_markets.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tabs/invalid_markets.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import gradio as gr
|
3 |
+
import matplotlib.pyplot as plt
|
4 |
+
import seaborn as sns
|
5 |
+
|
6 |
+
|
7 |
+
def plot_daily_dist_invalid_trades(invalid_trades: pd.DataFrame):
    """Plot the distribution of invalid trades per creation date, across all markets.

    Args:
        invalid_trades: One row per invalid trade. Must contain a
            ``creation_date`` column.

    Returns:
        A ``gr.Plot`` component wrapping the matplotlib figure.
    """
    # Use a dedicated figure/axes pair instead of pyplot's implicit "current"
    # axes, so this chart can never be drawn on top of a figure left active
    # by another plotting function.
    fig, ax = plt.subplots()
    sns.histplot(data=invalid_trades, x="creation_date", kde=True, ax=ax)
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
    ax.set_xlabel("Creation date")
    ax.set_ylabel("Daily number of invalid trades")
    ax.set_title("Distribution of daily invalid trades over time")
    return gr.Plot(value=fig)
|
15 |
+
|
16 |
+
|
17 |
+
def plot_daily_nr_invalid_markets(invalid_trades: pd.DataFrame):
    """Plot the number of distinct invalid markets per creation date.

    Args:
        invalid_trades: One row per invalid trade. Must contain the columns
            ``creation_date`` and ``title`` (``title`` identifies the market).

    Returns:
        A ``gr.Plot`` component wrapping the matplotlib figure.
    """
    # Only the number of distinct market titles per day is plotted.
    daily_invalid_markets = (
        invalid_trades.groupby("creation_date")["title"]
        .nunique()
        .reset_index(name="nr_markets")
    )

    # NOTE: set_theme changes seaborn/matplotlib defaults globally; it is kept
    # from the original implementation and applied before the figure is
    # created so the new axes pick it up.
    sns.set_theme(palette="viridis")

    # Draw on a dedicated figure. Relying on the implicit current axes made
    # this line plot land on whatever figure was created last.
    fig, ax = plt.subplots()
    sns.lineplot(data=daily_invalid_markets, x="creation_date", y="nr_markets", ax=ax)
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
    ax.set_xlabel("Creation date")
    ax.set_ylabel("Daily number of invalid markets")
    ax.set_title("Evolution of daily invalid markets over time")
    return gr.Plot(value=fig)
|
31 |
+
|
32 |
+
|
33 |
+
def plot_ratio_invalid_trades_per_market(invalid_trades: pd.DataFrame):
    """Plot how many invalid trades each market accumulates.

    Every distinct market title is mapped to an integer id (in order of first
    appearance) and a histogram of those ids is drawn.

    Args:
        invalid_trades: One row per invalid trade. Must contain a ``title``
            column. The DataFrame is not modified.

    Returns:
        A ``gr.Plot`` component wrapping the matplotlib figure.
    """
    codes, _ = pd.factorize(invalid_trades["title"])

    # Plot from a small standalone frame rather than adding a "title_id"
    # column to the caller's DataFrame, which is shared with the other plots.
    market_ids = pd.DataFrame({"title_id": codes})

    # displot is figure-level: it creates its own figure and returns a
    # FacetGrid. A FacetGrid has no get_figure(); the figure is exposed via
    # the `.figure` attribute.
    grid = sns.displot(market_ids, x="title_id")
    grid.set_axis_labels("market id", "Total number of invalid trades by market")
    grid.ax.set_title("Distribution of invalid trades per market")
    return gr.Plot(value=grid.figure)
|
45 |
+
|
46 |
+
|
47 |
+
def plot_top_invalid_markets(invalid_trades: pd.DataFrame):
    """Plot the markets ranked by their number of invalid trades.

    All markets present in ``invalid_trades`` are drawn as horizontal bars,
    ordered from the highest to the lowest number of invalid trades.

    Args:
        invalid_trades: One row per invalid trade. Must contain a ``title``
            column identifying the market.

    Returns:
        A ``gr.Plot`` component wrapping the matplotlib figure.
    """
    # Name both output columns explicitly. The column names produced by
    # value_counts().reset_index() differ between pandas versions
    # ("index"/"title" before 2.0, "title"/"count" from 2.0 on), so a rename
    # of "count" is not portable.
    top_invalid_markets = (
        invalid_trades["title"]
        .value_counts()
        .rename_axis("title")
        .reset_index(name="nr_invalid_trades")
    )

    # Dedicated wide figure: market titles are long and there can be many bars.
    fig, ax = plt.subplots(figsize=(25, 10))
    sns.barplot(
        data=top_invalid_markets,
        x="nr_invalid_trades",
        y="title",
        hue="title",
        dodge=False,
        ax=ax,
    )
    return gr.Plot(value=fig)
|