rosacastillo committed on
Commit
5698f53
·
1 Parent(s): df9801c

adding invalid markets graphs

Browse files
app.py CHANGED
@@ -25,6 +25,14 @@ from tabs.tool_accuracy import (
25
  plot_tools_accuracy_graph,
26
  plot_tools_weighted_accuracy_graph,
27
  )
 
 
 
 
 
 
 
 
28
  from tabs.error import (
29
  get_error_data,
30
  get_error_data_overall,
@@ -86,11 +94,18 @@ def get_last_one_month_data():
86
 
87
  def get_all_data():
88
  """
89
- Get all data from the tools.parquet, tools_accuracy and all_trades_profitability.parquet files
90
  """
91
  logger.info("Getting all data")
92
  con = duckdb.connect(":memory:")
93
 
 
 
 
 
 
 
 
94
  # Query to fetch tools accuracy data
95
  query3 = f"""
96
  SELECT *
@@ -115,14 +130,14 @@ def get_all_data():
115
 
116
  con.close()
117
 
118
- return df1, df2, df3
119
 
120
 
121
  def prepare_data():
122
  """
123
  Prepare the data for the dashboard
124
  """
125
- tools_df, trades_df, tools_accuracy_info = get_all_data()
126
 
127
  tools_df["request_time"] = pd.to_datetime(tools_df["request_time"])
128
  trades_df["creation_timestamp"] = pd.to_datetime(trades_df["creation_timestamp"])
@@ -133,10 +148,15 @@ def prepare_data():
133
  print("weighted accuracy info")
134
  print(tools_accuracy_info.head())
135
 
136
- return tools_df, trades_df, tools_accuracy_info
 
 
 
 
 
137
 
138
 
139
- tools_df, trades_df, tools_accuracy_info = prepare_data()
140
 
141
 
142
  demo = gr.Blocks()
@@ -279,6 +299,27 @@ with demo:
279
  with gr.Row():
280
  plot_tools_weighted_accuracy_graph(tools_accuracy_info)
281
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
282
  with gr.TabItem("🏥 Tool Error Dashboard"):
283
  with gr.Row():
284
  gr.Markdown("# All tools errors")
 
25
  plot_tools_accuracy_graph,
26
  plot_tools_weighted_accuracy_graph,
27
  )
28
+
29
+ from tabs.invalid_markets import (
30
+ plot_daily_dist_invalid_trades,
31
+ plot_ratio_invalid_trades_per_market,
32
+ plot_top_invalid_markets,
33
+ plot_daily_nr_invalid_markets,
34
+ )
35
+
36
  from tabs.error import (
37
  get_error_data,
38
  get_error_data_overall,
 
94
 
95
  def get_all_data():
96
  """
97
+ Get all data from the tools.parquet, tools_accuracy and trades parquet files
98
  """
99
  logger.info("Getting all data")
100
  con = duckdb.connect(":memory:")
101
 
102
+ # Query to fetch invalid trades data
103
+ query4 = f"""
104
+ SELECT *
105
+ FROM read_parquet('./data/invalid_trades.parquet')
106
+ """
107
+ df4 = con.execute(query4).fetchdf()
108
+
109
  # Query to fetch tools accuracy data
110
  query3 = f"""
111
  SELECT *
 
130
 
131
  con.close()
132
 
133
+ return df1, df2, df3, df4
134
 
135
 
136
  def prepare_data():
137
  """
138
  Prepare the data for the dashboard
139
  """
140
+ tools_df, trades_df, tools_accuracy_info, invalid_trades = get_all_data()
141
 
142
  tools_df["request_time"] = pd.to_datetime(tools_df["request_time"])
143
  trades_df["creation_timestamp"] = pd.to_datetime(trades_df["creation_timestamp"])
 
148
  print("weighted accuracy info")
149
  print(tools_accuracy_info.head())
150
 
151
+ invalid_trades["creation_timestamp"] = pd.to_datetime(
152
+ invalid_trades["creation_timestamp"]
153
+ )
154
+ invalid_trades["creation_date"] = invalid_trades["creation_timestamp"].dt.date
155
+
156
+ return tools_df, trades_df, tools_accuracy_info, invalid_trades
157
 
158
 
159
+ tools_df, trades_df, tools_accuracy_info, invalid_trades = prepare_data()
160
 
161
 
162
  demo = gr.Blocks()
 
299
  with gr.Row():
300
  plot_tools_weighted_accuracy_graph(tools_accuracy_info)
301
 
302
+ with gr.TabItem("⛔ Invalid Markets Dashboard"):
303
+ with gr.Row():
304
+ gr.Markdown("# Daily distribution of invalid trades")
305
+ with gr.Row():
306
+ plot_daily_dist_invalid_trades(invalid_trades)
307
+
308
+ with gr.Row():
309
+ gr.Markdown("# Ratio of invalid trades per market")
310
+ with gr.Row():
311
+ plot_ratio_invalid_trades_per_market(invalid_trades)
312
+
313
+ with gr.Row():
314
+ gr.Markdown("# Top markets with invalid trades")
315
+ with gr.Row():
316
+ plot_top_invalid_markets(invalid_trades)
317
+
318
+ with gr.Row():
319
+ gr.Markdown("# Daily distribution of invalid markets")
320
+ with gr.Row():
321
+ plot_daily_nr_invalid_markets(invalid_trades)
322
+
323
  with gr.TabItem("🏥 Tool Error Dashboard"):
324
  with gr.Row():
325
  gr.Markdown("# All tools errors")
data/fpmmTrades.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1bdad279707286ea22f963e56e3aa9ba38341cbc5e5bce6f2c9384c95b949bf6
3
- size 5618058
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33d2a861848b7f5cd5ccd562355a16cc1ac6ee7ed41ae910d3e837290356b89c
3
+ size 1372727
notebooks/invalid_markets.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
tabs/invalid_markets.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import gradio as gr
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+
6
+
7
def plot_daily_dist_invalid_trades(invalid_trades: pd.DataFrame):
    """Plot the distribution of daily invalid trades, regardless of market.

    Args:
        invalid_trades: Invalid trades; must contain a ``creation_date``
            column, which is used as the histogram's x variable.

    Returns:
        A ``gr.Plot`` component wrapping the histogram figure.
    """
    # Use a dedicated figure/axes. Axes-level seaborn functions draw on
    # pyplot's *current* axes when no ``ax`` is given, so relying on that
    # implicit state lets unrelated charts end up layered on the same figure.
    fig, ax = plt.subplots()
    sns.histplot(data=invalid_trades, x="creation_date", kde=True, ax=ax)
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
    ax.set_xlabel("Creation date")
    ax.set_ylabel("Daily number of invalid trades")
    ax.set_title("Distribution of daily invalid trades over time")
    return gr.Plot(value=fig)
15
+
16
+
17
def plot_daily_nr_invalid_markets(invalid_trades: pd.DataFrame):
    """Plot the number of distinct invalid markets per day over time.

    Args:
        invalid_trades: Invalid trades; must contain ``creation_date`` and
            ``title`` columns. Distinct ``title`` values are counted per
            ``creation_date``.

    Returns:
        A ``gr.Plot`` component wrapping the line chart figure.
    """
    # Only the per-day count of distinct titles is plotted, so compute just
    # that instead of an additional, unused trade count.
    daily_invalid_markets = (
        invalid_trades.groupby("creation_date")["title"]
        .nunique()
        .reset_index(name="nr_markets")
    )
    # NOTE: set_theme changes seaborn/matplotlib defaults globally; it is kept
    # here (before the axes are created) to preserve the original styling.
    sns.set_theme(palette="viridis")
    # Use a dedicated figure/axes: without an explicit ``ax`` the line would be
    # drawn on whatever figure pyplot currently considers active, i.e. on top
    # of a chart created earlier by another function.
    fig, ax = plt.subplots()
    sns.lineplot(data=daily_invalid_markets, x="creation_date", y="nr_markets", ax=ax)
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
    ax.set_xlabel("Creation date")
    ax.set_ylabel("Daily number of invalid markets")
    ax.set_title("Evolution of daily invalid markets over time")
    return gr.Plot(value=fig)
31
+
32
+
33
def plot_ratio_invalid_trades_per_market(invalid_trades: pd.DataFrame):
    """Plot how many invalid trades each market accumulates.

    Market titles are replaced by integer ids (via ``pd.factorize``) so the
    x axis stays readable; the histogram is drawn over those ids.

    Args:
        invalid_trades: Invalid trades; must contain a ``title`` column.
            The frame is not modified.

    Returns:
        A ``gr.Plot`` component wrapping the distribution figure.
    """
    codes, _ = pd.factorize(invalid_trades["title"])
    # Plot from a small standalone frame rather than adding a ``title_id``
    # column to the caller's DataFrame, which is shared with other charts.
    market_ids = pd.DataFrame({"title_id": codes})

    # displot is figure-level: it creates its own figure and returns a
    # FacetGrid, not an Axes.
    grid = sns.displot(data=market_ids, x="title_id")
    grid.set_axis_labels("market id", "Total number of invalid trades by market")
    grid.ax.set_title("Distribution of invalid trades per market")
    # FacetGrid has no ``get_figure()``; the figure is exposed as ``.figure``.
    return gr.Plot(value=grid.figure)
45
+
46
+
47
def plot_top_invalid_markets(invalid_trades: pd.DataFrame):
    """Plot the markets ranked by their number of invalid trades.

    Args:
        invalid_trades: Invalid trades; must contain a ``title`` column.

    Returns:
        A ``gr.Plot`` component wrapping the horizontal bar chart figure.
    """
    # Name both columns explicitly. The column names produced by
    # ``value_counts().reset_index()`` differ between pandas 1.x
    # ("index"/"title") and 2.x ("title"/"count"), so renaming "count"
    # afterwards only works on pandas >= 2.0.
    top_invalid_markets = (
        invalid_trades["title"]
        .value_counts()
        .rename_axis("title")
        .reset_index(name="nr_invalid_trades")
    )
    # Dedicated figure with an explicit ``ax`` so the bars never depend on, or
    # leak into, pyplot's current-figure state.
    fig, ax = plt.subplots(figsize=(25, 10))
    sns.barplot(
        data=top_invalid_markets,
        x="nr_invalid_trades",
        y="title",
        hue="title",
        dodge=False,
        ax=ax,
    )
    return gr.Plot(value=fig)