arshy committed
Commit b7308a9 · 1 Parent(s): b7433f5

chunk based

Files changed (1)
  1. app.py +33 -9
app.py CHANGED
@@ -42,24 +42,48 @@ logger = get_logger()
 
 def get_last_one_month_data():
     con = duckdb.connect(':memory:')
-    one_month_ago = (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d')
+    # one_month_ago = (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d')
 
-    query1 = f"""
-        SELECT *
-        FROM read_parquet('./data/tools.parquet')
-        WHERE request_time >= '{one_month_ago}'
-    """
+    # Query to fetch data from all_trades_profitability.parquet
     query2 = f"""
         SELECT *
         FROM read_parquet('./data/all_trades_profitability.parquet')
-        WHERE creation_timestamp >= '{one_month_ago}'
     """
-    logger.info("Query 1: " + query1)
+
     logger.info("Query 2: " + query2)
     df2 = con.execute(query2).fetchdf()
     logger.info("here2")
-    df1 = con.execute(query1).fetchdf()
+
+    # Read tools.parquet in chunks to keep peak memory usage bounded
+    parquet_file_path = './data/tools.parquet'
+    chunk_size = 100000  # Adjust chunk size based on your memory capacity
+    offset = 0
+    all_filtered_data = []
+
+    while True:
+        # Read one chunk of rows via LIMIT/OFFSET pagination
+        query1 = f"""
+            SELECT *
+            FROM read_parquet('{parquet_file_path}') LIMIT {chunk_size} OFFSET {offset}
+        """
+
+        logger.info("Query 1: " + query1)
+        chunk_df = con.execute(query1).fetchdf()
+
+        # Break if the chunk is empty (end of file)
+        if chunk_df.empty:
+            break
+
+        # Collect the chunk
+        all_filtered_data.append(chunk_df)
+
+        # Advance the offset to the next chunk
+        offset += chunk_size
+
+    # Concatenate all chunks into a single DataFrame
+    df1 = pd.concat(all_filtered_data, ignore_index=True)
     logger.info("here3")
+
     con.close()
     return df1, df2
 
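
For reference, a minimal standalone sketch of the chunked-read pattern this commit introduces, assuming duckdb and pandas are installed; the helper name read_parquet_in_chunks, the example path, and the default chunk size are illustrative and not part of the commit:

    import duckdb
    import pandas as pd

    def read_parquet_in_chunks(path: str, chunk_size: int = 100_000) -> pd.DataFrame:
        """Read a Parquet file into a DataFrame one chunk at a time via DuckDB."""
        con = duckdb.connect(':memory:')
        chunks = []
        offset = 0
        while True:
            # LIMIT/OFFSET pagination: each pass materializes at most chunk_size rows
            chunk = con.execute(
                f"SELECT * FROM read_parquet('{path}') LIMIT {chunk_size} OFFSET {offset}"
            ).fetchdf()
            # An empty chunk means the end of the file has been reached
            if chunk.empty:
                break
            chunks.append(chunk)
            offset += chunk_size
        con.close()
        # Guard against an empty file, where pd.concat([]) would raise ValueError
        return pd.concat(chunks, ignore_index=True) if chunks else pd.DataFrame()

    # Usage (hypothetical path):
    # df = read_parquet_in_chunks('./data/tools.parquet', chunk_size=100_000)

Note that since all chunks are ultimately concatenated, the final DataFrame still holds the whole table; the chunking only bounds the per-query working set during each scan.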