Stefan committed on
Commit
1119c6c
·
1 Parent(s): 6937a88

Last Update

Browse files
Files changed (2) hide show
  1. app/data-formatted.csv +0 -0
  2. app/routes.py +69 -5
app/data-formatted.csv CHANGED
The diff for this file is too large to render. See raw diff
 
app/routes.py CHANGED
@@ -5,11 +5,70 @@ import numpy as np
5
  import requests
6
  from bs4 import BeautifulSoup
7
  import os
 
8
 
 
9
  router = APIRouter()
10
- data = pd.read_csv('app/data-formatted.csv')
11
  HF_API_KEY = os.getenv("HF_API_KEY")
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  def getBerzaNews(symbol):
15
  url = f'https://www.mse.mk/en/symbol/{symbol}'
@@ -34,7 +93,7 @@ def getBerzaNews(symbol):
34
  return news
35
 
36
 
37
-
38
  def analyzeSentiment(symbol):
39
  API_URL = "https://api-inference.huggingface.co/models/mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"
40
  headers = {"Authorization": f"Bearer {HF_API_KEY}"}
@@ -110,6 +169,9 @@ def predict_future_price(input_data):
110
  # Function to resample data for timeframes
111
  def resample_data(data, timeframe):
112
  data["DATE"] = pd.to_datetime(data["DATE"]) # Ensure DATE is in datetime format
 
 
 
113
  data = data.set_index("DATE") # Set DATE as the index
114
 
115
  # Select only numeric columns for resampling
@@ -125,7 +187,9 @@ def resample_data(data, timeframe):
125
  else:
126
  raise ValueError("Invalid timeframe. Choose '1D', '1W', or '1M'.")
127
 
128
- # Reset the index to bring DATE back as a column
 
 
129
  resampled_data = resampled_data.reset_index()
130
 
131
  # Reattach non-numeric columns (e.g., COMPANY)
@@ -182,8 +246,8 @@ def calculate_technical_indicators(data, column="PRICE OF LAST TRANSACTION"):
182
  @router.get("/stock-data/{ticker}")
183
  async def get_stock_data(ticker: str):
184
  print(f"Fetching data for ticker: {ticker}")
185
-
186
- stock_data = data[data["COMPANY"] == ticker]
187
  if stock_data.empty:
188
  print("No data found for the given ticker.")
189
  return {"error": "Ticker not found"}
 
5
  import requests
6
  from bs4 import BeautifulSoup
7
  import os
8
+ from dotenv import load_dotenv
9
 
10
+ load_dotenv()
11
  router = APIRouter()
 
12
  HF_API_KEY = os.getenv("HF_API_KEY")
13
 
14
+ from bs4 import BeautifulSoup
15
+ import requests
16
+ import csv
17
+ import pandas as pd
18
+
19
+
20
def getLatestStatistics(symbol):
    """Scrape the price history for *symbol* and merge it into the CSV store.

    Downloads the symbol-history page from mse.mk, reads every row of the
    ``#resultsTable`` table, converts the numeric cells to floats, appends
    the rows to ``app/data-formatted.csv``, drops exact duplicate rows and
    writes the file back.

    Args:
        symbol: Ticker used in the request URL and stored as the ``COMPANY``
            value of every scraped row.

    Returns:
        The merged, de-duplicated DataFrame (previous CSV contents plus the
        newly scraped rows) that was written to ``app/data-formatted.csv``.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched
            (including the request timing out).
        FileNotFoundError: If ``app/data-formatted.csv`` does not exist.
    """
    columns = [
        'COMPANY', 'DATE', 'PRICE OF LAST TRANSACTION', 'MAX', 'MIN',
        'AVERAGE PRICE', '% PERCENT', 'QUANTITY', 'REVENUE IN BEST DENARS',
        'TOTAL REVENUE IN DENARS',
    ]
    csv_path = 'app/data-formatted.csv'

    url = f'https://www.mse.mk/mk/stats/symbolhistory/{symbol}'
    # A timeout keeps a stalled upstream server from blocking the caller forever.
    response = requests.get(url, timeout=30)
    content = BeautifulSoup(response.text, 'html.parser')

    all_company_data = []
    for row in content.select("#resultsTable tbody tr"):
        cells = row.text.split('\n')
        # The row text is wrapped in newlines, so splitting leaves an empty
        # string at each end; strip them only when they are actually there.
        if cells and cells[-1] == '':
            cells.pop()
        if cells and cells[0] == '':
            cells.pop(0)
        # Any remaining empty cell means the row has a missing value: skip it.
        if '' in cells:
            continue
        all_company_data.append([symbol, *cells])

    dataframe = pd.DataFrame(all_company_data, columns=columns)

    # NOTE(review): assumes the /mk/ page formats numbers as "1.234,56"
    # (dot = thousands separator, comma = decimal separator) - confirm.
    # Thousands separators are removed first and only then is the decimal
    # comma turned into a dot, so "123,45" -> 123.45 and
    # "1.234.567,89" -> 1234567.89. regex=False makes '.' a literal dot on
    # every pandas version.
    for column in columns:
        if column in ('COMPANY', 'DATE'):
            continue
        try:
            dataframe[column] = (
                dataframe[column]
                .str.replace('.', '', regex=False)
                .str.replace(',', '.', regex=False)
                .astype(float)
            )
        except (AttributeError, TypeError, ValueError) as e:
            # Best effort: report the problem and leave the column as scraped.
            print(f"Error processing column {column}: {e}")

    # NOTE(review): if the site emits day-first dates (dd.mm.yyyy), this call
    # may need dayfirst=True - verify against the scraped values.
    dataframe['DATE'] = pd.to_datetime(dataframe['DATE'])

    current_data = pd.read_csv(csv_path)
    current_data['DATE'] = pd.to_datetime(current_data['DATE'])

    merged_df = pd.concat([current_data, dataframe], ignore_index=True)
    # Only rows identical in every column are treated as duplicates.
    updated_df = merged_df.drop_duplicates(keep='first')

    updated_df.to_csv(csv_path, index=False)

    return updated_df
71
+
72
 
73
  def getBerzaNews(symbol):
74
  url = f'https://www.mse.mk/en/symbol/{symbol}'
 
93
  return news
94
 
95
 
96
+ # Sentiment analysis via the Hugging Face Inference API (financial-news DistilRoBERTa model)
97
  def analyzeSentiment(symbol):
98
  API_URL = "https://api-inference.huggingface.co/models/mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"
99
  headers = {"Authorization": f"Bearer {HF_API_KEY}"}
 
169
  # Function to resample data for timeframes
170
  def resample_data(data, timeframe):
171
  data["DATE"] = pd.to_datetime(data["DATE"]) # Ensure DATE is in datetime format
172
+
173
+ data = data.drop_duplicates(subset="DATE", keep="first") # Drop duplicate dates
174
+
175
  data = data.set_index("DATE") # Set DATE as the index
176
 
177
  # Select only numeric columns for resampling
 
187
  else:
188
  raise ValueError("Invalid timeframe. Choose '1D', '1W', or '1M'.")
189
 
190
+ print(f"Resampled data for {timeframe} timeframe:")
191
+ print(resampled_data)
192
+
193
  resampled_data = resampled_data.reset_index()
194
 
195
  # Reattach non-numeric columns (e.g., COMPANY)
 
246
  @router.get("/stock-data/{ticker}")
247
  async def get_stock_data(ticker: str):
248
  print(f"Fetching data for ticker: {ticker}")
249
+ latest_data = getLatestStatistics(ticker)
250
+ stock_data = latest_data[latest_data["COMPANY"] == ticker]
251
  if stock_data.empty:
252
  print("No data found for the given ticker.")
253
  return {"error": "Ticker not found"}