Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -7,6 +7,7 @@ from db_operations.db_operations import DBOperations
|
|
7 |
import logging
|
8 |
import traceback
|
9 |
import redis
|
|
|
10 |
from datetime import datetime
|
11 |
from functools import lru_cache
|
12 |
from word_cloud import get_frequent_words_html
|
@@ -16,8 +17,10 @@ from config import NEWS_RETENTION_SECONDS, UK_EDITION_URL
|
|
16 |
app = Flask(__name__)
|
17 |
CORS(app)
|
18 |
redis_client = redis.Redis(host='localhost', port=6379, decode_responses=True)
|
19 |
-
logging.
|
|
|
20 |
db = DBOperations()
|
|
|
21 |
|
22 |
|
23 |
REFRESH_FREQ = 300 # 300 secs = 5 mins
|
@@ -25,7 +28,7 @@ REFRESH_FREQ = 300 # 300 secs = 5 mins
|
|
25 |
def is_db_fetch_reqd():
|
26 |
try:
|
27 |
env_news_time = redis_client.get('NEWSFETCHTIME')
|
28 |
-
logging.warning(f'fetch_time_env_var: {env_news_time}')
|
29 |
fetch_flag = 1
|
30 |
if env_news_time is None:
|
31 |
redis_client.set("NEWSFETCHTIME", str(datetime.now()))
|
@@ -46,7 +49,7 @@ def is_db_fetch_reqd():
|
|
46 |
|
47 |
def correct_date(x):
|
48 |
if (not isinstance(x, str)) or (str(x).find(":") == -1):
|
49 |
-
logging.warning(f'correct_date() error: {x} is not the right date format')
|
50 |
return "2020-11-07 00:36:44+05:30"
|
51 |
return x
|
52 |
|
@@ -59,7 +62,7 @@ def date_time_parser(dt):
|
|
59 |
try:
|
60 |
return int(np.round((dt.now(dt.tz) - dt).total_seconds() / 60, 0))
|
61 |
except:
|
62 |
-
logging.warning(f'date_time_parser() error: {dt} is not the right date format')
|
63 |
return 100000
|
64 |
|
65 |
|
@@ -104,11 +107,11 @@ def elapsed_time_str(mins):
|
|
104 |
|
105 |
def fetch_from_db(fetch_flag):
|
106 |
try:
|
107 |
-
logging.warning(f'fetch_flag: {fetch_flag}')
|
108 |
if fetch_flag == 1:
|
109 |
final_df = db.read_news_from_db()
|
110 |
freq_tokens = get_frequent_words_html(final_df)
|
111 |
-
logging.warning('Fetched From DB\n\n')
|
112 |
|
113 |
final_df['_id'] = final_df['_id'].astype('str')
|
114 |
|
@@ -117,7 +120,7 @@ def fetch_from_db(fetch_flag):
|
|
117 |
else:
|
118 |
final_df = pd.read_json(redis_client.get("NEWSDF"))
|
119 |
freq_tokens = redis_client.get("NEWSWORDCLOUD")
|
120 |
-
logging.warning('Fetched From Cache\n\n')
|
121 |
|
122 |
except Exception as e:
|
123 |
print(e)
|
@@ -134,6 +137,8 @@ def index():
|
|
134 |
Entry point
|
135 |
"""
|
136 |
try:
|
|
|
|
|
137 |
src_str = ''
|
138 |
status_code = 200
|
139 |
final_df, freq_tokens = fetch_from_db(is_db_fetch_reqd())
|
@@ -157,7 +162,7 @@ def index():
|
|
157 |
except Exception as e:
|
158 |
final_df = pd.DataFrame({'title': '', 'url': '',
|
159 |
'description': '', 'src_time': ''}, index=[0])
|
160 |
-
logging.warning(traceback.print_exc())
|
161 |
|
162 |
result_str = f'''
|
163 |
<div class="box" id="main">
|
|
|
7 |
import logging
|
8 |
import traceback
|
9 |
import redis
|
10 |
+
import uuid
|
11 |
from datetime import datetime
|
12 |
from functools import lru_cache
|
13 |
from word_cloud import get_frequent_words_html
|
|
|
17 |
app = Flask(__name__)
CORS(app)
redis_client = redis.Redis(host='localhost', port=6379, decode_responses=True)
logging.basicConfig(format='%(asctime)s %(message)s')

# Identifier used as a prefix in the log lines of this module; index()
# rebinds it to a fresh uuid4 hex on each call. It has to be bound before the
# first log statement below: that f-string reads `session_id`, and evaluating
# it while the name is still undefined raises NameError at import time.
session_id = None

# Startup probe: logs the result of a Redis PING.
logging.warning(f'[session_id: {session_id}] Is Redis available?: {redis_client.ping()}')
db = DBOperations()


REFRESH_FREQ = 300  # 300 secs = 5 mins
|
|
|
28 |
def is_db_fetch_reqd():
|
29 |
try:
|
30 |
env_news_time = redis_client.get('NEWSFETCHTIME')
|
31 |
+
logging.warning(f'[session_id: {session_id}] fetch_time_env_var: {env_news_time}')
|
32 |
fetch_flag = 1
|
33 |
if env_news_time is None:
|
34 |
redis_client.set("NEWSFETCHTIME", str(datetime.now()))
|
|
|
49 |
|
50 |
def correct_date(x):
    """
    Pass a date string through, or substitute a fixed fallback.

    `x` is returned unchanged when it is a `str` whose text contains a colon.
    Any other value is reported with a warning log line and replaced by the
    hard-coded timestamp "2020-11-07 00:36:44+05:30".
    """
    has_time_separator = isinstance(x, str) and ":" in str(x)
    if has_time_separator:
        return x

    # Not a string, or a string without ':' -- log it and use the fallback.
    logging.warning(f'[session_id: {session_id}] correct_date() error: {x} is not the right date format')
    return "2020-11-07 00:36:44+05:30"
|
55 |
|
|
|
62 |
try:
|
63 |
return int(np.round((dt.now(dt.tz) - dt).total_seconds() / 60, 0))
|
64 |
except:
|
65 |
+
logging.warning(f'[session_id: {session_id}] date_time_parser() error: {dt} is not the right date format')
|
66 |
return 100000
|
67 |
|
68 |
|
|
|
107 |
|
108 |
def fetch_from_db(fetch_flag):
|
109 |
try:
|
110 |
+
logging.warning(f'[session_id: {session_id}] fetch_flag: {fetch_flag}')
|
111 |
if fetch_flag == 1:
|
112 |
final_df = db.read_news_from_db()
|
113 |
freq_tokens = get_frequent_words_html(final_df)
|
114 |
+
logging.warning(f'[session_id: {session_id}] Fetched From DB\n\n')
|
115 |
|
116 |
final_df['_id'] = final_df['_id'].astype('str')
|
117 |
|
|
|
120 |
else:
|
121 |
final_df = pd.read_json(redis_client.get("NEWSDF"))
|
122 |
freq_tokens = redis_client.get("NEWSWORDCLOUD")
|
123 |
+
logging.warning(f'[session_id: {session_id}] Fetched From Cache\n\n')
|
124 |
|
125 |
except Exception as e:
|
126 |
print(e)
|
|
|
137 |
Entry point
|
138 |
"""
|
139 |
try:
|
140 |
+
global session_id
|
141 |
+
session_id = uuid.uuid4().hex
|
142 |
src_str = ''
|
143 |
status_code = 200
|
144 |
final_df, freq_tokens = fetch_from_db(is_db_fetch_reqd())
|
|
|
162 |
except Exception as e:
|
163 |
final_df = pd.DataFrame({'title': '', 'url': '',
|
164 |
'description': '', 'src_time': ''}, index=[0])
|
165 |
+
logging.warning(f'[session_id: {session_id}] {traceback.print_exc()}')
|
166 |
|
167 |
result_str = f'''
|
168 |
<div class="box" id="main">
|