poemsforaphrodite committed on
Commit
3d7a954
1 Parent(s): 79695fc

Upload 2 files

Files changed (2)
  1. main.py +522 -0
  2. requirements.txt +10 -0
main.py ADDED
@@ -0,0 +1,522 @@
+ # Standard library imports
+ import base64
+ import datetime
+ import os
+
+ # Related third-party imports
+ import streamlit as st
+ from google_auth_oauthlib.flow import Flow
+ from googleapiclient.discovery import build
+ from dotenv import load_dotenv
+ import pandas as pd
+ import searchconsole
+ import cohere
+ from sklearn.metrics.pairwise import cosine_similarity
+ import requests
+ from bs4 import BeautifulSoup
+
+ load_dotenv()
+
+ # Initialize Cohere client
+ COHERE_API_KEY = os.environ["COHERE_API_KEY"]
+ co = cohere.Client(COHERE_API_KEY)
+
+ # Configuration: Set to True if running locally, False if running on Streamlit Cloud
+ IS_LOCAL = False
+
+ # Constants
+ SEARCH_TYPES = ["web", "image", "video", "news", "discover", "googleNews"]
+ DATE_RANGE_OPTIONS = [
+     "Last 7 Days",
+     "Last 30 Days",
+     "Last 3 Months",
+     "Last 6 Months",
+     "Last 12 Months",
+     "Last 16 Months",
+     "Custom Range"
+ ]
+ DEVICE_OPTIONS = ["All Devices", "desktop", "mobile", "tablet"]
+ BASE_DIMENSIONS = ["page", "query", "country", "date"]
+ MAX_ROWS = 250_000
+ DF_PREVIEW_ROWS = 100
+
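+ # A minimal .env for local runs might look like this (placeholder values only;
+ # CLIENT_ID, CLIENT_SECRET and REDIRECT_URI are read by load_config() below):
+ #   COHERE_API_KEY=your-cohere-key
+ #   CLIENT_ID=1234567890.apps.googleusercontent.com
+ #   CLIENT_SECRET=your-client-secret
+ #   REDIRECT_URI=http://localhost:8501
+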
+ # -------------
+ # Streamlit App Configuration
+ # -------------
+
+ def setup_streamlit():
+     """
+     Configures Streamlit's page settings and displays the app title and markdown information.
+     Sets the page layout, title, and markdown content with links and app description.
+     """
+     st.set_page_config(page_title="✨ Simple Google Search Console Data | LeeFoot.co.uk", layout="wide")
+     st.title("✨ Simple Google Search Console Data | June 2024")
+     st.markdown(f"### Lightweight GSC Data Extractor. (Max {MAX_ROWS:,} Rows)")
+
+     st.markdown(
+         """
+         <p>
+         Created by <a href="https://twitter.com/LeeFootSEO" target="_blank">LeeFootSEO</a> |
+         <a href="https://leefoot.co.uk" target="_blank">More Apps & Scripts on my Website</a>
+         </p>
+         """,
+         unsafe_allow_html=True
+     )
+     st.divider()
+
+ def init_session_state():
+     """
+     Initialises or updates the Streamlit session state variables for property selection,
+     search type, date range, dimensions, and device type.
+     """
+     if 'selected_property' not in st.session_state:
+         st.session_state.selected_property = None
+     if 'selected_search_type' not in st.session_state:
+         st.session_state.selected_search_type = 'web'
+     if 'selected_date_range' not in st.session_state:
+         st.session_state.selected_date_range = 'Last 7 Days'
+     if 'start_date' not in st.session_state:
+         st.session_state.start_date = datetime.date.today() - datetime.timedelta(days=7)
+     if 'end_date' not in st.session_state:
+         st.session_state.end_date = datetime.date.today()
+     if 'selected_dimensions' not in st.session_state:
+         st.session_state.selected_dimensions = ['page', 'query']
+     if 'selected_device' not in st.session_state:
+         st.session_state.selected_device = 'All Devices'
+     if 'custom_start_date' not in st.session_state:
+         st.session_state.custom_start_date = datetime.date.today() - datetime.timedelta(days=7)
+     if 'custom_end_date' not in st.session_state:
+         st.session_state.custom_end_date = datetime.date.today()
+
+
+ def fetch_content(url):
+     """
+     Fetches the visible text content of a webpage.
+     On failure, returns the error message as the content string.
+     """
+     try:
+         response = requests.get(url, timeout=30)  # avoid hanging indefinitely on slow pages
+         response.raise_for_status()
+         soup = BeautifulSoup(response.text, 'html.parser')
+         content = soup.get_text(separator=' ', strip=True)
+         return content
+     except requests.RequestException as e:
+         return str(e)
+
+ def generate_embeddings(text_list):
+     """
+     Generates embeddings for a list of texts using Cohere's API.
+     """
+     if not text_list:
+         return []
+
+     model = 'embed-english-v3.0'
+     input_type = 'search_document'
+     response = co.embed(model=model, texts=text_list, input_type=input_type)
+     embeddings = response.embeddings
+     return embeddings
+
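+ # Example (illustrative): generate_embeddings(["seo audit checklist"]) returns a
+ # list with one embedding vector (1024 floats for embed-english-v3.0), ready to
+ # be compared with cosine_similarity below.
+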
+ def calculate_relevancy_scores(df):
+     """
+     Calculates a relevancy score for each row by comparing the embedding of the
+     query against the embedding of the fetched page content.
+     """
+     try:
+         st.write("Calculating relevancy scores...")
+         st.write(f"Input DataFrame shape: {df.shape}")
+         st.write(f"Input DataFrame columns: {df.columns}")
+
+         page_contents = [fetch_content(url) for url in df['page']]
+         st.write(f"Fetched {len(page_contents)} page contents")
+
+         page_embeddings = generate_embeddings(page_contents)
+         st.write(f"Generated {len(page_embeddings)} page embeddings")
+
+         query_embeddings = generate_embeddings(df['query'].tolist())
+         st.write(f"Generated {len(query_embeddings)} query embeddings")
+
+         # The full similarity matrix is queries x pages; the diagonal pairs each
+         # query with its own page, so both lists must share length and order.
+         relevancy_scores = cosine_similarity(query_embeddings, page_embeddings).diagonal()
+         st.write(f"Calculated {len(relevancy_scores)} relevancy scores")
+         st.write(f"Sample relevancy scores: {relevancy_scores[:5]}")
+
+         df = df.assign(relevancy_score=relevancy_scores)
+         st.write("Assigned relevancy scores to DataFrame")
+         st.write(f"DataFrame shape after assigning scores: {df.shape}")
+         st.write(f"DataFrame columns after assigning scores: {df.columns}")
+         st.write(f"Sample relevancy scores from DataFrame: {df['relevancy_score'].head()}")
+
+     except Exception as e:
+         st.warning(f"Error calculating relevancy scores: {e}")
+         df = df.assign(relevancy_score=0)  # Default value if calculation fails
+
+     return df
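+
+ # Example (illustrative): a page about "blue running shoes" scored against its
+ # top query "buy blue running shoes" will typically land near 1.0, while an
+ # unrelated query falls much closer to 0.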
+
+ # -------------
+ # Google Authentication Functions
+ # -------------
+
+ def load_config():
+     """
+     Loads the Google API client configuration from environment variables.
+     Returns a dictionary with the client configuration for OAuth.
+     """
+     client_config = {
+         "installed": {
+             "client_id": os.environ["CLIENT_ID"],
+             "client_secret": os.environ["CLIENT_SECRET"],
+             # Standard Google OAuth endpoints, required by Flow.from_client_config
+             "auth_uri": "https://accounts.google.com/o/oauth2/auth",
+             "token_uri": "https://oauth2.googleapis.com/token",
+             "redirect_uris": [os.environ["REDIRECT_URI"]],
+         }
+     }
+     return client_config
+
+ def init_oauth_flow(client_config):
+     """
+     Initialises the OAuth flow for Google API authentication using the client configuration.
+     Sets the necessary scopes and returns the configured Flow object.
+     """
+     scopes = ["https://www.googleapis.com/auth/webmasters"]
+     return Flow.from_client_config(
+         client_config,
+         scopes=scopes,
+         redirect_uri=client_config["installed"]["redirect_uris"][0],
+     )
+
+ def google_auth(client_config):
+     """
+     Starts the Google authentication process using OAuth.
+     Generates and returns the OAuth flow and the authentication URL.
+     """
+     flow = init_oauth_flow(client_config)
+     auth_url, _ = flow.authorization_url(prompt="consent")
+     return flow, auth_url
+
+ def auth_search_console(client_config, credentials):
+     """
+     Authenticates the user with the Google Search Console API using provided credentials.
+     Returns an authenticated searchconsole client.
+     """
+     token = {
+         "token": credentials.token,
+         "refresh_token": credentials.refresh_token,
+         "token_uri": credentials.token_uri,
+         "client_id": credentials.client_id,
+         "client_secret": credentials.client_secret,
+         "scopes": credentials.scopes,
+         "id_token": getattr(credentials, "id_token", None),
+     }
+     return searchconsole.authenticate(client_config=client_config, credentials=token)
+
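+ # Note: the token dict above mirrors the fields of google.oauth2.credentials.Credentials,
+ # so the searchconsole library can rebuild equivalent credentials from the
+ # serialised values of the completed OAuth flow.
+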
+ # -------------
+ # Data Fetching Functions
+ # -------------
+
+ def list_gsc_properties(credentials):
+     """
+     Lists all Google Search Console properties accessible with the given credentials.
+     Returns a list of property URLs or a message if no properties are found.
+     """
+     service = build('webmasters', 'v3', credentials=credentials)
+     site_list = service.sites().list().execute()
+     return [site['siteUrl'] for site in site_list.get('siteEntry', [])] or ["No properties found"]
+
+ def fetch_gsc_data(webproperty, search_type, start_date, end_date, dimensions, device_type=None):
+     """
+     Fetches Google Search Console data for a specified property, date range, dimensions, and device type.
+     Handles errors and returns the data as a DataFrame.
+     """
+     query = webproperty.query.range(start_date, end_date).search_type(search_type).dimension(*dimensions)
+
+     if 'device' in dimensions and device_type and device_type != 'All Devices':
+         query = query.filter('device', 'equals', device_type.lower())
+
+     try:
+         df = query.limit(MAX_ROWS).get().to_dataframe()
+         return process_gsc_data(df)
+     except Exception as e:
+         show_error(e)
+         return pd.DataFrame()
+
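+ # Example (illustrative dates): for a web report over pages and queries, the
+ # chain above is equivalent to:
+ #   webproperty.query.range('2024-05-01', '2024-05-31') \
+ #       .search_type('web').dimension('page', 'query').limit(MAX_ROWS).get()
+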
+ def process_gsc_data(df):
+     """
+     Processes the GSC data to return only unique pages with their first query and relevancy score.
+     """
+     st.write("Processing GSC data...")
+     st.write(f"Input DataFrame shape: {df.shape}")
+     st.write(f"Input DataFrame columns: {df.columns}")
+
+     # Sort the dataframe by page and clicks (descending) to get the most relevant query first
+     df_sorted = df.sort_values(['page', 'clicks'], ascending=[True, False])
+
+     # Get the first occurrence of each page (which will be the one with the highest clicks)
+     df_unique = df_sorted.drop_duplicates(subset='page', keep='first').copy()
+
+     st.write(f"Unique pages DataFrame shape: {df_unique.shape}")
+     st.write(f"Unique pages DataFrame columns: {df_unique.columns}")
+
+     # Ensure 'relevancy_score' column exists and is preserved
+     if 'relevancy_score' not in df_unique.columns:
+         st.write("Relevancy score column not found, adding default values")
+         df_unique['relevancy_score'] = 0  # Default value if column doesn't exist
+     else:
+         st.write("Preserving relevancy scores")
+         # Both groupby('page') and df_unique are ordered by page ascending,
+         # so this positional assignment stays aligned.
+         df_unique['relevancy_score'] = df_sorted.groupby('page')['relevancy_score'].first().values
+
+     # Select only the relevant columns, including the relevancy_score
+     result = df_unique[['page', 'query', 'clicks', 'impressions', 'ctr', 'position', 'relevancy_score']]
+
+     st.write(f"Processed data. Shape: {result.shape}")
+     st.write(f"Columns: {result.columns}")
+     st.write(f"Sample relevancy scores: {result['relevancy_score'].head()}")
+
+     return result
+
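+ # Example (illustrative): if /pricing appears with queries "pricing plans"
+ # (120 clicks) and "cost calculator" (80 clicks), only the "pricing plans" row
+ # survives the deduplication above.
+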
+ def fetch_data_loading(webproperty, search_type, start_date, end_date, dimensions, device_type=None):
+     """
+     Fetches Google Search Console data with a loading indicator and calculates relevancy scores.
+     """
+     with st.spinner('Fetching data and calculating relevancy scores...'):
+         df = fetch_gsc_data(webproperty, search_type, start_date, end_date, dimensions, device_type)
+         st.write(f"Data fetched. Shape: {df.shape}")
+         st.write(f"Columns: {df.columns}")
+
+         if df.empty:
+             return df  # nothing to score or process; the caller shows a warning
+
+         df = calculate_relevancy_scores(df)
+         st.write("Relevancy scores calculated.")
+         st.write(f"DataFrame shape after calculating scores: {df.shape}")
+         st.write(f"DataFrame columns after calculating scores: {df.columns}")
+         st.write(f"Sample relevancy scores after calculation: {df['relevancy_score'].head()}")
+
+         # Note: fetch_gsc_data already ran process_gsc_data once; running it again
+         # deduplicates a second time while preserving the computed scores.
+         processed_df = process_gsc_data(df)
+         st.write("Data processed.")
+         st.write(f"Final DataFrame shape: {processed_df.shape}")
+         st.write(f"Final DataFrame columns: {processed_df.columns}")
+         st.write(f"Final sample relevancy scores: {processed_df['relevancy_score'].head()}")
+
+         return processed_df
+
+ # -------------
+ # Utility Functions
+ # -------------
+
+ def update_dimensions(selected_search_type):
+     """
+     Updates and returns the list of dimensions based on the selected search type.
+     Adds 'device' whenever the search type is one of the recognised SEARCH_TYPES.
+     """
+     return BASE_DIMENSIONS + ['device'] if selected_search_type in SEARCH_TYPES else BASE_DIMENSIONS
+
+ def calc_date_range(selection, custom_start=None, custom_end=None):
+     """
+     Calculates the date range based on the selected range option.
+     Returns the start and end dates for the specified range.
+     """
+     range_map = {
+         'Last 7 Days': 7,
+         'Last 30 Days': 30,
+         'Last 3 Months': 90,
+         'Last 6 Months': 180,
+         'Last 12 Months': 365,
+         'Last 16 Months': 480
+     }
+     today = datetime.date.today()
+     if selection == 'Custom Range':
+         if custom_start and custom_end:
+             return custom_start, custom_end
+         else:
+             return today - datetime.timedelta(days=7), today
+     return today - datetime.timedelta(days=range_map.get(selection, 0)), today
+
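+ # Example: calc_date_range('Last 7 Days') on 2024-06-15 returns
+ # (datetime.date(2024, 6, 8), datetime.date(2024, 6, 15)).
+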
+ def show_error(e):
+     """
+     Displays an error message in the Streamlit app.
+     Formats and shows the provided error 'e'.
+     """
+     st.error(f"An error occurred: {e}")
+
+ def property_change():
+     """
+     Updates the 'selected_property' in the Streamlit session state.
+     Triggered on change of the property selection.
+     """
+     st.session_state.selected_property = st.session_state['selected_property_selector']
+
+ # -------------
+ # File & Download Operations
+ # -------------
+
+ def show_dataframe(report):
+     """
+     Shows a preview of the first 100 rows of the processed report DataFrame in an expandable section.
+     """
+     with st.expander("Preview the First 100 Rows (Unique Pages with Top Query)"):
+         st.dataframe(report.head(DF_PREVIEW_ROWS))
+
+ def download_csv_link(report):
+     """
+     Generates and displays a download link for the report DataFrame in CSV format.
+     """
+     def to_csv(df):
+         return df.to_csv(index=False, encoding='utf-8-sig')
+
+     csv = to_csv(report)
+     b64_csv = base64.b64encode(csv.encode()).decode()
+     href = f'<a href="data:file/csv;base64,{b64_csv}" download="search_console_data.csv">Download CSV File</a>'
+     st.markdown(href, unsafe_allow_html=True)
+
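+ # Design note: the base64 data URI keeps the download entirely client-side; for
+ # very large exports, st.download_button would avoid inflating the page with
+ # the encoded payload.
+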
+ # -------------
+ # Streamlit UI Components
+ # -------------
+
+ def show_google_sign_in(auth_url):
+     """
+     Displays the Google sign-in button and authentication URL in the Streamlit sidebar.
+     """
+     with st.sidebar:
+         if st.button("Sign in with Google"):
+             # Open the authentication URL
+             st.write('Please click the link below to sign in:')
+             st.markdown(f'[Google Sign-In]({auth_url})', unsafe_allow_html=True)
+
+ def show_property_selector(properties, account):
+     """
+     Displays a dropdown selector for Google Search Console properties.
+     Returns the selected property's webproperty object.
+     """
+     selected_property = st.selectbox(
+         "Select a Search Console Property:",
+         properties,
+         index=properties.index(st.session_state.selected_property)
+         if st.session_state.selected_property in properties else 0,
+         key='selected_property_selector',
+         on_change=property_change
+     )
+     return account[selected_property]
+
+ def show_search_type_selector():
+     """
+     Displays a dropdown selector for choosing the search type.
+     Returns the selected search type.
+     """
+     return st.selectbox(
+         "Select Search Type:",
+         SEARCH_TYPES,
+         index=SEARCH_TYPES.index(st.session_state.selected_search_type),
+         key='search_type_selector'
+     )
+
+ def show_date_range_selector():
+     """
+     Displays a dropdown selector for choosing the date range.
+     Returns the selected date range option.
+     """
+     return st.selectbox(
+         "Select Date Range:",
+         DATE_RANGE_OPTIONS,
+         index=DATE_RANGE_OPTIONS.index(st.session_state.selected_date_range),
+         key='date_range_selector'
+     )
+
+ def show_custom_date_inputs():
+     """
+     Displays date input fields for custom date range selection.
+     Updates session state with the selected dates.
+     """
+     st.session_state.custom_start_date = st.date_input("Start Date", st.session_state.custom_start_date)
+     st.session_state.custom_end_date = st.date_input("End Date", st.session_state.custom_end_date)
+
+ def show_dimensions_selector(search_type):
+     """
+     Displays a multi-select box for choosing dimensions based on the selected search type.
+     Returns the selected dimensions.
+     """
+     available_dimensions = update_dimensions(search_type)
+     return st.multiselect(
+         "Select Dimensions:",
+         available_dimensions,
+         default=st.session_state.selected_dimensions,
+         key='dimensions_selector'
+     )
+
+ def show_fetch_data_button(webproperty, search_type, start_date, end_date, selected_dimensions):
+     """
+     Displays a button to fetch data based on selected parameters.
+     Shows the report DataFrame and download link upon successful data fetching.
+     Note: main() currently inlines this same logic rather than calling this helper.
+     """
+     if st.button("Fetch Data"):
+         report = fetch_data_loading(webproperty, search_type, start_date, end_date, selected_dimensions)
+
+         if report is not None and not report.empty:
+             show_dataframe(report)
+             download_csv_link(report)
+         else:
+             st.warning("No data found for the selected criteria.")
+
+
+ # -------------
+ # Main Streamlit App Function
+ # -------------
+
+ def main():
+     """
+     The main function for the Streamlit application.
+     Handles the app setup, authentication, UI components, and data fetching logic.
+     """
+     setup_streamlit()
+     client_config = load_config()
+     st.session_state.auth_flow, st.session_state.auth_url = google_auth(client_config)
+
+     query_params = st.experimental_get_query_params()
+     auth_code = query_params.get("code", [None])[0]
+
+     if auth_code and not st.session_state.get('credentials'):
+         st.session_state.auth_flow.fetch_token(code=auth_code)
+         st.session_state.credentials = st.session_state.auth_flow.credentials
+
+     if not st.session_state.get('credentials'):
+         show_google_sign_in(st.session_state.auth_url)
+     else:
+         init_session_state()
+         account = auth_search_console(client_config, st.session_state.credentials)
+         properties = list_gsc_properties(st.session_state.credentials)
+
+         if properties:
+             webproperty = show_property_selector(properties, account)
+             search_type = show_search_type_selector()
+             date_range_selection = show_date_range_selector()
+
+             if date_range_selection == 'Custom Range':
+                 show_custom_date_inputs()
+                 start_date, end_date = st.session_state.custom_start_date, st.session_state.custom_end_date
+             else:
+                 start_date, end_date = calc_date_range(date_range_selection)
+
+             selected_dimensions = show_dimensions_selector(search_type)
+
+             if st.button("Fetch Data and Calculate Relevancy"):
+                 report = fetch_data_loading(webproperty, search_type, start_date, end_date, selected_dimensions)
+
+                 if report is not None and not report.empty:
+                     show_dataframe(report)
+                     download_csv_link(report)
+                 else:
+                     st.warning("No data found for the selected criteria.")
+
+ if __name__ == "__main__":
+     main()
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ streamlit
+ google-auth-oauthlib
+ google-api-python-client
+ pandas
+ searchconsole
+ python-dotenv
+ cohere
+ scikit-learn
+ beautifulsoup4
+ requests