MaksG committed on
Commit
9b3fe22
1 Parent(s): 24c67cc

Update scrape_3gpp.py

Browse files
Files changed (1) hide show
  1. scrape_3gpp.py +24 -25
scrape_3gpp.py CHANGED
@@ -426,33 +426,32 @@ def extractionPrincipale(url, excel_file=None, status_list=None, progress=gr.Pro
426
  status = ""
427
  data.append([url+ "/" + folder + '.zip', folder , category, title, source,status, contenu])
428
 
429
- # After processing all files and directories
430
- # Read the guide.xlsx file into a DataFrame to map 'TDoc' to 'Source'
431
- guide_df = None
432
-
433
- # Attempt to load the guide.xlsx file if it exists
434
- guide_file_path = 'guide.xlsx'
435
- if os.path.exists(guide_file_path):
436
- guide_df = pd.read_excel(guide_file_path, usecols=['Source', 'TDoc', 'TDoc Status'])
 
 
 
 
 
 
 
 
 
 
 
 
437
  else:
438
- print(f"Warning: {guide_file_path} not found.")
439
-
440
- # Proceed with the rest of the function, ensuring guide_df is checked before use
441
- if guide_df is not None:
442
- tdoc_source_map = {row['TDoc']: row['Source'] for index, row in guide_df.iterrows()}
443
- # Use tdoc_source_map as needed
444
- else:
445
- print("Error: guide_df is not initialized. Exiting function.")
446
- return
447
 
448
- tdoc_source_map = {row['TDoc']: row['Source'] for index, row in guide_df.iterrows()}
449
- tdoc_status_map = {row['TDoc']: row['TDoc Status'] for index, row in guide_df.iterrows()}
450
- # Update the 'Source' in your data based on matching 'Nom du fichier' with 'TDoc'
451
- for item in data:
452
- nom_du_fichier = item[1] # Assuming 'Nom du fichier' is the first item in your data list
453
- if nom_du_fichier in tdoc_source_map:
454
- item[4] = tdoc_source_map[nom_du_fichier] # Update the 'Source' field, assuming it's the fourth item
455
- item[5] = tdoc_status_map[nom_du_fichier]
456
 
457
 
458
  processed_count += 1
 
426
  status = ""
427
  data.append([url+ "/" + folder + '.zip', folder , category, title, source,status, contenu])
428
 
429
+ guide_file = 'guide.xlsx'
430
+ if os.path.exists(guide_file):
431
+ # If guide.xlsx exists, proceed with operations that require it
432
+ try:
433
+ guide_df = pd.read_excel(guide_file, usecols=['Source', 'TDoc', 'TDoc Status'])
434
+ # Continue with the operations that require guide.xlsx
435
+ # For example, reading the file, processing the data, etc.
436
+ tdoc_source_map = {row['TDoc']: row['Source'] for index, row in guide_df.iterrows()}
437
+ tdoc_status_map = {row['TDoc']: row['TDoc Status'] for index, row in guide_df.iterrows()}
438
+ # Update the 'Source' in your data based on matching 'Nom du fichier' with 'TDoc'
439
+ for item in data:
440
+ nom_du_fichier = item[1] # Assuming 'Nom du fichier' is the first item in your data list
441
+ if nom_du_fichier in tdoc_source_map:
442
+ item[4] = tdoc_source_map[nom_du_fichier] # Update the 'Source' field, assuming it's the fourth item
443
+ item[5] = tdoc_status_map[nom_du_fichier]
444
+ # Your code that depends on guide.xlsx goes here
445
+
446
+ except Exception as e:
447
+ print(f"An error occurred while processing {guide_file}: {e}")
448
+ # Handle any errors that arise during processing
449
  else:
450
+ print(f"File {guide_file} not found. Skipping operations that require this file.")
451
+ # Since guide.xlsx is not found, skip the related operations
452
+
 
 
 
 
 
 
453
 
454
+
 
 
 
 
 
 
 
455
 
456
 
457
  processed_count += 1