Spaces:
Sleeping
Sleeping
Update scrape_3gpp.py
Browse files- scrape_3gpp.py +24 -25
scrape_3gpp.py
CHANGED
@@ -426,33 +426,32 @@ def extractionPrincipale(url, excel_file=None, status_list=None, progress=gr.Pro
|
|
426 |
status = ""
|
427 |
data.append([url+ "/" + folder + '.zip', folder , category, title, source,status, contenu])
|
428 |
|
429 |
-
|
430 |
-
|
431 |
-
|
432 |
-
|
433 |
-
|
434 |
-
|
435 |
-
|
436 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
437 |
else:
|
438 |
-
print(f"
|
439 |
-
|
440 |
-
|
441 |
-
if guide_df is not None:
|
442 |
-
tdoc_source_map = {row['TDoc']: row['Source'] for index, row in guide_df.iterrows()}
|
443 |
-
# Use tdoc_source_map as needed
|
444 |
-
else:
|
445 |
-
print("Error: guide_df is not initialized. Exiting function.")
|
446 |
-
return
|
447 |
|
448 |
-
|
449 |
-
tdoc_status_map = {row['TDoc']: row['TDoc Status'] for index, row in guide_df.iterrows()}
|
450 |
-
# Update the 'Source' in your data based on matching 'Nom du fichier' with 'TDoc'
|
451 |
-
for item in data:
|
452 |
-
nom_du_fichier = item[1] # Assuming 'Nom du fichier' is the second item (index 1) in your data list
|
453 |
-
if nom_du_fichier in tdoc_source_map:
|
454 |
-
item[4] = tdoc_source_map[nom_du_fichier] # Update the 'Source' field, assuming it's the fifth item (index 4)
|
455 |
-
item[5] = tdoc_status_map[nom_du_fichier]
|
456 |
|
457 |
|
458 |
processed_count += 1
|
|
|
426 |
status = ""
|
427 |
data.append([url+ "/" + folder + '.zip', folder , category, title, source,status, contenu])
|
428 |
|
429 |
+
guide_file = 'guide.xlsx'
|
430 |
+
if os.path.exists(guide_file):
|
431 |
+
# If guide.xlsx exists, proceed with operations that require it
|
432 |
+
try:
|
433 |
+
guide_df = pd.read_excel(guide_file, usecols=['Source', 'TDoc', 'TDoc Status'])
|
434 |
+
# Continue with the operations that require guide.xlsx
|
435 |
+
# For example, reading the file, processing the data, etc.
|
436 |
+
tdoc_source_map = {row['TDoc']: row['Source'] for index, row in guide_df.iterrows()}
|
437 |
+
tdoc_status_map = {row['TDoc']: row['TDoc Status'] for index, row in guide_df.iterrows()}
|
438 |
+
# Update the 'Source' in your data based on matching 'Nom du fichier' with 'TDoc'
|
439 |
+
for item in data:
|
440 |
+
nom_du_fichier = item[1] # Assuming 'Nom du fichier' is the second item (index 1) in your data list
|
441 |
+
if nom_du_fichier in tdoc_source_map:
|
442 |
+
item[4] = tdoc_source_map[nom_du_fichier] # Update the 'Source' field, assuming it's the fifth item (index 4)
|
443 |
+
item[5] = tdoc_status_map[nom_du_fichier]
|
444 |
+
# Your code that depends on guide.xlsx goes here
|
445 |
+
|
446 |
+
except Exception as e:
|
447 |
+
print(f"An error occurred while processing {guide_file}: {e}")
|
448 |
+
# Handle any errors that arise during processing
|
449 |
else:
|
450 |
+
print(f"File {guide_file} not found. Skipping operations that require this file.")
|
451 |
+
# Since guide.xlsx is not found, skip the related operations
|
452 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
453 |
|
454 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
455 |
|
456 |
|
457 |
processed_count += 1
|