Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -227,11 +227,11 @@ def save_figure_as_jpeg(fig, filename):
|
|
227 |
pio.write_image(fig, filename, format='jpeg', width=1000, height=600, scale=5)
|
228 |
|
229 |
# Generate reports (page and sentence levels)
|
230 |
-
def generate_page_report(df_pages):
|
231 |
doc = Document()
|
232 |
doc.add_heading("Page-Level SDG Analysis Report", 0)
|
233 |
|
234 |
-
doc.add_heading("General Notes", level=2)
|
235 |
doc.add_paragraph(
|
236 |
'This app conducts page-level analysis of the uploaded document. Each page is processed by the sdgBERT AI model trained to predict the first 16 '
|
237 |
'Sustainable Development Goals (SDGs). The model analyzes the content and returns scores '
|
@@ -242,26 +242,29 @@ def generate_page_report(df_pages):
|
|
242 |
'(Primary and Secondary) for each page with a probability score greater than zero.'
|
243 |
)
|
244 |
|
245 |
-
doc.add_heading("Primary SDGs Bar Graph", level=3)
|
246 |
doc.add_paragraph(
|
247 |
'This graph displays the most essential SDG the AI model associates with pages. The bars '
|
248 |
'represent the percentage of pages most strongly aligned with each SDG. This offers insight into the dominant '
|
249 |
'sustainable development theme within the document.'
|
250 |
)
|
251 |
|
252 |
-
doc.add_heading("Secondary SDGs Bar Graph", level=3)
|
253 |
doc.add_paragraph(
|
254 |
'This graph shows the second most relevant SDGs for pages. Although these SDGs are '
|
255 |
'not the primary focus, the text has some relevance to these goals.'
|
256 |
)
|
257 |
|
258 |
for doc_name in df_pages['Document'].unique():
|
259 |
-
|
|
|
|
|
|
|
260 |
df_doc = df_pages[df_pages['Document'] == doc_name]
|
261 |
|
262 |
# Generate and save graphs
|
263 |
-
first_sdg_plot_path = f"{
|
264 |
-
second_sdg_plot_path = f"{
|
265 |
|
266 |
plot_sdg(df_doc, "Primary SDGs", 'pred1').write_image(
|
267 |
first_sdg_plot_path, format='jpeg', scale=7, engine="kaleido")
|
@@ -272,14 +275,14 @@ def generate_page_report(df_pages):
|
|
272 |
doc.add_picture(first_sdg_plot_path, width=Inches(6))
|
273 |
doc.add_picture(second_sdg_plot_path, width=Inches(6))
|
274 |
|
275 |
-
doc.save(
|
276 |
-
return
|
277 |
|
278 |
-
def generate_sentence_report(df_sentences):
|
279 |
doc = Document()
|
280 |
doc.add_heading("Sentence-Level SDG Analysis Report", 0)
|
281 |
|
282 |
-
doc.add_heading("General Notes", level=2)
|
283 |
doc.add_paragraph(
|
284 |
'This app splits documents into sentences using a natural language processing algorithm. '
|
285 |
'Each sentence is processed by the sdgBERT AI model trained to predict the first 16 '
|
@@ -291,26 +294,29 @@ def generate_sentence_report(df_sentences):
|
|
291 |
'(Primary and Secondary) for each sentence with a probability score greater than zero.'
|
292 |
)
|
293 |
|
294 |
-
doc.add_heading("Primary SDGs Bar Graph", level=3)
|
295 |
doc.add_paragraph(
|
296 |
'This graph displays the most essential SDG the AI model associates with sentences. The bars '
|
297 |
'represent the percentage of sentences most strongly aligned with each SDG. This offers more profound insight '
|
298 |
'into the dominant sustainable development theme within the document.'
|
299 |
)
|
300 |
|
301 |
-
doc.add_heading("Secondary SDGs Bar Graph", level=3)
|
302 |
doc.add_paragraph(
|
303 |
'This graph shows the second most relevant SDGs for sentences. Although these SDGs are not '
|
304 |
'the primary focus, the text has some relevance to these goals.'
|
305 |
)
|
306 |
|
307 |
for doc_name in df_sentences['Document'].unique():
|
308 |
-
|
|
|
|
|
|
|
309 |
df_doc = df_sentences[df_sentences['Document'] == doc_name]
|
310 |
|
311 |
# Generate and save graphs
|
312 |
-
first_sdg_plot_path = f"{
|
313 |
-
second_sdg_plot_path = f"{
|
314 |
|
315 |
plot_sdg(df_doc, "Primary SDGs", 'pred1').write_image(
|
316 |
first_sdg_plot_path, format='jpeg', scale=7, engine="kaleido")
|
@@ -321,8 +327,8 @@ def generate_sentence_report(df_sentences):
|
|
321 |
doc.add_picture(first_sdg_plot_path, width=Inches(6))
|
322 |
doc.add_picture(second_sdg_plot_path, width=Inches(6))
|
323 |
|
324 |
-
doc.save(
|
325 |
-
return
|
326 |
|
327 |
# New text extraction functions with text cleaning and line joining
|
328 |
def extract_text_with_py_pdf_loader(pdf_file_path, start_page=None, end_page=None):
|
@@ -439,13 +445,13 @@ def launch_interface():
|
|
439 |
# Shared PDF file input for both analyses
|
440 |
with gr.Row():
|
441 |
file_input = gr.File(
|
442 |
-
label="Upload PDF File for Analysis", file_types=[".pdf"]
|
443 |
)
|
444 |
|
445 |
# Extraction mode selection with explanatory text
|
446 |
gr.Markdown(
|
447 |
"""
|
448 |
-
###
|
449 |
Choose whether to analyze all pages or a specific range of pages. If you want to exclude certain pages from the analysis, select "Range of Pages" and specify the start and end pages.
|
450 |
"""
|
451 |
)
|
@@ -457,8 +463,8 @@ def launch_interface():
|
|
457 |
)
|
458 |
|
459 |
with gr.Row():
|
460 |
-
start_page = gr.Number(value=1, label="Start Page", visible=False)
|
461 |
-
end_page = gr.Number(value=1, label="End Page", visible=False)
|
462 |
|
463 |
# Function to update visibility of start_page and end_page
|
464 |
def update_page_inputs(extraction_mode):
|
@@ -474,7 +480,7 @@ def launch_interface():
|
|
474 |
)
|
475 |
|
476 |
# Tabs for page-level and sentence-level analysis
|
477 |
-
with gr.Tab("Page-Level Analysis"):
|
478 |
gr.Markdown(
|
479 |
"""
|
480 |
### π Page-Level SDG Analysis
|
@@ -485,20 +491,20 @@ def launch_interface():
|
|
485 |
)
|
486 |
with gr.Row():
|
487 |
with gr.Column():
|
488 |
-
primary_page_plot = gr.Plot(label="Primary SDGs [Page-Level]")
|
489 |
with gr.Column():
|
490 |
-
secondary_page_plot = gr.Plot(label="Secondary SDGs [Page-Level]")
|
491 |
|
492 |
with gr.Row():
|
493 |
-
page_csv = gr.File(label="Download Page Predictions CSV")
|
494 |
-
page_docx = gr.File(label="Download Page Report DOCX")
|
495 |
-
page_jpeg1 = gr.File(label="Download Primary SDGs JPEG")
|
496 |
-
page_jpeg2 = gr.File(label="Download Secondary SDGs JPEG")
|
497 |
|
498 |
-
page_button = gr.Button("Run Page-Level Analysis")
|
499 |
-
reset_page_button = gr.Button("Reset Page-Level Analysis")
|
500 |
|
501 |
-
with gr.Tab("Sentence-Level Analysis"):
|
502 |
gr.Markdown(
|
503 |
"""
|
504 |
### ✍️ Sentence-Level SDG Analysis
|
@@ -509,18 +515,18 @@ def launch_interface():
|
|
509 |
)
|
510 |
with gr.Row():
|
511 |
with gr.Column():
|
512 |
-
primary_sentence_plot = gr.Plot(label="Primary SDGs [Sentence-Level]")
|
513 |
with gr.Column():
|
514 |
-
secondary_sentence_plot = gr.Plot(label="Secondary SDGs [Sentence-Level]")
|
515 |
|
516 |
with gr.Row():
|
517 |
-
sentence_csv = gr.File(label="Download Sentence Predictions CSV")
|
518 |
-
sentence_docx = gr.File(label="Download Sentence Report DOCX")
|
519 |
-
sentence_jpeg1 = gr.File(label="Download Primary SDGs JPEG")
|
520 |
-
sentence_jpeg2 = gr.File(label="Download Secondary SDGs JPEG")
|
521 |
|
522 |
-
sentence_button = gr.Button("Run Sentence-Level Analysis")
|
523 |
-
reset_sentence_button = gr.Button("Reset Sentence-Level Analysis")
|
524 |
|
525 |
# Function to process page-level analysis
|
526 |
@spaces.GPU
|
@@ -531,11 +537,17 @@ def launch_interface():
|
|
531 |
try:
|
532 |
if hasattr(file, 'name'):
|
533 |
pdf_file_path = file.name
|
|
|
534 |
else:
|
535 |
# Save the file to a temporary location
|
536 |
with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_pdf:
|
537 |
temp_pdf.write(file.read())
|
538 |
pdf_file_path = temp_pdf.name
|
|
|
|
|
|
|
|
|
|
|
539 |
|
540 |
# Determine page range based on extraction_mode
|
541 |
if extraction_mode == "All Pages":
|
@@ -556,22 +568,28 @@ def launch_interface():
|
|
556 |
df_page_predictions = predict_pages(page_df)
|
557 |
|
558 |
first_plot = plot_sdg(
|
559 |
-
df_page_predictions, "", 'pred1'
|
560 |
)
|
561 |
second_plot = plot_sdg(
|
562 |
-
df_page_predictions, "", 'pred2'
|
563 |
)
|
564 |
|
565 |
-
|
566 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
567 |
|
568 |
# Save figures as JPEG
|
569 |
-
save_figure_as_jpeg(first_plot,
|
570 |
-
save_figure_as_jpeg(second_plot,
|
571 |
|
572 |
return (
|
573 |
-
first_plot, second_plot,
|
574 |
-
|
575 |
|
576 |
except Exception as e:
|
577 |
print(f"Error: {e}")
|
@@ -586,11 +604,17 @@ def launch_interface():
|
|
586 |
try:
|
587 |
if hasattr(file, 'name'):
|
588 |
pdf_file_path = file.name
|
|
|
589 |
else:
|
590 |
# Save the file to a temporary location
|
591 |
with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_pdf:
|
592 |
temp_pdf.write(file.read())
|
593 |
pdf_file_path = temp_pdf.name
|
|
|
|
|
|
|
|
|
|
|
594 |
|
595 |
# Determine page range based on extraction_mode
|
596 |
if extraction_mode == "All Pages":
|
@@ -611,22 +635,28 @@ def launch_interface():
|
|
611 |
df_sentence_predictions = predict_sentences(sentence_df)
|
612 |
|
613 |
first_plot = plot_sdg(
|
614 |
-
df_sentence_predictions, "", 'pred1'
|
615 |
)
|
616 |
second_plot = plot_sdg(
|
617 |
-
df_sentence_predictions, "", 'pred2'
|
618 |
)
|
619 |
|
620 |
-
|
621 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
622 |
|
623 |
# Save figures as JPEG
|
624 |
-
save_figure_as_jpeg(first_plot,
|
625 |
-
save_figure_as_jpeg(second_plot,
|
626 |
|
627 |
return (
|
628 |
-
first_plot, second_plot,
|
629 |
-
|
630 |
|
631 |
except Exception as e:
|
632 |
print(f"Error: {e}")
|
|
|
227 |
pio.write_image(fig, filename, format='jpeg', width=1000, height=600, scale=5)
|
228 |
|
229 |
# Generate reports (page and sentence levels)
|
230 |
+
def generate_page_report(df_pages, report_file_name):
|
231 |
doc = Document()
|
232 |
doc.add_heading("Page-Level SDG Analysis Report", 0)
|
233 |
|
234 |
+
doc.add_heading("π General Notes", level=2)
|
235 |
doc.add_paragraph(
|
236 |
'This app conducts page-level analysis of the uploaded document. Each page is processed by the sdgBERT AI model trained to predict the first 16 '
|
237 |
'Sustainable Development Goals (SDGs). The model analyzes the content and returns scores '
|
|
|
242 |
'(Primary and Secondary) for each page with a probability score greater than zero.'
|
243 |
)
|
244 |
|
245 |
+
doc.add_heading("π Primary SDGs Bar Graph", level=3)
|
246 |
doc.add_paragraph(
|
247 |
'This graph displays the most essential SDG the AI model associates with pages. The bars '
|
248 |
'represent the percentage of pages most strongly aligned with each SDG. This offers insight into the dominant '
|
249 |
'sustainable development theme within the document.'
|
250 |
)
|
251 |
|
252 |
+
doc.add_heading("π Secondary SDGs Bar Graph", level=3)
|
253 |
doc.add_paragraph(
|
254 |
'This graph shows the second most relevant SDGs for pages. Although these SDGs are '
|
255 |
'not the primary focus, the text has some relevance to these goals.'
|
256 |
)
|
257 |
|
258 |
for doc_name in df_pages['Document'].unique():
|
259 |
+
# Sanitize doc_name to use in file names
|
260 |
+
sanitized_doc_name = re.sub(r'[^\w\-]', '_', os.path.splitext(doc_name)[0])
|
261 |
+
|
262 |
+
doc.add_heading(f"π Document: {doc_name}", level=2)
|
263 |
df_doc = df_pages[df_pages['Document'] == doc_name]
|
264 |
|
265 |
# Generate and save graphs
|
266 |
+
first_sdg_plot_path = f"{sanitized_doc_name}_first_sdg_page.jpeg"
|
267 |
+
second_sdg_plot_path = f"{sanitized_doc_name}_second_sdg_page.jpeg"
|
268 |
|
269 |
plot_sdg(df_doc, "Primary SDGs", 'pred1').write_image(
|
270 |
first_sdg_plot_path, format='jpeg', scale=7, engine="kaleido")
|
|
|
275 |
doc.add_picture(first_sdg_plot_path, width=Inches(6))
|
276 |
doc.add_picture(second_sdg_plot_path, width=Inches(6))
|
277 |
|
278 |
+
doc.save(report_file_name)
|
279 |
+
return report_file_name
|
280 |
|
281 |
+
def generate_sentence_report(df_sentences, report_file_name):
|
282 |
doc = Document()
|
283 |
doc.add_heading("Sentence-Level SDG Analysis Report", 0)
|
284 |
|
285 |
+
doc.add_heading("π General Notes", level=2)
|
286 |
doc.add_paragraph(
|
287 |
'This app splits documents into sentences using a natural language processing algorithm. '
|
288 |
'Each sentence is processed by the sdgBERT AI model trained to predict the first 16 '
|
|
|
294 |
'(Primary and Secondary) for each sentence with a probability score greater than zero.'
|
295 |
)
|
296 |
|
297 |
+
doc.add_heading("π Primary SDGs Bar Graph", level=3)
|
298 |
doc.add_paragraph(
|
299 |
'This graph displays the most essential SDG the AI model associates with sentences. The bars '
|
300 |
'represent the percentage of sentences most strongly aligned with each SDG. This offers more profound insight '
|
301 |
'into the dominant sustainable development theme within the document.'
|
302 |
)
|
303 |
|
304 |
+
doc.add_heading("π Secondary SDGs Bar Graph", level=3)
|
305 |
doc.add_paragraph(
|
306 |
'This graph shows the second most relevant SDGs for sentences. Although these SDGs are not '
|
307 |
'the primary focus, the text has some relevance to these goals.'
|
308 |
)
|
309 |
|
310 |
for doc_name in df_sentences['Document'].unique():
|
311 |
+
# Sanitize doc_name to use in file names
|
312 |
+
sanitized_doc_name = re.sub(r'[^\w\-]', '_', os.path.splitext(doc_name)[0])
|
313 |
+
|
314 |
+
doc.add_heading(f"π Document: {doc_name}", level=2)
|
315 |
df_doc = df_sentences[df_sentences['Document'] == doc_name]
|
316 |
|
317 |
# Generate and save graphs
|
318 |
+
first_sdg_plot_path = f"{sanitized_doc_name}_first_sdg_sentence.jpeg"
|
319 |
+
second_sdg_plot_path = f"{sanitized_doc_name}_second_sdg_sentence.jpeg"
|
320 |
|
321 |
plot_sdg(df_doc, "Primary SDGs", 'pred1').write_image(
|
322 |
first_sdg_plot_path, format='jpeg', scale=7, engine="kaleido")
|
|
|
327 |
doc.add_picture(first_sdg_plot_path, width=Inches(6))
|
328 |
doc.add_picture(second_sdg_plot_path, width=Inches(6))
|
329 |
|
330 |
+
doc.save(report_file_name)
|
331 |
+
return report_file_name
|
332 |
|
333 |
# New text extraction functions with text cleaning and line joining
|
334 |
def extract_text_with_py_pdf_loader(pdf_file_path, start_page=None, end_page=None):
|
|
|
445 |
# Shared PDF file input for both analyses
|
446 |
with gr.Row():
|
447 |
file_input = gr.File(
|
448 |
+
label="π Upload PDF File for Analysis", file_types=[".pdf"]
|
449 |
)
|
450 |
|
451 |
# Extraction mode selection with explanatory text
|
452 |
gr.Markdown(
|
453 |
"""
|
454 |
+
### π PDFText Extraction Mode
|
455 |
Choose whether to analyze all pages or a specific range of pages. If you want to exclude certain pages from the analysis, select "Range of Pages" and specify the start and end pages.
|
456 |
"""
|
457 |
)
|
|
|
463 |
)
|
464 |
|
465 |
with gr.Row():
|
466 |
+
start_page = gr.Number(value=1, label="🔢 Start Page", visible=False)
|
467 |
+
end_page = gr.Number(value=1, label="🔢 End Page", visible=False)
|
468 |
|
469 |
# Function to update visibility of start_page and end_page
|
470 |
def update_page_inputs(extraction_mode):
|
|
|
480 |
)
|
481 |
|
482 |
# Tabs for page-level and sentence-level analysis
|
483 |
+
with gr.Tab("π Page-Level Analysis"):
|
484 |
gr.Markdown(
|
485 |
"""
|
486 |
### π Page-Level SDG Analysis
|
|
|
491 |
)
|
492 |
with gr.Row():
|
493 |
with gr.Column():
|
494 |
+
primary_page_plot = gr.Plot(label="π Primary SDGs [Page-Level]")
|
495 |
with gr.Column():
|
496 |
+
secondary_page_plot = gr.Plot(label="π Secondary SDGs [Page-Level]")
|
497 |
|
498 |
with gr.Row():
|
499 |
+
page_csv = gr.File(label="π Download Page Predictions CSV")
|
500 |
+
page_docx = gr.File(label="π Download Page Report DOCX")
|
501 |
+
page_jpeg1 = gr.File(label="🖼️ Download Primary SDGs JPEG")
|
502 |
+
page_jpeg2 = gr.File(label="🖼️ Download Secondary SDGs JPEG")
|
503 |
|
504 |
+
page_button = gr.Button("🏃‍♂️ Run Page-Level Analysis")
|
505 |
+
reset_page_button = gr.Button("π Reset Page-Level Analysis")
|
506 |
|
507 |
+
with gr.Tab("✍️ Sentence-Level Analysis"):
|
508 |
gr.Markdown(
|
509 |
"""
|
510 |
### ✍️ Sentence-Level SDG Analysis
|
|
|
515 |
)
|
516 |
with gr.Row():
|
517 |
with gr.Column():
|
518 |
+
primary_sentence_plot = gr.Plot(label="π Primary SDGs [Sentence-Level]")
|
519 |
with gr.Column():
|
520 |
+
secondary_sentence_plot = gr.Plot(label="π Secondary SDGs [Sentence-Level]")
|
521 |
|
522 |
with gr.Row():
|
523 |
+
sentence_csv = gr.File(label="π Download Sentence Predictions CSV")
|
524 |
+
sentence_docx = gr.File(label="π Download Sentence Report DOCX")
|
525 |
+
sentence_jpeg1 = gr.File(label="🖼️ Download Primary SDGs JPEG")
|
526 |
+
sentence_jpeg2 = gr.File(label="🖼️ Download Secondary SDGs JPEG")
|
527 |
|
528 |
+
sentence_button = gr.Button("🏃‍♂️ Run Sentence-Level Analysis")
|
529 |
+
reset_sentence_button = gr.Button("π Reset Sentence-Level Analysis")
|
530 |
|
531 |
# Function to process page-level analysis
|
532 |
@spaces.GPU
|
|
|
537 |
try:
|
538 |
if hasattr(file, 'name'):
|
539 |
pdf_file_path = file.name
|
540 |
+
original_file_name = os.path.basename(file.name)
|
541 |
else:
|
542 |
# Save the file to a temporary location
|
543 |
with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_pdf:
|
544 |
temp_pdf.write(file.read())
|
545 |
pdf_file_path = temp_pdf.name
|
546 |
+
original_file_name = 'uploaded_document'
|
547 |
+
|
548 |
+
# Sanitize the file name to use in output file names
|
549 |
+
sanitized_file_name = os.path.splitext(original_file_name)[0]
|
550 |
+
sanitized_file_name = re.sub(r'[^\w\-]', '_', sanitized_file_name)
|
551 |
|
552 |
# Determine page range based on extraction_mode
|
553 |
if extraction_mode == "All Pages":
|
|
|
568 |
df_page_predictions = predict_pages(page_df)
|
569 |
|
570 |
first_plot = plot_sdg(
|
571 |
+
df_page_predictions, "π Primary SDGs", 'pred1'
|
572 |
)
|
573 |
second_plot = plot_sdg(
|
574 |
+
df_page_predictions, "π Secondary SDGs", 'pred2'
|
575 |
)
|
576 |
|
577 |
+
# Define output file names
|
578 |
+
page_csv_file = f"{sanitized_file_name}_page_predictions.csv"
|
579 |
+
page_report_file = f"{sanitized_file_name}_page_report.docx"
|
580 |
+
primary_page_jpeg = f"{sanitized_file_name}_primary_page.jpeg"
|
581 |
+
secondary_page_jpeg = f"{sanitized_file_name}_secondary_page.jpeg"
|
582 |
+
|
583 |
+
df_page_predictions.to_csv(page_csv_file, index=False)
|
584 |
+
page_report = generate_page_report(df_page_predictions, page_report_file)
|
585 |
|
586 |
# Save figures as JPEG
|
587 |
+
save_figure_as_jpeg(first_plot, primary_page_jpeg)
|
588 |
+
save_figure_as_jpeg(second_plot, secondary_page_jpeg)
|
589 |
|
590 |
return (
|
591 |
+
first_plot, second_plot, page_csv_file, page_report_file,
|
592 |
+
primary_page_jpeg, secondary_page_jpeg)
|
593 |
|
594 |
except Exception as e:
|
595 |
print(f"Error: {e}")
|
|
|
604 |
try:
|
605 |
if hasattr(file, 'name'):
|
606 |
pdf_file_path = file.name
|
607 |
+
original_file_name = os.path.basename(file.name)
|
608 |
else:
|
609 |
# Save the file to a temporary location
|
610 |
with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_pdf:
|
611 |
temp_pdf.write(file.read())
|
612 |
pdf_file_path = temp_pdf.name
|
613 |
+
original_file_name = 'uploaded_document'
|
614 |
+
|
615 |
+
# Sanitize the file name to use in output file names
|
616 |
+
sanitized_file_name = os.path.splitext(original_file_name)[0]
|
617 |
+
sanitized_file_name = re.sub(r'[^\w\-]', '_', sanitized_file_name)
|
618 |
|
619 |
# Determine page range based on extraction_mode
|
620 |
if extraction_mode == "All Pages":
|
|
|
635 |
df_sentence_predictions = predict_sentences(sentence_df)
|
636 |
|
637 |
first_plot = plot_sdg(
|
638 |
+
df_sentence_predictions, "π Primary SDGs", 'pred1'
|
639 |
)
|
640 |
second_plot = plot_sdg(
|
641 |
+
df_sentence_predictions, "π Secondary SDGs", 'pred2'
|
642 |
)
|
643 |
|
644 |
+
# Define output file names
|
645 |
+
sentence_csv_file = f"{sanitized_file_name}_sentence_predictions.csv"
|
646 |
+
sentence_report_file = f"{sanitized_file_name}_sentence_report.docx"
|
647 |
+
primary_sentence_jpeg = f"{sanitized_file_name}_primary_sentence.jpeg"
|
648 |
+
secondary_sentence_jpeg = f"{sanitized_file_name}_secondary_sentence.jpeg"
|
649 |
+
|
650 |
+
df_sentence_predictions.to_csv(sentence_csv_file, index=False)
|
651 |
+
sentence_report = generate_sentence_report(df_sentence_predictions, sentence_report_file)
|
652 |
|
653 |
# Save figures as JPEG
|
654 |
+
save_figure_as_jpeg(first_plot, primary_sentence_jpeg)
|
655 |
+
save_figure_as_jpeg(second_plot, secondary_sentence_jpeg)
|
656 |
|
657 |
return (
|
658 |
+
first_plot, second_plot, sentence_csv_file, sentence_report_file,
|
659 |
+
primary_sentence_jpeg, secondary_sentence_jpeg)
|
660 |
|
661 |
except Exception as e:
|
662 |
print(f"Error: {e}")
|