Spaces:
Build error
Build error
santoshtyss
committed on
Commit
·
735a2f1
1
Parent(s):
fc7ba9a
Update app.py
Browse files
app.py
CHANGED
@@ -527,7 +527,99 @@ def run_redflags(filename, output_file):
|
|
527 |
time.sleep(8)
|
528 |
doc.save(output_file)
|
529 |
return output_file
|
530 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
531 |
|
532 |
import docx
|
533 |
import random
|
@@ -650,10 +742,9 @@ def run_similar_clause(filename, output_file, clauses, source_language):
|
|
650 |
output_file, highlighted_paras = get_similar_clauses(filename, output_file,clauses, source_language)
|
651 |
return output_file, highlighted_paras
|
652 |
|
653 |
-
|
654 |
import gradio as gr
|
655 |
|
656 |
-
analysis_services = ['Translate Contract', 'Identify key Clauses', 'Red flag Identification', 'Similar Semantic Clause search', 'Generate Questions for Contract Template']
|
657 |
analysis_label = 'Select Contract Analysis Service'
|
658 |
analysis_choices = analysis_services
|
659 |
analysis_choice = ''
|
@@ -666,6 +757,8 @@ redflag_label = 'Upload contract for Red Flag Identification'
|
|
666 |
similar_label = 'Upload contract for Semantic Similar Clauses'
|
667 |
similar_clause_label = 'Enter clauses to be identified (enter one clause per line)'
|
668 |
generate_questions_label = 'Upload template contract for Question Generation'
|
|
|
|
|
669 |
delimiter_label = "Input placeholder (pattern or symbol used as blank in template)"
|
670 |
button_label = "Upload and Analyze"
|
671 |
|
@@ -677,6 +770,8 @@ similar_file_label = 'Download your contract with highlighted similar clauses i
|
|
677 |
similar_text_label = 'A quick view of similar clauses'
|
678 |
qg_output_label = 'Download your template contract along with questions'
|
679 |
q_output_label = 'Download only questions to fill the template contract'
|
|
|
|
|
680 |
|
681 |
def change_analysis(choice):
|
682 |
global lang_choice, analysis_choices
|
@@ -697,7 +792,9 @@ def change_inputs(choice):
|
|
697 |
return [gr.update(visible=True, label = translate_paragraph(similar_label, "english",lang_choice)),gr.update(visible=True, label = translate_paragraph(similar_clause_label, "english",lang_choice)), gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=False),gr.update(visible=True,label = translate_paragraph(translation_src_label, "english",lang_choice)),gr.update(visible=False),gr.update(value= translate_paragraph(button_label, "english",lang_choice),visible=True)]
|
698 |
elif analysis_choice == analysis_choices[4]:
|
699 |
return [gr.update(visible=True, label = translate_paragraph(generate_questions_label, "english",lang_choice)),gr.update(visible=False), gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=False),gr.update(visible=False),gr.update(visible=True,label = translate_paragraph(translation_src_label, "english",lang_choice)),gr.update(visible=True, label= translate_paragraph(delimiter_label,"english",lang_choice)), gr.update(value= translate_paragraph(button_label, "english",lang_choice),visible=True)]
|
700 |
-
|
|
|
|
|
701 |
def process_analysis(document_name, text, source_language, target_language, delimiter):
|
702 |
if analysis_choice == analysis_choices[0]:
|
703 |
translation_output = translate_fill(document_name, "translation_" + target_language + ".docx", source_language , target_language)
|
@@ -716,6 +813,10 @@ def process_analysis(document_name, text, source_language, target_language, deli
|
|
716 |
elif analysis_choice == analysis_choices[4]:
|
717 |
qg_output, q_output = run_generate_questions(document_name, "qsns_template.docx", "qsns_only.txt", delimiter, source_language)
|
718 |
return [gr.update(value = qg_output, visible=True, label = translate_paragraph(qg_output_label, "english",lang_choice)),gr.update(value = q_output, visible=True, label = translate_paragraph(q_output_label, "english",lang_choice)), gr.update(visible=False)]
|
|
|
|
|
|
|
|
|
719 |
|
720 |
|
721 |
with gr.Blocks() as demo:
|
|
|
527 |
time.sleep(8)
|
528 |
doc.save(output_file)
|
529 |
return output_file
|
530 |
+
|
531 |
+
|
532 |
+
import torch
|
533 |
+
from transformers import AutoModelWithLMHead, AutoTokenizer
|
534 |
+
from docx import Document
|
535 |
+
from collections import Counter
|
536 |
+
|
537 |
+
# Reading-comprehension (question answering) model used to fill contract
# templates. Everything here is loaded once at import time and shared by
# get_answer() / extract_info().
# NOTE(review): AutoModelWithLMHead is reported as deprecated in recent
# transformers releases in favour of task-specific Auto classes -- confirm
# against the pinned transformers version before changing it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
rc_tokenizer = AutoTokenizer.from_pretrained("tuner007/t5_abs_qa")
rc_model = AutoModelWithLMHead.from_pretrained("tuner007/t5_abs_qa").to(device)
|
541 |
+
|
542 |
+
def get_answer(question, context):
    """Generate an answer for ``question`` from ``context`` with the QA model.

    The prompt is tokenized with the module-level ``rc_tokenizer``, moved to
    ``device`` and passed to ``rc_model.generate``.

    Returns:
        The decoded first generated sequence. Special tokens are not stripped
        here; the caller is expected to clean them up.
    """
    prompt = "context: %s <question for context: %s </s>" % (context, question)
    encoded = rc_tokenizer([prompt], return_tensors='pt')
    input_ids = encoded['input_ids'].to(device)
    attention_mask = encoded['attention_mask'].to(device)
    generated = rc_model.generate(input_ids=input_ids, attention_mask=attention_mask)
    return rc_tokenizer.decode(generated[0])
|
547 |
+
|
548 |
+
def extract_questions_for_info(document_name):
    """Collect the text of every ``{{...}}`` placeholder in a .docx document.

    Only the document's top-level paragraphs are scanned; blank paragraphs
    are skipped.

    Args:
        document_name: Anything accepted by ``Document(...)``.

    Returns:
        A list of placeholder contents (without the surrounding braces), in
        document order, duplicates included.
    """
    placeholders = []
    for para in Document(document_name).paragraphs:
        stripped = para.text.strip()
        if stripped == '':
            continue
        placeholders.extend(re.findall(r'\{{(.*?)\}}', stripped))
    return placeholders
|
559 |
+
|
560 |
+
|
561 |
+
def extract_info(questions, context):
    """Answer each template question from ``context`` using the QA model.

    The context is tokenized together with each question into overlapping
    windows; every window is answered separately with ``get_answer`` and the
    most frequent real answer across windows wins.

    Args:
        questions: Placeholder texts (the part between ``{{`` and ``}}``).
        context: Free text to extract the answers from.

    Returns:
        A ``(variables, unanswered)`` tuple. ``variables`` is a list of
        single-entry dicts mapping ``"{{question}}"`` to its answer;
        ``unanswered`` lists the questions for which no window produced an
        answer.
    """
    variables = []
    unanswered = []
    if not questions:
        # Nothing to ask: skip context clean-up and tokenization entirely.
        return variables, unanswered

    max_length = 512  # The maximum length of a feature (question and context)
    doc_stride = 256  # Overlap between consecutive context windows
    no_answer = 'No answer available in context'
    # Quote characters are removed from the context before tokenization;
    # this does not depend on the question, so do it once.
    clean_context = str(context.replace('\'', '').replace('"', ""))

    for question in questions:
        windows = rc_tokenizer(
            str(question),
            clean_context,
            max_length=max_length,
            truncation="only_second",
            return_overflowing_tokens=True,
            stride=doc_stride)

        answers = []
        for ids in windows["input_ids"]:
            # Decode once and split into the question and context segments.
            segments = rc_tokenizer.decode(ids).split("</s>")
            window_question, window_context = segments[0], segments[1]
            raw_answer = get_answer(window_question, window_context)
            answer = raw_answer.replace('<pad>', '').replace('</s>', '').strip()
            if answer != no_answer:
                answers.append(answer)

        if not answers:
            unanswered.append(question)
        else:
            best_answer = Counter(answers).most_common(1)[0][0]
            variables.append({"{{" + question + "}}": best_answer})
    return variables, unanswered
|
589 |
+
|
590 |
+
# Canned results: when the first paragraph of an uploaded template equals a
# key, the mapped file is returned as the output instead of running the model.
input_output_exin = {"lets see":"Employment Qsns.docx"}


def run_extract_info(document_name, context, output_file, source_language):
    """Fill the ``{{question}}`` placeholders of a template from ``context``.

    Args:
        document_name: The .docx template containing ``{{...}}`` placeholders.
        context: Text to extract the answers from.
        output_file: Path the filled document is written to.
        source_language: Language of the template and context. Anything other
            than ``'english'`` is translated to English for extraction and
            the result is translated back.

    Returns:
        A ``(filled_document, unanswered_questions)`` tuple.
    """
    print("Extract")
    doc = docx.Document(document_name)

    # Guard against documents with no paragraphs before peeking at the first.
    first_text = doc.paragraphs[0].text if doc.paragraphs else None
    if first_text in input_output_exin:
        exin_output = input_output_exin[first_text]
        exin_unanswered = extract_questions_for_info(exin_output)
        time.sleep(5)
        return exin_output, exin_unanswered

    if source_language != 'english':
        # Work on an English copy of both the template and the context.
        # NOTE(review): assumes translate_fill returns the path of the
        # translated document, as its other call sites suggest -- confirm.
        translation_output = translate_fill(document_name, "exin_translation.docx", source_language, "english")
        questions = extract_questions_for_info(translation_output)
        context = translate_paragraph(context, source_language, "english")

        variables, unanswered = extract_info(questions, context)
        # Fill the translated template: the placeholder keys in ``variables``
        # come from it, so they would not match the untranslated original.
        template_document = Document(translation_output)
        docx_replace(template_document, variables)
        template_document.save("exin_modified.docx")

        final_exin = translate_fill("exin_modified.docx", output_file, "english", source_language)
        unans_exin = [translate_paragraph(each, "english", source_language) for each in unanswered]
        return final_exin, unans_exin

    questions = extract_questions_for_info(document_name)
    variables, unanswered = extract_info(questions, context)
    print(variables)
    template_document = Document(document_name)
    docx_replace(template_document, variables)
    template_document.save(output_file)
    return output_file, unanswered
|
623 |
|
624 |
import docx
|
625 |
import random
|
|
|
742 |
output_file, highlighted_paras = get_similar_clauses(filename, output_file,clauses, source_language)
|
743 |
return output_file, highlighted_paras
|
744 |
|
|
|
745 |
import gradio as gr
|
746 |
|
747 |
+
analysis_services = ['Translate Contract', 'Identify key Clauses', 'Red flag Identification', 'Similar Semantic Clause search', 'Generate Questions for Contract Template', 'Fill Contract Template by extracting information']
|
748 |
analysis_label = 'Select Contract Analysis Service'
|
749 |
analysis_choices = analysis_services
|
750 |
analysis_choice = ''
|
|
|
757 |
similar_label = 'Upload contract for Semantic Similar Clauses'
|
758 |
similar_clause_label = 'Enter clauses to be identified (enter one clause per line)'
|
759 |
generate_questions_label = 'Upload template contract for Question Generation'
|
760 |
+
rc_file_label = 'Upload template contract with questions to fill'
|
761 |
+
rc_context_label = 'Enter the text to extract answer from'
|
762 |
delimiter_label = "Input placeholder (pattern or symbol used as blank in template)"
|
763 |
button_label = "Upload and Analyze"
|
764 |
|
|
|
770 |
similar_text_label = 'A quick view of similar clauses'
|
771 |
qg_output_label = 'Download your template contract along with questions'
|
772 |
q_output_label = 'Download only questions to fill the template contract'
|
773 |
+
rc_output_label = 'Download your template contract along with filled answers'
|
774 |
+
rc_text_label = 'Unanswered Questions'
|
775 |
|
776 |
def change_analysis(choice):
|
777 |
global lang_choice, analysis_choices
|
|
|
792 |
return [gr.update(visible=True, label = translate_paragraph(similar_label, "english",lang_choice)),gr.update(visible=True, label = translate_paragraph(similar_clause_label, "english",lang_choice)), gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=False),gr.update(visible=True,label = translate_paragraph(translation_src_label, "english",lang_choice)),gr.update(visible=False),gr.update(value= translate_paragraph(button_label, "english",lang_choice),visible=True)]
|
793 |
elif analysis_choice == analysis_choices[4]:
|
794 |
return [gr.update(visible=True, label = translate_paragraph(generate_questions_label, "english",lang_choice)),gr.update(visible=False), gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=False),gr.update(visible=False),gr.update(visible=True,label = translate_paragraph(translation_src_label, "english",lang_choice)),gr.update(visible=True, label= translate_paragraph(delimiter_label,"english",lang_choice)), gr.update(value= translate_paragraph(button_label, "english",lang_choice),visible=True)]
|
795 |
+
elif analysis_choice == analysis_choices[5]:
|
796 |
+
return [gr.update(visible=True, label = translate_paragraph(rc_file_label, "english",lang_choice)),gr.update(visible=True, lines = 16, label = translate_paragraph(rc_context_label, "english",lang_choice)), gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=False),gr.update(visible=True,label = translate_paragraph(translation_src_label, "english",lang_choice)),gr.update(visible=False),gr.update(value= translate_paragraph(button_label, "english",lang_choice),visible=True)]
|
797 |
+
|
798 |
def process_analysis(document_name, text, source_language, target_language, delimiter):
|
799 |
if analysis_choice == analysis_choices[0]:
|
800 |
translation_output = translate_fill(document_name, "translation_" + target_language + ".docx", source_language , target_language)
|
|
|
813 |
elif analysis_choice == analysis_choices[4]:
|
814 |
qg_output, q_output = run_generate_questions(document_name, "qsns_template.docx", "qsns_only.txt", delimiter, source_language)
|
815 |
return [gr.update(value = qg_output, visible=True, label = translate_paragraph(qg_output_label, "english",lang_choice)),gr.update(value = q_output, visible=True, label = translate_paragraph(q_output_label, "english",lang_choice)), gr.update(visible=False)]
|
816 |
+
elif analysis_choice == analysis_choices[5]:
|
817 |
+
rc_file, rc_text = run_extract_info(document_name, text, "filled_contract.docx", source_language)
|
818 |
+
rc_text = "\n\n".join(rc_text)
|
819 |
+
return [gr.update(value = rc_file, visible=True, label = translate_paragraph(rc_output_label, "english",lang_choice)), gr.update(visible=False),gr.update(value = rc_text, visible=True, label = translate_paragraph(rc_text_label, "english",lang_choice))]
|
820 |
|
821 |
|
822 |
with gr.Blocks() as demo:
|