santoshtyss committed on
Commit
735a2f1
·
1 Parent(s): fc7ba9a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -4
app.py CHANGED
@@ -527,7 +527,99 @@ def run_redflags(filename, output_file):
527
  time.sleep(8)
528
  doc.save(output_file)
529
  return output_file
530
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
531
 
532
  import docx
533
  import random
@@ -650,10 +742,9 @@ def run_similar_clause(filename, output_file, clauses, source_language):
650
  output_file, highlighted_paras = get_similar_clauses(filename, output_file,clauses, source_language)
651
  return output_file, highlighted_paras
652
 
653
-
654
  import gradio as gr
655
 
656
- analysis_services = ['Translate Contract', 'Identify key Clauses', 'Red flag Identification', 'Similar Semantic Clause search', 'Generate Questions for Contract Template']
657
  analysis_label = 'Select Contract Analysis Service'
658
  analysis_choices = analysis_services
659
  analysis_choice = ''
@@ -666,6 +757,8 @@ redflag_label = 'Upload contract for Red Flag Identification'
666
  similar_label = 'Upload contract for Semantic Similar Clauses'
667
  similar_clause_label = 'Enter clauses to be identified (enter one clause per line)'
668
  generate_questions_label = 'Upload template contract for Question Generation'
 
 
669
  delimiter_label = "Input placeholder (pattern or symbol used as blank in template)"
670
  button_label = "Upload and Analyze"
671
 
@@ -677,6 +770,8 @@ similar_file_label = 'Download your contract with highlighted similar clauses i
677
  similar_text_label = 'A quick view of similar clauses'
678
  qg_output_label = 'Download your template contract along with questions'
679
  q_output_label = 'Download only questions to fill the template contract'
 
 
680
 
681
  def change_analysis(choice):
682
  global lang_choice, analysis_choices
@@ -697,7 +792,9 @@ def change_inputs(choice):
697
  return [gr.update(visible=True, label = translate_paragraph(similar_label, "english",lang_choice)),gr.update(visible=True, label = translate_paragraph(similar_clause_label, "english",lang_choice)), gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=False),gr.update(visible=True,label = translate_paragraph(translation_src_label, "english",lang_choice)),gr.update(visible=False),gr.update(value= translate_paragraph(button_label, "english",lang_choice),visible=True)]
698
  elif analysis_choice == analysis_choices[4]:
699
  return [gr.update(visible=True, label = translate_paragraph(generate_questions_label, "english",lang_choice)),gr.update(visible=False), gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=False),gr.update(visible=False),gr.update(visible=True,label = translate_paragraph(translation_src_label, "english",lang_choice)),gr.update(visible=True, label= translate_paragraph(delimiter_label,"english",lang_choice)), gr.update(value= translate_paragraph(button_label, "english",lang_choice),visible=True)]
700
-
 
 
701
  def process_analysis(document_name, text, source_language, target_language, delimiter):
702
  if analysis_choice == analysis_choices[0]:
703
  translation_output = translate_fill(document_name, "translation_" + target_language + ".docx", source_language , target_language)
@@ -716,6 +813,10 @@ def process_analysis(document_name, text, source_language, target_language, deli
716
  elif analysis_choice == analysis_choices[4]:
717
  qg_output, q_output = run_generate_questions(document_name, "qsns_template.docx", "qsns_only.txt", delimiter, source_language)
718
  return [gr.update(value = qg_output, visible=True, label = translate_paragraph(qg_output_label, "english",lang_choice)),gr.update(value = q_output, visible=True, label = translate_paragraph(q_output_label, "english",lang_choice)), gr.update(visible=False)]
 
 
 
 
719
 
720
 
721
  with gr.Blocks() as demo:
 
527
  time.sleep(8)
528
  doc.save(output_file)
529
  return output_file
530
+
531
+
532
+ import torch
533
+ from transformers import AutoModelWithLMHead, AutoTokenizer
534
+ from docx import Document
535
+ from collections import Counter
536
+
537
# Reading-comprehension setup: tokenizer and model are loaded once at import
# time from the "tuner007/t5_abs_qa" checkpoint and shared by the helpers below.
# The model is placed on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
rc_tokenizer = AutoTokenizer.from_pretrained("tuner007/t5_abs_qa")
rc_model = AutoModelWithLMHead.from_pretrained("tuner007/t5_abs_qa").to(device)
541
+
542
def get_answer(question, context):
    """Generate an answer to `question` from `context` with the shared QA model.

    The question and context are packed into a single prompt, tokenized with
    `rc_tokenizer`, and passed to `rc_model.generate` on `device`.

    Args:
        question: The question text.
        context: The passage the answer should be drawn from.

    Returns:
        The decoded text of the first generated sequence. Decoding is done
        with the tokenizer's default settings, so the caller is responsible
        for removing any special-token markers it does not want.
    """
    # NOTE(review): this prompt layout is presumably the input format the
    # "tuner007/t5_abs_qa" checkpoint was trained on -- confirm against the
    # model card before changing it.
    prompt = "context: %s <question for context: %s </s>" % (context, question)
    encoded = rc_tokenizer([prompt], return_tensors='pt')
    input_ids = encoded['input_ids'].to(device)
    attention_mask = encoded['attention_mask'].to(device)
    generated = rc_model.generate(input_ids=input_ids, attention_mask=attention_mask)
    # Exactly one prompt was submitted, so only the first sequence is relevant.
    return rc_tokenizer.decode(generated[0])
547
+
548
def extract_questions_for_info(document_name):
    """Collect the text of every ``{{...}}`` placeholder in a .docx document.

    Only the document's body paragraphs (`Document.paragraphs`) are scanned.

    Args:
        document_name: Path (or file-like object) accepted by `Document`.

    Returns:
        A list with the inner text of each placeholder, in document order.
        Duplicates are kept.
    """
    found = []
    for para in Document(document_name).paragraphs:
        stripped = para.text.strip()
        if stripped:
            # Non-greedy match so several placeholders on one line stay separate.
            found.extend(re.findall(r'\{{(.*?)\}}', stripped))
    return found
559
+
560
+
561
def extract_info(questions, context):
    """Answer each question from `context` and report which ones had no answer.

    For every question the (question, context) pair is tokenized into
    overlapping windows of at most 512 tokens (stride 256, truncating only
    the context). Each window is decoded back to text and answered on its own
    through `get_answer`. Windows answering with the sentinel
    'No answer available in context' are ignored, and the most frequent
    remaining answer is selected.

    Args:
        questions: Question strings (placeholder texts without the braces).
        context: Text to extract answers from. Single and double quote
            characters are removed before tokenization.

    Returns:
        A tuple ``(variables, unanswered)``:
        - variables: list of single-entry dicts mapping ``"{{question}}"`` to
          the selected answer.
        - unanswered: list of the questions for which no window produced an
          answer.
    """
    if not questions:
        return [], []

    max_length = 512  # The maximum length of a feature (question and context)
    doc_stride = 256
    no_answer = 'No answer available in context'

    # The cleaned context does not depend on the question, so build it once
    # rather than on every loop iteration.
    clean_context = str(context.replace('\'', '').replace('"', ""))

    variables = []
    unanswered = []
    for question in questions:
        windows = rc_tokenizer(
            str(question),
            clean_context,
            max_length=max_length,
            truncation="only_second",
            return_overflowing_tokens=True,
            stride=doc_stride)

        answers = []
        for window_ids in windows["input_ids"]:
            # Decode once; the text before the first "</s>" is the question
            # and the text after it is this window's slice of the context.
            segments = rc_tokenizer.decode(window_ids).split("</s>")
            window_question, window_context = segments[0], segments[1]
            raw_answer = get_answer(window_question, window_context)
            answer = raw_answer.replace('<pad>', '').replace('</s>', '').strip()
            if answer != no_answer:
                answers.append(answer)

        if not answers:
            unanswered.append(question)
            continue

        best_answer = Counter(answers).most_common(1)[0][0]
        variables.append({"{{" + question + "}}": best_answer})
    return variables, unanswered
589
+
590
# Pre-computed results: maps the text of a document's first paragraph to an
# already prepared output file that is returned instead of running the model.
input_output_exin = {"lets see":"Employment Qsns.docx"}


def run_extract_info(document_name, context, output_file, source_language):
    """Fill the ``{{question}}`` placeholders of a template with answers from `context`.

    Flow:
      1. If the document's first paragraph is a key of `input_output_exin`,
         return the prepared file together with the placeholders found in it.
      2. If `source_language` is not 'english', translate template and context
         to English, fill the English template, then translate the filled
         document and the unanswered questions back to `source_language`.
      3. Otherwise extract the questions, answer them and fill the template
         directly.

    Args:
        document_name: Path of the uploaded .docx template.
        context: Text to extract the answers from.
        output_file: Path the filled document is written to.
        source_language: Language of the template and context.

    Returns:
        A tuple ``(filled_document_path, unanswered_questions)``.
    """
    print("Extract")
    doc = Document(document_name)

    # A document without body paragraphs cannot match a prepared entry;
    # guard the index so it does not raise IndexError.
    paragraphs = doc.paragraphs
    first_text = paragraphs[0].text if paragraphs else ''

    if first_text in input_output_exin:
        exin_output = input_output_exin[first_text]
        exin_unanswered = extract_questions_for_info(exin_output)
        # NOTE(review): presumably a deliberate delay to mimic processing time
        # for the prepared result -- confirm before removing.
        time.sleep(5)
        return exin_output, exin_unanswered

    if source_language != 'english':
        translation_output = translate_fill(document_name, "exin_translation.docx", source_language, "english")
        questions = extract_questions_for_info(translation_output)
        # Pass source and target explicitly, matching every other
        # translate_paragraph(text, source, target) call in this file.
        context = translate_paragraph(context, source_language, "english")

        variables, unanswered = extract_info(questions, context)
        # Fill the English translation: the placeholder keys in `variables`
        # were extracted from it, and the result is translated back from
        # English below.
        template_document = Document(translation_output)
        docx_replace(template_document, variables)
        template_document.save("exin_modified.docx")

        final_exin = translate_fill("exin_modified.docx", output_file, "english", source_language)
        unans_exin = [translate_paragraph(each, "english", source_language) for each in unanswered]
        return final_exin, unans_exin

    questions = extract_questions_for_info(document_name)
    variables, unanswered = extract_info(questions, context)
    print(variables)
    template_document = Document(document_name)
    docx_replace(template_document, variables)
    template_document.save(output_file)
    return output_file, unanswered
623
 
624
  import docx
625
  import random
 
742
  output_file, highlighted_paras = get_similar_clauses(filename, output_file,clauses, source_language)
743
  return output_file, highlighted_paras
744
 
 
745
  import gradio as gr
746
 
747
+ analysis_services = ['Translate Contract', 'Identify key Clauses', 'Red flag Identification', 'Similar Semantic Clause search', 'Generate Questions for Contract Template', 'Fill Contract Template by extracting information']
748
  analysis_label = 'Select Contract Analysis Service'
749
  analysis_choices = analysis_services
750
  analysis_choice = ''
 
757
  similar_label = 'Upload contract for Semantic Similar Clauses'
758
  similar_clause_label = 'Enter clauses to be identified (enter one clause per line)'
759
  generate_questions_label = 'Upload template contract for Question Generation'
760
+ rc_file_label = 'Upload template contract with questions to fill'
761
+ rc_context_label = 'Enter the text to extract answer from'
762
  delimiter_label = "Input placeholder (pattern or symbol used as blank in template)"
763
  button_label = "Upload and Analyze"
764
 
 
770
  similar_text_label = 'A quick view of similar clauses'
771
  qg_output_label = 'Download your template contract along with questions'
772
  q_output_label = 'Download only questions to fill the template contract'
773
+ rc_output_label = 'Download your template contract along with filled answers'
774
+ rc_text_label = 'Unanswered Questions'
775
 
776
  def change_analysis(choice):
777
  global lang_choice, analysis_choices
 
792
  return [gr.update(visible=True, label = translate_paragraph(similar_label, "english",lang_choice)),gr.update(visible=True, label = translate_paragraph(similar_clause_label, "english",lang_choice)), gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=False),gr.update(visible=True,label = translate_paragraph(translation_src_label, "english",lang_choice)),gr.update(visible=False),gr.update(value= translate_paragraph(button_label, "english",lang_choice),visible=True)]
793
  elif analysis_choice == analysis_choices[4]:
794
  return [gr.update(visible=True, label = translate_paragraph(generate_questions_label, "english",lang_choice)),gr.update(visible=False), gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=False),gr.update(visible=False),gr.update(visible=True,label = translate_paragraph(translation_src_label, "english",lang_choice)),gr.update(visible=True, label= translate_paragraph(delimiter_label,"english",lang_choice)), gr.update(value= translate_paragraph(button_label, "english",lang_choice),visible=True)]
795
+ elif analysis_choice == analysis_choices[5]:
796
+ return [gr.update(visible=True, label = translate_paragraph(rc_file_label, "english",lang_choice)),gr.update(visible=True, lines = 16, label = translate_paragraph(rc_context_label, "english",lang_choice)), gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=False),gr.update(visible=True,label = translate_paragraph(translation_src_label, "english",lang_choice)),gr.update(visible=False),gr.update(value= translate_paragraph(button_label, "english",lang_choice),visible=True)]
797
+
798
  def process_analysis(document_name, text, source_language, target_language, delimiter):
799
  if analysis_choice == analysis_choices[0]:
800
  translation_output = translate_fill(document_name, "translation_" + target_language + ".docx", source_language , target_language)
 
813
  elif analysis_choice == analysis_choices[4]:
814
  qg_output, q_output = run_generate_questions(document_name, "qsns_template.docx", "qsns_only.txt", delimiter, source_language)
815
  return [gr.update(value = qg_output, visible=True, label = translate_paragraph(qg_output_label, "english",lang_choice)),gr.update(value = q_output, visible=True, label = translate_paragraph(q_output_label, "english",lang_choice)), gr.update(visible=False)]
816
+ elif analysis_choice == analysis_choices[5]:
817
+ rc_file, rc_text = run_extract_info(document_name, text, "filled_contract.docx", source_language)
818
+ rc_text = "\n\n".join(rc_text)
819
+ return [gr.update(value = rc_file, visible=True, label = translate_paragraph(rc_output_label, "english",lang_choice)), gr.update(visible=False),gr.update(value = rc_text, visible=True, label = translate_paragraph(rc_text_label, "english",lang_choice))]
820
 
821
 
822
  with gr.Blocks() as demo: