Chan-Y committed on
Commit
b975282
·
verified ·
1 Parent(s): d3aa346

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -10
app.py CHANGED
@@ -4,9 +4,9 @@ warnings.simplefilter(action='ignore', category=FutureWarning)
4
  import PyPDF2
5
  import gradio as gr
6
  from langchain.prompts import PromptTemplate
7
- from langchain.chains.summarize import load_summarize_chain
8
  from pathlib import Path
9
  from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
 
10
 
11
  llm = HuggingFaceEndpoint(
12
  repo_id="mistralai/Mistral-7B-Instruct-v0.3",
@@ -25,6 +25,7 @@ def read_pdf(file_path):
25
  return text
26
 
27
  def summarize(file, n_words):
 
28
  # Read the content of the uploaded file
29
  file_path = file.name
30
  if file_path.endswith('.pdf'):
@@ -33,27 +34,44 @@ def summarize(file, n_words):
33
  with open(file_path, 'r', encoding='utf-8') as f:
34
  text = f.read()
35
 
36
- template = '''
37
  Please carefully read the following document:
38
 
39
  <document>
40
  {TEXT}
41
  </document>
42
 
43
- After reading through the document, identify the language, and pinpoint the key points and main ideas covered in the text. Organize these key points into a concise bulleted list that summarizes the essential information from the document. The summary should consist of a maximum of 10 bullet points.
44
- Ensure that the final summary is in the language you identified from the document.
45
- Your goal is to comprehensively capture the core content of the document while expressing each summary point succinctly. Omit minor details and focus on central themes and important facts.
46
  '''
47
 
48
- prompt = PromptTemplate(
49
- template=template,
50
  input_variables=['TEXT']
51
  )
52
 
53
- formatted_prompt = prompt.format(TEXT=text)
54
- output_summary = llm_engine_hf.invoke(formatted_prompt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
- return output_summary.content
57
 
58
  def download_summary(output_text):
59
  if output_text:
 
4
  import PyPDF2
5
  import gradio as gr
6
  from langchain.prompts import PromptTemplate
 
7
  from pathlib import Path
8
  from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
9
+ from langchain_core.output_parsers import JsonOutputParser
10
 
11
  llm = HuggingFaceEndpoint(
12
  repo_id="mistralai/Mistral-7B-Instruct-v0.3",
 
25
  return text
26
 
27
  def summarize(file, n_words):
28
+ global llm
29
  # Read the content of the uploaded file
30
  file_path = file.name
31
  if file_path.endswith('.pdf'):
 
34
  with open(file_path, 'r', encoding='utf-8') as f:
35
  text = f.read()
36
 
37
+ template_detect = '''
38
  Please carefully read the following document:
39
 
40
  <document>
41
  {TEXT}
42
  </document>
43
 
44
+ identify the language, return detected language in json format with key "language" and value is the detected language
 
 
45
  '''
46
 
47
+ prompt_detect = PromptTemplate(
48
+ template=template_detect,
49
  input_variables=['TEXT']
50
  )
51
 
52
+ language_detect = prompt_detect | llm | JsonOutputParser()
53
+ formatted_prompt = prompt_detect.format(TEXT=text)
54
+ language = language_detect.invoke(formatted_prompt)
55
+
56
+ lang = language["language"]
57
+ template_translate = '''
58
+ Please carefully read the following document:
59
+ <document>
60
+ {TEXT}
61
+ </document>
62
+ After reading through the document, pinpoint the key points and main ideas covered in the text.
63
+ Organize these key points into a concise bulleted list that summarizes the essential information from the document.
64
+ The summary should be in {LANG} language.
65
+ '''
66
+
67
+ prompt_summarize = PromptTemplate(
68
+ template=template_translate,
69
+ input_variables=["TEXT", "LANG"]
70
+ )
71
+ formatted_prompt = prompt_summarize.format(TEXT=text, LANG=lang)
72
+ summary = llm.invoke(formatted_prompt)
73
 
74
+ return summary
75
 
76
  def download_summary(output_text):
77
  if output_text: