Spaces:

Chan-Y
/

Mistral-7B-Summarizer

Sleeping

App Files Community

Chan-Y committed on Jul 4, 2024

Commit

b975282

verified ·

1 Parent(s): d3aa346

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -10

app.py CHANGED Viewed

@@ -4,9 +4,9 @@ warnings.simplefilter(action='ignore', category=FutureWarning)
 import PyPDF2
 import gradio as gr
 from langchain.prompts import PromptTemplate
-from langchain.chains.summarize import load_summarize_chain
 from pathlib import Path
 from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
 llm = HuggingFaceEndpoint(
     repo_id="mistralai/Mistral-7B-Instruct-v0.3",
@@ -25,6 +25,7 @@ def read_pdf(file_path):
     return text
 def summarize(file, n_words):
     # Read the content of the uploaded file
     file_path = file.name
     if file_path.endswith('.pdf'):
@@ -33,27 +34,44 @@ def summarize(file, n_words):
         with open(file_path, 'r', encoding='utf-8') as f:
             text = f.read()
-    template = '''
 Please carefully read the following document:
 <document>
 {TEXT}
 </document>
-After reading through the document, identify the language, and pinpoint the key points and main ideas covered in the text. Organize these key points into a concise bulleted list that summarizes the essential information from the document. The summary should consist of a maximum of 10 bullet points.
-Ensure that the final summary is in the language you identified from the document.
-Your goal is to comprehensively capture the core content of the document while expressing each summary point succinctly. Omit minor details and focus on central themes and important facts.
 '''
-    prompt = PromptTemplate(
-        template=template,
         input_variables=['TEXT']
     )
-    formatted_prompt = prompt.format(TEXT=text)
-    output_summary = llm_engine_hf.invoke(formatted_prompt)
-    return output_summary.content
 def download_summary(output_text):
     if output_text:

 import PyPDF2
 import gradio as gr
 from langchain.prompts import PromptTemplate
 from pathlib import Path
 from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
+from langchain_core.output_parsers import JsonOutputParser
 llm = HuggingFaceEndpoint(
     repo_id="mistralai/Mistral-7B-Instruct-v0.3",
     return text
 def summarize(file, n_words):
+    global llm
     # Read the content of the uploaded file
     file_path = file.name
     if file_path.endswith('.pdf'):
         with open(file_path, 'r', encoding='utf-8') as f:
             text = f.read()
+    template_detect = '''
 Please carefully read the following document:
 <document>
 {TEXT}
 </document>
+identify the language, return detected language in json format with key "language" and value is the detected language
 '''
+    prompt_detect = PromptTemplate(
+        template=template_detect,
         input_variables=['TEXT']
     )
+    language_detect = prompt_detect | llm | JsonOutputParser()
+    formatted_prompt = prompt_detect.format(TEXT=text)
+    language = language_detect.invoke(formatted_prompt)
+    lang = language["language"]
+    template_translate = '''
+Please carefully read the following document:
+<document>
+{TEXT}
+</document>
+After reading through the document, pinpoint the key points and main ideas covered in the text.
+Organize these key points into a concise bulleted list that summarizes the essential information from the document.
+The summary should be in {LANG} language.
+'''
+    prompt_summarize = PromptTemplate(
+        template=template_translate,
+        input_variables=["TEXT", "LANG"]
+    )
+    formatted_prompt = prompt_summarize.format(TEXT=text, LANG=lang)
+    summary = llm.invoke(formatted_prompt)
+    return summary
 def download_summary(output_text):
     if output_text: