Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,9 +4,9 @@ warnings.simplefilter(action='ignore', category=FutureWarning)
|
|
4 |
import PyPDF2
|
5 |
import gradio as gr
|
6 |
from langchain.prompts import PromptTemplate
|
7 |
-
from langchain.chains.summarize import load_summarize_chain
|
8 |
from pathlib import Path
|
9 |
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
|
|
|
10 |
|
11 |
llm = HuggingFaceEndpoint(
|
12 |
repo_id="mistralai/Mistral-7B-Instruct-v0.3",
|
@@ -25,6 +25,7 @@ def read_pdf(file_path):
|
|
25 |
return text
|
26 |
|
27 |
def summarize(file, n_words):
|
|
|
28 |
# Read the content of the uploaded file
|
29 |
file_path = file.name
|
30 |
if file_path.endswith('.pdf'):
|
@@ -33,27 +34,44 @@ def summarize(file, n_words):
|
|
33 |
with open(file_path, 'r', encoding='utf-8') as f:
|
34 |
text = f.read()
|
35 |
|
36 |
-
|
37 |
Please carefully read the following document:
|
38 |
|
39 |
<document>
|
40 |
{TEXT}
|
41 |
</document>
|
42 |
|
43 |
-
|
44 |
-
Ensure that the final summary is in the language you identified from the document.
|
45 |
-
Your goal is to comprehensively capture the core content of the document while expressing each summary point succinctly. Omit minor details and focus on central themes and important facts.
|
46 |
'''
|
47 |
|
48 |
-
|
49 |
-
template=
|
50 |
input_variables=['TEXT']
|
51 |
)
|
52 |
|
53 |
-
|
54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
|
56 |
-
return
|
57 |
|
58 |
def download_summary(output_text):
|
59 |
if output_text:
|
|
|
4 |
import PyPDF2
|
5 |
import gradio as gr
|
6 |
from langchain.prompts import PromptTemplate
|
|
|
7 |
from pathlib import Path
|
8 |
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
|
9 |
+
from langchain_core.output_parsers import JsonOutputParser
|
10 |
|
11 |
llm = HuggingFaceEndpoint(
|
12 |
repo_id="mistralai/Mistral-7B-Instruct-v0.3",
|
|
|
25 |
return text
|
26 |
|
27 |
def summarize(file, n_words):
|
28 |
+
global llm
|
29 |
# Read the content of the uploaded file
|
30 |
file_path = file.name
|
31 |
if file_path.endswith('.pdf'):
|
|
|
34 |
with open(file_path, 'r', encoding='utf-8') as f:
|
35 |
text = f.read()
|
36 |
|
37 |
+
template_detect = '''
|
38 |
Please carefully read the following document:
|
39 |
|
40 |
<document>
|
41 |
{TEXT}
|
42 |
</document>
|
43 |
|
44 |
+
identify the language, return detected language in json format with key "language" and value is the detected language
|
|
|
|
|
45 |
'''
|
46 |
|
47 |
+
prompt_detect = PromptTemplate(
|
48 |
+
template=template_detect,
|
49 |
input_variables=['TEXT']
|
50 |
)
|
51 |
|
52 |
+
language_detect = prompt_detect | llm | JsonOutputParser()
|
53 |
+
formatted_prompt = prompt_detect.format(TEXT=text)
|
54 |
+
language = language_detect.invoke(formatted_prompt)
|
55 |
+
|
56 |
+
lang = language["language"]
|
57 |
+
template_translate = '''
|
58 |
+
Please carefully read the following document:
|
59 |
+
<document>
|
60 |
+
{TEXT}
|
61 |
+
</document>
|
62 |
+
After reading through the document, pinpoint the key points and main ideas covered in the text.
|
63 |
+
Organize these key points into a concise bulleted list that summarizes the essential information from the document.
|
64 |
+
The summary should be in {LANG} language.
|
65 |
+
'''
|
66 |
+
|
67 |
+
prompt_summarize = PromptTemplate(
|
68 |
+
template=template_translate,
|
69 |
+
input_variables=["TEXT", "LANG"]
|
70 |
+
)
|
71 |
+
formatted_prompt = prompt_summarize.format(TEXT=text, LANG=lang)
|
72 |
+
summary = llm.invoke(formatted_prompt)
|
73 |
|
74 |
+
return summary
|
75 |
|
76 |
def download_summary(output_text):
|
77 |
if output_text:
|