"""Gradio app that summarizes an uploaded PDF or text file with a hosted LLM."""

import warnings

# Installed before the third-party imports below so that FutureWarnings
# emitted while those packages are imported are suppressed as well.
warnings.simplefilter(action='ignore', category=FutureWarning)

import PyPDF2
import gradio as gr
from langchain.prompts import PromptTemplate
from langchain.chains.summarize import load_summarize_chain
from pathlib import Path
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint

# NOTE(review): `temperature` is set together with `do_sample=False`; with
# sampling disabled the temperature presumably has no effect -- confirm intent.
llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",
    task="text-generation",
    max_new_tokens=4096,
    temperature=0.5,
    do_sample=False,
)
llm_engine_hf = ChatHuggingFace(llm=llm)

# Prompt sent to the model; `{TEXT}` is replaced by the document contents.
# Built from adjacent literals purely to keep source lines short -- the
# resulting string is a single line ending in " \n".
SUMMARY_TEMPLATE = (
    " Please carefully read the following document: {TEXT} "
    "After reading through the document, identify the language, and pinpoint "
    "the key points and main ideas covered in the text. "
    "Organize these key points into a concise bulleted list that summarizes "
    "the essential information from the document. "
    "The summary should consist of a maximum of 10 bullet points. "
    "Ensure that the final summary is in the language you identified from "
    "the document. "
    "Your goal is to comprehensively capture the core content of the document "
    "while expressing each summary point succinctly. "
    "Omit minor details and focus on central themes and important facts. \n"
)

SUMMARY_PROMPT = PromptTemplate(
    template=SUMMARY_TEMPLATE,
    input_variables=['TEXT'],
)


def read_pdf(file_path):
    """Return the concatenated text of every page of the PDF at *file_path*.

    Pages for which ``extract_text()`` yields a falsy value contribute an
    empty string instead of raising on concatenation.
    """
    pdf_reader = PyPDF2.PdfReader(file_path)
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)


def summarize(file, n_words=None):
    """Summarize an uploaded document as a bulleted list using the LLM.

    Args:
        file: The value Gradio passes for the ``gr.File`` input. Either a
            path string or an object exposing the path as ``.name`` is
            accepted; ``None`` means nothing was uploaded.
        n_words: Accepted for backward compatibility; not used by the
            prompt. It defaults to ``None`` because the UI wires only the
            file input to this handler.

    Returns:
        The ``content`` of the chat model's response.

    Raises:
        gr.Error: If no file was uploaded or no text could be read from it.
    """
    if file is None:
        raise gr.Error("Please upload a file before summarizing.")

    # Support both upload shapes: a plain path, or a wrapper with `.name`.
    file_path = str(getattr(file, "name", file))

    # Case-insensitive so that e.g. "REPORT.PDF" is not opened as UTF-8 text.
    if file_path.lower().endswith('.pdf'):
        text = read_pdf(file_path)
    else:
        with open(file_path, 'r', encoding='utf-8') as f:
            text = f.read()

    # Avoid asking the model to summarize nothing (e.g. an image-only PDF).
    if not text.strip():
        raise gr.Error("No readable text was found in the uploaded file.")

    formatted_prompt = SUMMARY_PROMPT.format(TEXT=text)
    output_summary = llm_engine_hf.invoke(formatted_prompt)
    return output_summary.content


def download_summary(output_text):
    """Write *output_text* to ``summary.txt`` and return its ``Path``.

    Returns ``None`` without touching the filesystem when *output_text* is
    empty or ``None``.
    """
    if not output_text:
        return None

    file_path = Path('summary.txt')
    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(output_text)
    return file_path


def create_download_file(summary_text):
    """Save *summary_text* via ``download_summary``; return the path as ``str`` or ``None``."""
    file_path = download_summary(summary_text)
    return str(file_path) if file_path else None


# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## Document Summarizer")

    with gr.Row():
        with gr.Column():
            file = gr.File(label="Submit a file")

        with gr.Column():
            output_text = gr.Textbox(label="Summary", lines=20)

    submit_button = gr.Button("Summarize")
    submit_button.click(summarize, inputs=[file], outputs=output_text)

    # NOTE(review): not wired to any event. `output_text` here is the Textbox
    # component itself rather than its current text, so calling this would
    # hand a component to `download_summary`. Candidate for removal.
    def generate_file():
        summary_text = output_text
        file_path = download_summary(summary_text)
        return file_path

    download_button = gr.Button("Download Summary")
    download_button.click(
        fn=create_download_file,
        inputs=[output_text],
        outputs=gr.File()
    )

# Run the Gradio app
demo.launch(share=True)