Spaces:

ZySec-AI
/

ZySec

Sleeping

ZySec / modules /nav_summarizer.py

vSiddi

fix files

8e29341 about 1 year ago

4.99 kB


	import os
	import streamlit as st
	from langchain_openai import ChatOpenAI
	from langchain.chains import load_summarize_chain
	from langchain_community.document_loaders import PyPDFLoader, TextLoader, UnstructuredWordDocumentLoader
	from langchain.prompts import PromptTemplate
	from modules import app_page_definitions, app_logger,app_constants,file_utils

	# Rebind the name `app_logger` from the imported module to the logger object
	# that module exposes, so the rest of this file can call app_logger.info(...)
	# and app_logger.warning(...) directly.
	app_logger = app_logger.app_logger

	# Number of documents passed to the summarization chain per batch when the
	# page shows intermediate results; the value is defined in app_constants.
	batch_size = app_constants.SUMMARIZER_BATCH
	# Base workspace directory; uploaded files are saved under its "tmp" subfolder.
	WORKSPACE_DIRECTORY = app_constants.WORKSPACE_DIRECTORY


	def process_file(file_path, file_type):
	    """Load an uploaded file and split it into documents.

	    Args:
	        file_path: Location of the saved upload on disk.
	        file_type: MIME type of the upload. Plain text, PDF and Word
	            (.docx) documents are supported.

	    Returns:
	        Whatever the selected loader's ``load_and_split()`` returns
	        (the caller treats it as a sequence of documents).

	    Raises:
	        ValueError: If ``file_type`` is not one of the supported MIME types.
	    """
	    if file_type == "text/plain":
	        # Text uploads are not guaranteed to be in the platform's default
	        # encoding; let the loader fall back to encoding detection instead
	        # of failing on the first undecodable byte.
	        loader = TextLoader(file_path, autodetect_encoding=True)
	    elif file_type == "application/pdf":
	        loader = PyPDFLoader(file_path)
	    elif file_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
	        loader = UnstructuredWordDocumentLoader(file_path)
	    else:
	        raise ValueError(f"Unsupported file type: {file_type}")

	    app_logger.info(f"Processing file {file_path} of type {file_type}")
	    return loader.load_and_split()

	def _build_refine_chain():
	    """Create the refine-style summarization chain used by the summarizer page."""
	    llm = ChatOpenAI(
	        model_name=app_constants.MODEL_NAME,
	        openai_api_key=app_constants.openai_api_key,
	        base_url=app_constants.local_model_uri,
	        streaming=True,
	    )

	    # Built with explicit newlines so that the indentation of this source
	    # file can never leak into the text sent to the model.
	    prompt_template = (
	        "Write a concise summary of the following:\n"
	        "{text}\n"
	        "CONCISE SUMMARY:"
	    )
	    prompt = PromptTemplate.from_template(prompt_template)

	    refine_template = (
	        "You are a content writer and your job is to produce a summary of input\n"
	        "We have provided an existing summary up to a certain point: {existing_answer}\n"
	        "Start and end properly and refine the existing summary "
	        "with some more context below.\n"
	        "------------\n"
	        "{text}\n"
	        "------------\n"
	        "Given the new context, refine the original summary. "
	        "If the context isn't useful, return the original summary."
	    )
	    refine_prompt = PromptTemplate.from_template(refine_template)

	    return load_summarize_chain(
	        llm=llm,
	        chain_type="refine",
	        question_prompt=prompt,
	        refine_prompt=refine_prompt,
	        return_intermediate_steps=True,
	        input_key="input_documents",
	        output_key="output_text",
	    )


	def _batch_bounds(start_index, stop_index, size):
	    """Yield half-open (lo, hi) index pairs covering [start_index, stop_index)."""
	    # A non-positive step would make range() raise; fall back to one at a time.
	    step = max(1, size)
	    for lo in range(start_index, stop_index, step):
	        yield lo, min(lo + step, stop_index)


	def app():
	    """Render the Summarizer Tool page.

	    Lets the user upload a txt/pdf/docx file, pick a range of the resulting
	    documents, and run a refine summarization chain over that range. Each
	    batch of ``batch_size`` documents is summarized and shown in its own
	    expander; the final summary is produced by a separate chain run over
	    the whole selected range (the batch outputs are display-only and are
	    not fed into the final run).
	    """
	    app_logger.info("Starting Streamlit app - Summarizer Tool page")

	    # Fetch page configuration from app_page_definitions
	    page_config = app_page_definitions.PAGE_CONFIG.get("nav_summarize")

	    st.title(page_config["title"])
	    st.caption(page_config["caption"])
	    st.session_state.current_page = "nav_summarize"

	    uploaded_file = st.file_uploader("Upload your document here:", type=['txt', 'pdf', 'docx'], key="file_uploader")

	    if uploaded_file is None:
	        st.warning("Please upload a document to summarize.")
	        app_logger.warning("No document uploaded for summarization")
	        return

	    file_path = file_utils.save_uploaded_file(uploaded_file, uploads_path=WORKSPACE_DIRECTORY + "/tmp")
	    try:
	        docs = process_file(file_path, uploaded_file.type)
	    except ValueError as exc:
	        # The MIME type is reported by the browser and may not match one of
	        # the supported types; show the problem instead of crashing the page.
	        app_logger.warning(f"Could not process uploaded file: {exc}")
	        st.error(str(exc))
	        return

	    total_docs = len(docs)
	    app_logger.info(f"Total documents processed: {total_docs}")

	    if total_docs == 0:
	        # Nothing to summarize; running the chain on an empty list would fail.
	        st.warning("No content could be extracted from the uploaded document.")
	        app_logger.warning("Uploaded document produced no content to summarize")
	        return

	    if total_docs > 1:
	        doc_range = st.slider("Select document range for summarization", 1, total_docs, (1, total_docs))
	    else:
	        doc_range = (1, 1)

	    progress_bar = st.progress(0)

	    if not st.button("Summarize"):
	        return

	    with st.spinner('Processing... Please wait'):
	        chain = _build_refine_chain()

	        # The slider is 1-based and inclusive; convert to a half-open slice.
	        start_doc, end_doc = doc_range
	        first = start_doc - 1
	        last = min(end_doc, total_docs)
	        selected_docs = docs[first:last]
	        selected_count = len(selected_docs)

	        for lo, hi in _batch_bounds(first, last, batch_size):
	            # Batches stop at the end of the selection, never beyond it.
	            batch_docs = docs[lo:hi]

	            # Progress is measured against the selected range so it reaches
	            # 100% when the last selected batch has been handled.
	            progress_bar.progress((hi - first) / selected_count)

	            with st.expander(f"Processing Documents {lo + 1} - {hi}", expanded=False):
	                intermediate_summary = chain.invoke({"input_documents": batch_docs}, return_only_outputs=True)
	                st.write(intermediate_summary)

	        final_summary_response = chain.invoke({"input_documents": selected_docs}, return_only_outputs=True)
	        final_summary = final_summary_response.get('output_text', "No summary generated.")
	        st.text_area("Final Summary", final_summary, height=300)

	        st.success("Summarization Completed!")
	        progress_bar.empty()