# astro21's picture
# Update app.py
# b0412ac verified
import streamlit as st
from transformers import pipeline
import pandas as pd
def _load_summarizer():
    """Build the summarization pipeline for the ``astro21/bart-cls_n`` model."""
    return pipeline("summarization", model="astro21/bart-cls_n")


# Streamlit re-executes this script from the top on every user interaction
# (including each file upload), so an uncached load would rebuild the model
# on every rerun. ``st.cache_resource`` keeps a single shared instance for
# the server process. It is only available in newer Streamlit releases, so
# fall back to a plain, uncached load when the attribute is missing.
_cache_resource = getattr(st, "cache_resource", None)
if _cache_resource is not None:
    _load_summarizer = _cache_resource(_load_summarizer)

# Load the text summarization pipeline
summarizer = _load_summarizer()

# Number of chunks handled by the most recent summarize_text() call.
chunk_counter = 0
def summarize_text(input_text, max_chunk_size=1024):
    """Summarize ``input_text`` chunk by chunk and save the merged summary.

    The text is cut into consecutive slices of at most ``max_chunk_size``
    characters. Each slice is summarized independently with the module-level
    ``summarizer`` pipeline and the per-slice summaries are joined with
    newlines.

    Args:
        input_text: The text to summarize.
        max_chunk_size: Maximum number of characters per slice (default 1024).

    Returns:
        A 3-tuple ``(summary, chunk_df, path)`` where ``summary`` is the
        newline-joined summary text, ``chunk_df`` is a DataFrame with the
        columns ``'Chunk Number'`` (1-based) and ``'Chunk Length'``
        (characters per slice), and ``path`` is the name of the file the
        summary was written to (``"summarized.txt"``).

    Raises:
        ValueError: If ``max_chunk_size`` is not positive.

    Side effects:
        Overwrites ``summarized.txt`` in the current working directory and
        sets the module-level ``chunk_counter`` to the number of slices.
    """
    global chunk_counter

    if max_chunk_size <= 0:
        raise ValueError("max_chunk_size must be a positive integer")

    chunks = [
        input_text[start:start + max_chunk_size]
        for start in range(0, len(input_text), max_chunk_size)
    ]
    # Kept up to date for any code that reads the module-level counter.
    chunk_counter = len(chunks)

    summaries = []
    for chunk in chunks:
        # truncation=True guards against slices whose token count exceeds the
        # model's input limit even though they fit the character budget.
        outputs = summarizer(
            chunk,
            max_length=128,
            min_length=64,
            do_sample=False,
            truncation=True,
        )
        summaries.append(outputs[0]["summary_text"])

    summarized_text_only = "\n".join(summaries)

    # Save the merged summary to a file. The encoding is explicit so that
    # non-ASCII summaries do not depend on the platform's default locale.
    with open("summarized.txt", "w", encoding="utf-8") as output_file:
        output_file.write(summarized_text_only)

    chunk_df = pd.DataFrame({
        "Chunk Number": range(1, len(chunks) + 1),
        "Chunk Length": [len(chunk) for chunk in chunks],
    })
    return summarized_text_only, chunk_df, "summarized.txt"
def summarize_text_file(file):
    """Decode an uploaded file as UTF-8 and summarize its contents.

    Args:
        file: An object whose ``read()`` returns the file's raw bytes, or
            ``None`` when nothing was uploaded.

    Returns:
        Whatever ``summarize_text`` returns for the decoded text, or ``None``
        when ``file`` is ``None``.
    """
    if file is None:
        return None

    raw_bytes = file.read()
    decoded_text = str(raw_bytes, "utf-8")
    return summarize_text(decoded_text)
# Page header.
st.title("Text Summarization")
st.write("Summarize text using BART")

# Only plain-text uploads are accepted by the widget.
uploaded_file = st.file_uploader("Upload a text file", type=["txt"])

# Nothing is rendered below the uploader until a file has been provided.
if uploaded_file is not None:
    result = summarize_text_file(uploaded_file)

    # result[0] is the merged summary text, result[1] the per-chunk table.
    for heading, position in (("Summarized Text", 0), ("Chunk Information", 1)):
        st.subheader(heading)
        st.write(result[position])