"""Gradio app that summarizes pasted text or an uploaded PDF with BART."""

import logging

import fitz
import gradio as gr
import spaces
from transformers import pipeline

logger = logging.getLogger(__name__)

# Initialize summarization pipeline
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")


def summarize_text(text, model, max_length=1024):
    """Summarize *text* chunk by chunk and join the partial summaries.

    The text is cut into consecutive slices of at most ``max_length``
    characters (characters, not tokens), each non-blank slice is passed to
    ``model``, and the resulting summaries are joined with single spaces.

    Args:
        text: The text to summarize. ``None`` or an empty string yields "".
        model: A callable used like a summarization pipeline; it must return
            a sequence whose first item has a ``'summary_text'`` key.
        max_length: Maximum number of characters per chunk.

    Returns:
        The concatenated summary string.
    """
    if not text:
        return ""

    chunks = (text[i:i + max_length] for i in range(0, len(text), max_length))
    # Blank chunks carry nothing to summarize, so they are not sent to the model.
    summaries = [
        model(chunk, max_length=150, min_length=40, do_sample=False)[0]['summary_text']
        for chunk in chunks
        if chunk.strip()
    ]
    return ' '.join(summaries)


def summarize_pdf(pdf_file, model):
    """Extract the text of every page of a PDF and summarize it.

    Args:
        pdf_file: Either a path string or an object exposing the path as
            ``.name`` (both forms are accepted so the function works
            regardless of how the upload component hands over the file).
        model: Summarization callable forwarded to ``summarize_text``.

    Returns:
        The summary of the concatenated page texts.
    """
    path = getattr(pdf_file, "name", pdf_file)
    with fitz.open(path) as doc:
        text = "".join(page.get_text() for page in doc)
    return summarize_text(text, model)


@spaces.GPU(duration=60)
def summarize(input_type, input_text, uploaded_file):
    """Click handler: summarize the text box content or the uploaded PDF.

    Args:
        input_type: Value of the "Input Type" radio ("Text" or "PDF").
        input_text: Content of the text box.
        uploaded_file: Value of the file component, or ``None`` if empty.

    Returns:
        The summary, or a user-facing message describing what went wrong.
        Exceptions are logged and converted to messages, never re-raised.
    """
    generic_message = "There was a problem summarizing the text. Please try again later."
    try:
        if input_type == "Text":
            return summarize_text(input_text, summarizer)
        if uploaded_file is None:
            return "Please upload a PDF file to summarize."
        return summarize_pdf(uploaded_file, summarizer)
    except RuntimeError as e:
        logger.exception("Summarization failed with a runtime error")
        if "CUDA out of memory" in str(e):
            return "There was a problem summarizing the text due to insufficient GPU memory. Please try again with a smaller input."
        return generic_message
    # The module is imported as `gr`; the bare name `gradio` is not defined
    # here and would raise NameError while matching this clause.
    except gr.Error as e:
        logger.exception("Summarization failed with a Gradio error")
        if "GPU task aborted" in str(e):
            return "The summarization process took too long and was aborted. Please try again with a smaller input."
        return generic_message
    except Exception:
        logger.exception("Summarization failed unexpectedly")
        return generic_message


# Define the footer
footer = """
"""

# Define the Gradio interface
with gr.Blocks(theme='gradio/soft') as app:
    gr.Markdown("# Text and PDF Summarization App")
    gr.Markdown("Note: This model can handle a maximum of 1024 tokens. A token is a unit of text that the model can process at a time. When summarizing text, the input text is split into smaller chunks of up to 1024 tokens each, and each chunk is summarized separately. The summaries are then concatenated into a single summary.")
    with gr.Row():
        input_type = gr.Radio(["Text", "PDF"], label="Input Type")
    with gr.Row():
        # Both inputs start hidden; the radio's change handler reveals one.
        input_text = gr.Textbox(lines=10, label="Enter Text to Summarize", visible=False)
        uploaded_file = gr.File(label="Upload PDF file", visible=False)
    with gr.Row():
        submit_btn = gr.Button("Submit")
    with gr.Row():
        summary = gr.Textbox(label="Summary")
    with gr.Row():
        gr.HTML(footer)

    def input_type_change(input_type):
        """Show the input widget matching the selected type, hide the other."""
        show_text = input_type == "Text"
        return {
            input_text: gr.Textbox(visible=show_text),
            uploaded_file: gr.File(visible=not show_text),
        }

    # Define the change event handler for the input type radio buttons
    input_type.change(fn=input_type_change, inputs=[input_type], outputs=[input_text, uploaded_file])

    # Define the click event handler for the submit button
    submit_btn.click(fn=summarize, inputs=[input_type, input_text, uploaded_file], outputs=[summary])

# Launch the Gradio interface
if __name__ == "__main__":
    app.launch()