Spaces:

cstr
/

PDF-Summarizer

Running

App Files Community

cstr committed on Dec 6, 2024

Commit

ff1b4d3

verified ·

1 Parent(s): b14eb30

Update app.py

Browse files

Files changed (1) hide show

app.py +127 -117

app.py CHANGED Viewed

@@ -4,18 +4,34 @@ import tempfile
 import requests
 import gradio as gr
 from PyPDF2 import PdfReader
-import openai
 import logging
 # Set up logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 # Initialize Hugging Face models
 HUGGINGFACE_MODELS = {
-    "Phi-3 Mini 128k Instruct by EswardiVI": "eswardivi/Phi-3-mini-128k-instruct",
-    "Phi-3 Mini 128k Instruct by TaufiqDP": "taufiqdp/phi-3-mini-128k-instruct"
 }
 # Utility Functions
 def extract_text_from_pdf(pdf_path):
     """Extract text content from PDF file."""
@@ -71,66 +87,52 @@ def split_into_snippets(text, context_size):
     return snippets
-def build_prompts(snippets, prompt_instruction, custom_prompt):
     """Build formatted prompts from text snippets."""
     prompts = []
-    for idx, snippet in enumerate(snippets, start=1):
-        current_prompt = custom_prompt if custom_prompt else prompt_instruction
-        framed_prompt = f"---\nPart {idx} of {len(snippets)}:\n{current_prompt}\n\n{snippet}\n\nEnd of Part {idx}.\n---"
         prompts.append(framed_prompt)
-    return prompts
 def send_to_huggingface(prompt, model_name):
-    """Send prompt to Hugging Face model."""
     try:
-        payload = {"inputs": prompt}
-        response = requests.post(
-            f"https://api-inference.huggingface.co/models/{model_name}",
-            json=payload
         )
-        if response.status_code == 200:
-            return response.json()[0].get('generated_text', 'No generated text found.')
-        else:
-            error_info = response.json()
-            error_message = error_info.get('error', 'Unknown error occurred.')
-            logging.error(f"Error from Hugging Face model: {error_message}")
-            return f"Error from Hugging Face model: {error_message}"
     except Exception as e:
         logging.error(f"Error interacting with Hugging Face model: {e}")
         return f"Error interacting with Hugging Face model: {e}"
-def authenticate_openai(api_key):
-    """Authenticate with OpenAI API."""
-    if api_key:
-        try:
-            openai.api_key = api_key
-            openai.Model.list()
-            return "OpenAI Authentication Successful!"
-        except Exception as e:
-            logging.error(f"OpenAI API Key Error: {e}")
-            return f"OpenAI API Key Error: {e}"
-    return "No OpenAI API key provided."
 # Main Interface
 with gr.Blocks(theme=gr.themes.Default()) as demo:
     # Header
     gr.Markdown("# 📄 Smart PDF Summarizer")
     gr.Markdown("Upload a PDF document and get AI-powered summaries using OpenAI or Hugging Face models.")
-    # Authentication Section
-    with gr.Row():
-        with gr.Column(scale=1):
-            openai_api_key = gr.Textbox(
-                label="🔑 OpenAI API Key",
-                type="password",
-                placeholder="Enter your OpenAI API key (optional)"
-            )
-            auth_status = gr.Textbox(
-                label="Authentication Status",
-                interactive=False
-            )
-            auth_button = gr.Button("🔓 Authenticate", variant="primary")
     # Main Content
     with gr.Row():
         # Left Column - Input Options
@@ -146,18 +148,24 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
                     value="txt",
                     label="📝 Output Format"
                 )
             context_size = gr.Slider(
-                minimum=4000,
-                maximum=128000,
-                step=4000,
                 value=32000,
-                label="📏 Context Window Size"
             )
             snippet_number = gr.Number(
-                label="🔢 Snippet Number (Optional)",
-                value=None,
                 precision=0
             )
@@ -178,6 +186,14 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
                 label="🔧 Hugging Face Model",
                 visible=False
             )
         # Right Column - Output
         with gr.Column(scale=1):
@@ -194,35 +210,34 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
                 lines=10
             )
             summary_output = gr.Textbox(
                 label="📝 Summary",
                 lines=15
             )
             with gr.Row():
-                download_prompt = gr.File(
-                    label="📥 Download Prompt"
-                )
-                download_summary = gr.File(
-                    label="📥 Download Summary"
                 )
     # Event Handlers
     def toggle_hf_model(choice):
-        return gr.update(visible=choice == "Hugging Face Model")
-    def handle_authentication(api_key):
-        return authenticate_openai(api_key)
-    def process_pdf(pdf, fmt, ctx_size, snippet_num, prompt, model_selection, hf_model_choice, api_key):
         try:
             if not pdf:
-                return "Please upload a PDF file.", "", "", None, None
             # Extract text
             text = extract_text_from_pdf(pdf.name)
             if text.startswith("Error"):
-                return text, "", "", None, None
             # Format content
             formatted_text = format_content(text, fmt)
@@ -230,62 +245,42 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
             # Split into snippets
             snippets = split_into_snippets(formatted_text, ctx_size)
-            # Process specific snippet or all
-            if snippet_num is not None:
-                if 1 <= snippet_num <= len(snippets):
-                    selected_snippets = [snippets[snippet_num - 1]]
-                else:
-                    return f"Invalid snippet number. Please choose between 1 and {len(snippets)}.", "", "", None, None
-            else:
-                selected_snippets = snippets
             # Build prompts
             default_prompt = "Summarize the following text:"
-            prompts = build_prompts(selected_snippets, default_prompt, prompt)
-            full_prompt = "\n".join(prompts)
-            # Generate summary
-            if model_selection == "OpenAI ChatGPT":
-                if not api_key:
-                    return "OpenAI API key required.", full_prompt, "", None, None
-                try:
-                    openai.api_key = api_key
-                    response = openai.ChatCompletion.create(
-                        model="gpt-3.5-turbo",
-                        messages=[{"role": "user", "content": full_prompt}]
-                    )
-                    summary = response.choices[0].message.content
-                except Exception as e:
-                    return f"OpenAI API error: {str(e)}", full_prompt, "", None, None
-            else:
                 summary = send_to_huggingface(full_prompt, HUGGINGFACE_MODELS[hf_model_choice])
             # Save files for download
             with tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.txt') as prompt_file:
                 prompt_file.write(full_prompt)
-                prompt_path = prompt_file.name
-            with tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.txt') as summary_file:
-                summary_file.write(summary)
-                summary_path = summary_file.name
-            return "Processing complete!", full_prompt, summary, prompt_path, summary_path
         except Exception as e:
             logging.error(f"Error processing PDF: {e}")
-            return f"Error processing PDF: {str(e)}", "", "", None, None
     # Connect event handlers
     model_choice.change(
         toggle_hf_model,
         inputs=[model_choice],
-        outputs=[hf_model]
-    )
-    auth_button.click(
-        handle_authentication,
-        inputs=[openai_api_key],
-        outputs=[auth_status]
     )
     process_button.click(
@@ -297,35 +292,50 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
             snippet_number,
             custom_prompt,
             model_choice,
-            hf_model,
-            openai_api_key
         ],
         outputs=[
             progress_status,
             generated_prompt,
             summary_output,
-            download_prompt,
-            download_summary
         ]
     )
     # Instructions
     gr.Markdown("""
     ### 📌 Instructions:
-    1. (Optional) Enter your OpenAI API key and authenticate
-    2. Upload a PDF document
-    3. Choose output format and context window size
-    4. Optionally specify a snippet number or custom prompt
-    5. Select between OpenAI ChatGPT or Hugging Face model
-    6. Click 'Process PDF' to generate summary
-    7. Download the generated prompt and summary as needed
     ### ⚙️ Features:
     - Support for multiple PDF formats
     - Flexible text formatting options
-    - Custom prompt creation
-    - Multiple AI model options
-    - Snippet-based processing
     - Downloadable outputs
     """)

 import requests
 import gradio as gr
 from PyPDF2 import PdfReader
 import logging
+import webbrowser
+from gradio_client import Client
 # Set up logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 # Initialize Hugging Face models
 HUGGINGFACE_MODELS = {
+    "Phi-3 Mini 128k": "eswardivi/Phi-3-mini-128k-instruct",
 }
+# Common context window sizes
+CONTEXT_SIZES = {
+    "4K": 4000,
+    "8K": 8000,
+    "32K": 32000,
+    "128K": 128000,
+    "200K": 200000
+}
+def copy_to_clipboard(text):
+    return text
+def open_chatgpt():
+    webbrowser.open('https://chat.openai.com/')
+    return "Opening ChatGPT in browser..."
 # Utility Functions
 def extract_text_from_pdf(pdf_path):
     """Extract text content from PDF file."""
     return snippets
+def build_prompts(snippets, prompt_instruction, custom_prompt, snippet_num=None):
     """Build formatted prompts from text snippets."""
+    if snippet_num is not None:
+        if 1 <= snippet_num <= len(snippets):
+            selected_snippets = [snippets[snippet_num - 1]]
+        else:
+            return f"Error: Invalid snippet number. Please choose between 1 and {len(snippets)}."
+    else:
+        selected_snippets = snippets
     prompts = []
+    base_prompt = custom_prompt if custom_prompt else prompt_instruction
+    for idx, snippet in enumerate(selected_snippets, start=1):
+        if len(selected_snippets) > 1:
+            prompt_header = f"{base_prompt} Part {idx} of {len(selected_snippets)}: ---\n"
+        else:
+            prompt_header = f"{base_prompt} ---\n"
+        framed_prompt = f"{prompt_header}{snippet}\n---"
         prompts.append(framed_prompt)
+    return "\n\n".join(prompts)
 def send_to_huggingface(prompt, model_name):
+    """Send prompt to Hugging Face model using gradio_client."""
     try:
+        client = Client(model_name)
+        response = client.predict(
+            prompt,  # message
+            0.9,    # temperature
+            True,   # sampling
+            512,    # max_new_tokens
+            api_name="/chat"
         )
+        return response
     except Exception as e:
         logging.error(f"Error interacting with Hugging Face model: {e}")
         return f"Error interacting with Hugging Face model: {e}"
 # Main Interface
 with gr.Blocks(theme=gr.themes.Default()) as demo:
     # Header
     gr.Markdown("# 📄 Smart PDF Summarizer")
     gr.Markdown("Upload a PDF document and get AI-powered summaries using OpenAI or Hugging Face models.")
     # Main Content
     with gr.Row():
         # Left Column - Input Options
                     value="txt",
                     label="📝 Output Format"
                 )
+            gr.Markdown("### Context Window Size")
+            with gr.Row():
+                for size_name, size_value in CONTEXT_SIZES.items():
+                    if gr.Button(size_name).click:
+                        context_size.value = size_value
             context_size = gr.Slider(
+                minimum=1000,
+                maximum=200000,
+                step=1000,
                 value=32000,
+                label="📏 Custom Context Size"
             )
             snippet_number = gr.Number(
+                label="🔢 Snippet Number",
+                value=1,
                 precision=0
             )
                 label="🔧 Hugging Face Model",
                 visible=False
             )
+            # Authentication moved down
+            with gr.Row(visible=False) as auth_row:
+                openai_api_key = gr.Textbox(
+                    label="🔑 OpenAI API Key",
+                    type="password",
+                    placeholder="Enter your OpenAI API key (optional)"
+                )
         # Right Column - Output
         with gr.Column(scale=1):
                 lines=10
             )
+            with gr.Row():
+                copy_prompt_button = gr.Button("📋 Copy Prompt")
+                open_chatgpt_button = gr.Button("🌐 Open ChatGPT")
             summary_output = gr.Textbox(
                 label="📝 Summary",
                 lines=15
             )
             with gr.Row():
+                copy_summary_button = gr.Button("📋 Copy Summary")
+                download_files = gr.Files(
+                    label="📥 Download Files"
                 )
     # Event Handlers
     def toggle_hf_model(choice):
+        return gr.update(visible=choice == "Hugging Face Model"), gr.update(visible=choice == "OpenAI ChatGPT")
+    def process_pdf(pdf, fmt, ctx_size, snippet_num, prompt, model_selection, hf_model_choice):
         try:
             if not pdf:
+                return "Please upload a PDF file.", "", "", None
             # Extract text
             text = extract_text_from_pdf(pdf.name)
             if text.startswith("Error"):
+                return text, "", "", None
             # Format content
             formatted_text = format_content(text, fmt)
             # Split into snippets
             snippets = split_into_snippets(formatted_text, ctx_size)
             # Build prompts
             default_prompt = "Summarize the following text:"
+            full_prompt = build_prompts(snippets, default_prompt, prompt, snippet_num)
+            if isinstance(full_prompt, str) and full_prompt.startswith("Error"):
+                return full_prompt, "", "", None
+            # Generate summary based on model choice
+            if model_selection == "Hugging Face Model":
                 summary = send_to_huggingface(full_prompt, HUGGINGFACE_MODELS[hf_model_choice])
+            else:
+                summary = "Please use the Copy Prompt button and paste into ChatGPT."
             # Save files for download
+            files_to_download = []
             with tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.txt') as prompt_file:
                 prompt_file.write(full_prompt)
+                files_to_download.append(prompt_file.name)
+            if summary != "Please use the Copy Prompt button and paste into ChatGPT.":
+                with tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.txt') as summary_file:
+                    summary_file.write(summary)
+                    files_to_download.append(summary_file.name)
+            return "Processing complete!", full_prompt, summary, files_to_download
         except Exception as e:
             logging.error(f"Error processing PDF: {e}")
+            return f"Error processing PDF: {str(e)}", "", "", None
     # Connect event handlers
     model_choice.change(
         toggle_hf_model,
         inputs=[model_choice],
+        outputs=[hf_model, auth_row]
     )
     process_button.click(
             snippet_number,
             custom_prompt,
             model_choice,
+            hf_model
         ],
         outputs=[
             progress_status,
             generated_prompt,
             summary_output,
+            download_files
         ]
     )
+    copy_prompt_button.click(
+        copy_to_clipboard,
+        inputs=[generated_prompt],
+        outputs=[progress_status]
+    )
+    copy_summary_button.click(
+        copy_to_clipboard,
+        inputs=[summary_output],
+        outputs=[progress_status]
+    )
+    open_chatgpt_button.click(
+        open_chatgpt,
+        outputs=[progress_status]
+    )
     # Instructions
     gr.Markdown("""
     ### 📌 Instructions:
+    1. Upload a PDF document
+    2. Choose output format and context window size
+    3. Select snippet number (default: 1) or enter custom prompt
+    4. Select between OpenAI ChatGPT or Hugging Face model
+    5. Click 'Process PDF' to generate summary
+    6. Use 'Copy Prompt' and 'Open ChatGPT' for manual processing
+    7. Download generated files as needed
     ### ⚙️ Features:
     - Support for multiple PDF formats
     - Flexible text formatting options
+    - Predefined context window sizes (4K to 200K)
+    - Copy to clipboard functionality
+    - Direct ChatGPT integration
     - Downloadable outputs
     """)