cstr committed on
Commit
ff1b4d3
·
verified ·
1 Parent(s): b14eb30

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +127 -117
app.py CHANGED
@@ -4,18 +4,34 @@ import tempfile
4
  import requests
5
  import gradio as gr
6
  from PyPDF2 import PdfReader
7
- import openai
8
  import logging
 
 
9
 
10
  # Set up logging
11
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
12
 
13
  # Initialize Hugging Face models
14
  HUGGINGFACE_MODELS = {
15
- "Phi-3 Mini 128k Instruct by EswardiVI": "eswardivi/Phi-3-mini-128k-instruct",
16
- "Phi-3 Mini 128k Instruct by TaufiqDP": "taufiqdp/phi-3-mini-128k-instruct"
17
  }
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  # Utility Functions
20
  def extract_text_from_pdf(pdf_path):
21
  """Extract text content from PDF file."""
@@ -71,66 +87,52 @@ def split_into_snippets(text, context_size):
71
 
72
  return snippets
73
 
74
- def build_prompts(snippets, prompt_instruction, custom_prompt):
75
  """Build formatted prompts from text snippets."""
 
 
 
 
 
 
 
 
76
  prompts = []
77
- for idx, snippet in enumerate(snippets, start=1):
78
- current_prompt = custom_prompt if custom_prompt else prompt_instruction
79
- framed_prompt = f"---\nPart {idx} of {len(snippets)}:\n{current_prompt}\n\n{snippet}\n\nEnd of Part {idx}.\n---"
 
 
 
 
 
 
80
  prompts.append(framed_prompt)
81
- return prompts
 
82
 
83
  def send_to_huggingface(prompt, model_name):
84
- """Send prompt to Hugging Face model."""
85
  try:
86
- payload = {"inputs": prompt}
87
- response = requests.post(
88
- f"https://api-inference.huggingface.co/models/{model_name}",
89
- json=payload
 
 
 
90
  )
91
- if response.status_code == 200:
92
- return response.json()[0].get('generated_text', 'No generated text found.')
93
- else:
94
- error_info = response.json()
95
- error_message = error_info.get('error', 'Unknown error occurred.')
96
- logging.error(f"Error from Hugging Face model: {error_message}")
97
- return f"Error from Hugging Face model: {error_message}"
98
  except Exception as e:
99
  logging.error(f"Error interacting with Hugging Face model: {e}")
100
  return f"Error interacting with Hugging Face model: {e}"
101
 
102
- def authenticate_openai(api_key):
103
- """Authenticate with OpenAI API."""
104
- if api_key:
105
- try:
106
- openai.api_key = api_key
107
- openai.Model.list()
108
- return "OpenAI Authentication Successful!"
109
- except Exception as e:
110
- logging.error(f"OpenAI API Key Error: {e}")
111
- return f"OpenAI API Key Error: {e}"
112
- return "No OpenAI API key provided."
113
-
114
  # Main Interface
115
  with gr.Blocks(theme=gr.themes.Default()) as demo:
116
  # Header
117
  gr.Markdown("# πŸ“„ Smart PDF Summarizer")
118
  gr.Markdown("Upload a PDF document and get AI-powered summaries using OpenAI or Hugging Face models.")
119
 
120
- # Authentication Section
121
- with gr.Row():
122
- with gr.Column(scale=1):
123
- openai_api_key = gr.Textbox(
124
- label="πŸ”‘ OpenAI API Key",
125
- type="password",
126
- placeholder="Enter your OpenAI API key (optional)"
127
- )
128
- auth_status = gr.Textbox(
129
- label="Authentication Status",
130
- interactive=False
131
- )
132
- auth_button = gr.Button("πŸ”“ Authenticate", variant="primary")
133
-
134
  # Main Content
135
  with gr.Row():
136
  # Left Column - Input Options
@@ -146,18 +148,24 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
146
  value="txt",
147
  label="πŸ“ Output Format"
148
  )
149
-
 
 
 
 
 
 
150
  context_size = gr.Slider(
151
- minimum=4000,
152
- maximum=128000,
153
- step=4000,
154
  value=32000,
155
- label="πŸ“ Context Window Size"
156
  )
157
 
158
  snippet_number = gr.Number(
159
- label="πŸ”’ Snippet Number (Optional)",
160
- value=None,
161
  precision=0
162
  )
163
 
@@ -178,6 +186,14 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
178
  label="πŸ”§ Hugging Face Model",
179
  visible=False
180
  )
 
 
 
 
 
 
 
 
181
 
182
  # Right Column - Output
183
  with gr.Column(scale=1):
@@ -194,35 +210,34 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
194
  lines=10
195
  )
196
 
 
 
 
 
197
  summary_output = gr.Textbox(
198
  label="πŸ“ Summary",
199
  lines=15
200
  )
201
 
202
  with gr.Row():
203
- download_prompt = gr.File(
204
- label="πŸ“₯ Download Prompt"
205
- )
206
- download_summary = gr.File(
207
- label="πŸ“₯ Download Summary"
208
  )
209
 
210
  # Event Handlers
211
  def toggle_hf_model(choice):
212
- return gr.update(visible=choice == "Hugging Face Model")
213
 
214
- def handle_authentication(api_key):
215
- return authenticate_openai(api_key)
216
-
217
- def process_pdf(pdf, fmt, ctx_size, snippet_num, prompt, model_selection, hf_model_choice, api_key):
218
  try:
219
  if not pdf:
220
- return "Please upload a PDF file.", "", "", None, None
221
 
222
  # Extract text
223
  text = extract_text_from_pdf(pdf.name)
224
  if text.startswith("Error"):
225
- return text, "", "", None, None
226
 
227
  # Format content
228
  formatted_text = format_content(text, fmt)
@@ -230,62 +245,42 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
230
  # Split into snippets
231
  snippets = split_into_snippets(formatted_text, ctx_size)
232
 
233
- # Process specific snippet or all
234
- if snippet_num is not None:
235
- if 1 <= snippet_num <= len(snippets):
236
- selected_snippets = [snippets[snippet_num - 1]]
237
- else:
238
- return f"Invalid snippet number. Please choose between 1 and {len(snippets)}.", "", "", None, None
239
- else:
240
- selected_snippets = snippets
241
-
242
  # Build prompts
243
  default_prompt = "Summarize the following text:"
244
- prompts = build_prompts(selected_snippets, default_prompt, prompt)
245
- full_prompt = "\n".join(prompts)
246
 
247
- # Generate summary
248
- if model_selection == "OpenAI ChatGPT":
249
- if not api_key:
250
- return "OpenAI API key required.", full_prompt, "", None, None
251
- try:
252
- openai.api_key = api_key
253
- response = openai.ChatCompletion.create(
254
- model="gpt-3.5-turbo",
255
- messages=[{"role": "user", "content": full_prompt}]
256
- )
257
- summary = response.choices[0].message.content
258
- except Exception as e:
259
- return f"OpenAI API error: {str(e)}", full_prompt, "", None, None
260
- else:
261
  summary = send_to_huggingface(full_prompt, HUGGINGFACE_MODELS[hf_model_choice])
 
 
262
 
263
  # Save files for download
 
 
264
  with tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.txt') as prompt_file:
265
  prompt_file.write(full_prompt)
266
- prompt_path = prompt_file.name
267
 
268
- with tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.txt') as summary_file:
269
- summary_file.write(summary)
270
- summary_path = summary_file.name
 
271
 
272
- return "Processing complete!", full_prompt, summary, prompt_path, summary_path
273
 
274
  except Exception as e:
275
  logging.error(f"Error processing PDF: {e}")
276
- return f"Error processing PDF: {str(e)}", "", "", None, None
277
 
278
  # Connect event handlers
279
  model_choice.change(
280
  toggle_hf_model,
281
  inputs=[model_choice],
282
- outputs=[hf_model]
283
- )
284
-
285
- auth_button.click(
286
- handle_authentication,
287
- inputs=[openai_api_key],
288
- outputs=[auth_status]
289
  )
290
 
291
  process_button.click(
@@ -297,35 +292,50 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
297
  snippet_number,
298
  custom_prompt,
299
  model_choice,
300
- hf_model,
301
- openai_api_key
302
  ],
303
  outputs=[
304
  progress_status,
305
  generated_prompt,
306
  summary_output,
307
- download_prompt,
308
- download_summary
309
  ]
310
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311
 
312
  # Instructions
313
  gr.Markdown("""
314
  ### πŸ“Œ Instructions:
315
- 1. (Optional) Enter your OpenAI API key and authenticate
316
- 2. Upload a PDF document
317
- 3. Choose output format and context window size
318
- 4. Optionally specify a snippet number or custom prompt
319
- 5. Select between OpenAI ChatGPT or Hugging Face model
320
- 6. Click 'Process PDF' to generate summary
321
- 7. Download the generated prompt and summary as needed
322
 
323
  ### βš™οΈ Features:
324
  - Support for multiple PDF formats
325
  - Flexible text formatting options
326
- - Custom prompt creation
327
- - Multiple AI model options
328
- - Snippet-based processing
329
  - Downloadable outputs
330
  """)
331
 
 
4
  import requests
5
  import gradio as gr
6
  from PyPDF2 import PdfReader
 
7
  import logging
8
+ import webbrowser
9
+ from gradio_client import Client
10
 
11
  # Set up logging
12
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
13
 
14
  # Initialize Hugging Face models
15
  HUGGINGFACE_MODELS = {
16
+ "Phi-3 Mini 128k": "eswardivi/Phi-3-mini-128k-instruct",
 
17
  }
18
 
19
+ # Common context window sizes
20
+ CONTEXT_SIZES = {
21
+ "4K": 4000,
22
+ "8K": 8000,
23
+ "32K": 32000,
24
+ "128K": 128000,
25
+ "200K": 200000
26
+ }
27
+
28
+ def copy_to_clipboard(text):
29
+ return text
30
+
31
+ def open_chatgpt():
32
+ webbrowser.open('https://chat.openai.com/')
33
+ return "Opening ChatGPT in browser..."
34
+
35
  # Utility Functions
36
  def extract_text_from_pdf(pdf_path):
37
  """Extract text content from PDF file."""
 
87
 
88
  return snippets
89
 
90
+ def build_prompts(snippets, prompt_instruction, custom_prompt, snippet_num=None):
91
  """Build formatted prompts from text snippets."""
92
+ if snippet_num is not None:
93
+ if 1 <= snippet_num <= len(snippets):
94
+ selected_snippets = [snippets[snippet_num - 1]]
95
+ else:
96
+ return f"Error: Invalid snippet number. Please choose between 1 and {len(snippets)}."
97
+ else:
98
+ selected_snippets = snippets
99
+
100
  prompts = []
101
+ base_prompt = custom_prompt if custom_prompt else prompt_instruction
102
+
103
+ for idx, snippet in enumerate(selected_snippets, start=1):
104
+ if len(selected_snippets) > 1:
105
+ prompt_header = f"{base_prompt} Part {idx} of {len(selected_snippets)}: ---\n"
106
+ else:
107
+ prompt_header = f"{base_prompt} ---\n"
108
+
109
+ framed_prompt = f"{prompt_header}{snippet}\n---"
110
  prompts.append(framed_prompt)
111
+
112
+ return "\n\n".join(prompts)
113
 
114
  def send_to_huggingface(prompt, model_name):
115
+ """Send prompt to Hugging Face model using gradio_client."""
116
  try:
117
+ client = Client(model_name)
118
+ response = client.predict(
119
+ prompt, # message
120
+ 0.9, # temperature
121
+ True, # sampling
122
+ 512, # max_new_tokens
123
+ api_name="/chat"
124
  )
125
+ return response
 
 
 
 
 
 
126
  except Exception as e:
127
  logging.error(f"Error interacting with Hugging Face model: {e}")
128
  return f"Error interacting with Hugging Face model: {e}"
129
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  # Main Interface
131
  with gr.Blocks(theme=gr.themes.Default()) as demo:
132
  # Header
133
  gr.Markdown("# πŸ“„ Smart PDF Summarizer")
134
  gr.Markdown("Upload a PDF document and get AI-powered summaries using OpenAI or Hugging Face models.")
135
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  # Main Content
137
  with gr.Row():
138
  # Left Column - Input Options
 
148
  value="txt",
149
  label="πŸ“ Output Format"
150
  )
151
+
152
+ gr.Markdown("### Context Window Size")
153
+ with gr.Row():
154
+ for size_name, size_value in CONTEXT_SIZES.items():
155
+ if gr.Button(size_name).click:
156
+ context_size.value = size_value
157
+
158
  context_size = gr.Slider(
159
+ minimum=1000,
160
+ maximum=200000,
161
+ step=1000,
162
  value=32000,
163
+ label="πŸ“ Custom Context Size"
164
  )
165
 
166
  snippet_number = gr.Number(
167
+ label="πŸ”’ Snippet Number",
168
+ value=1,
169
  precision=0
170
  )
171
 
 
186
  label="πŸ”§ Hugging Face Model",
187
  visible=False
188
  )
189
+
190
+ # Authentication moved down
191
+ with gr.Row(visible=False) as auth_row:
192
+ openai_api_key = gr.Textbox(
193
+ label="πŸ”‘ OpenAI API Key",
194
+ type="password",
195
+ placeholder="Enter your OpenAI API key (optional)"
196
+ )
197
 
198
  # Right Column - Output
199
  with gr.Column(scale=1):
 
210
  lines=10
211
  )
212
 
213
+ with gr.Row():
214
+ copy_prompt_button = gr.Button("πŸ“‹ Copy Prompt")
215
+ open_chatgpt_button = gr.Button("🌐 Open ChatGPT")
216
+
217
  summary_output = gr.Textbox(
218
  label="πŸ“ Summary",
219
  lines=15
220
  )
221
 
222
  with gr.Row():
223
+ copy_summary_button = gr.Button("πŸ“‹ Copy Summary")
224
+ download_files = gr.Files(
225
+ label="πŸ“₯ Download Files"
 
 
226
  )
227
 
228
  # Event Handlers
229
  def toggle_hf_model(choice):
230
+ return gr.update(visible=choice == "Hugging Face Model"), gr.update(visible=choice == "OpenAI ChatGPT")
231
 
232
+ def process_pdf(pdf, fmt, ctx_size, snippet_num, prompt, model_selection, hf_model_choice):
 
 
 
233
  try:
234
  if not pdf:
235
+ return "Please upload a PDF file.", "", "", None
236
 
237
  # Extract text
238
  text = extract_text_from_pdf(pdf.name)
239
  if text.startswith("Error"):
240
+ return text, "", "", None
241
 
242
  # Format content
243
  formatted_text = format_content(text, fmt)
 
245
  # Split into snippets
246
  snippets = split_into_snippets(formatted_text, ctx_size)
247
 
 
 
 
 
 
 
 
 
 
248
  # Build prompts
249
  default_prompt = "Summarize the following text:"
250
+ full_prompt = build_prompts(snippets, default_prompt, prompt, snippet_num)
 
251
 
252
+ if isinstance(full_prompt, str) and full_prompt.startswith("Error"):
253
+ return full_prompt, "", "", None
254
+
255
+ # Generate summary based on model choice
256
+ if model_selection == "Hugging Face Model":
 
 
 
 
 
 
 
 
 
257
  summary = send_to_huggingface(full_prompt, HUGGINGFACE_MODELS[hf_model_choice])
258
+ else:
259
+ summary = "Please use the Copy Prompt button and paste into ChatGPT."
260
 
261
  # Save files for download
262
+ files_to_download = []
263
+
264
  with tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.txt') as prompt_file:
265
  prompt_file.write(full_prompt)
266
+ files_to_download.append(prompt_file.name)
267
 
268
+ if summary != "Please use the Copy Prompt button and paste into ChatGPT.":
269
+ with tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.txt') as summary_file:
270
+ summary_file.write(summary)
271
+ files_to_download.append(summary_file.name)
272
 
273
+ return "Processing complete!", full_prompt, summary, files_to_download
274
 
275
  except Exception as e:
276
  logging.error(f"Error processing PDF: {e}")
277
+ return f"Error processing PDF: {str(e)}", "", "", None
278
 
279
  # Connect event handlers
280
  model_choice.change(
281
  toggle_hf_model,
282
  inputs=[model_choice],
283
+ outputs=[hf_model, auth_row]
 
 
 
 
 
 
284
  )
285
 
286
  process_button.click(
 
292
  snippet_number,
293
  custom_prompt,
294
  model_choice,
295
+ hf_model
 
296
  ],
297
  outputs=[
298
  progress_status,
299
  generated_prompt,
300
  summary_output,
301
+ download_files
 
302
  ]
303
  )
304
+
305
+ copy_prompt_button.click(
306
+ copy_to_clipboard,
307
+ inputs=[generated_prompt],
308
+ outputs=[progress_status]
309
+ )
310
+
311
+ copy_summary_button.click(
312
+ copy_to_clipboard,
313
+ inputs=[summary_output],
314
+ outputs=[progress_status]
315
+ )
316
+
317
+ open_chatgpt_button.click(
318
+ open_chatgpt,
319
+ outputs=[progress_status]
320
+ )
321
 
322
  # Instructions
323
  gr.Markdown("""
324
  ### πŸ“Œ Instructions:
325
+ 1. Upload a PDF document
326
+ 2. Choose output format and context window size
327
+ 3. Select snippet number (default: 1) or enter custom prompt
328
+ 4. Select between OpenAI ChatGPT or Hugging Face model
329
+ 5. Click 'Process PDF' to generate summary
330
+ 6. Use 'Copy Prompt' and 'Open ChatGPT' for manual processing
331
+ 7. Download generated files as needed
332
 
333
  ### βš™οΈ Features:
334
  - Support for multiple PDF formats
335
  - Flexible text formatting options
336
+ - Predefined context window sizes (4K to 200K)
337
+ - Copy to clipboard functionality
338
+ - Direct ChatGPT integration
339
  - Downloadable outputs
340
  """)
341