oceansweep committed on
Commit
9db5a77
1 Parent(s): d59d956

Update App_Function_Libraries/Article_Summarization_Lib.py

Browse files
App_Function_Libraries/Article_Summarization_Lib.py CHANGED
@@ -1,284 +1,292 @@
1
- # Article_Summarization_Lib.py
2
- #########################################
3
- # Article Summarization Library
4
- # This library is used to handle summarization of articles.
5
-
6
- #
7
- ####
8
- #
9
- ####################
10
- # Function List
11
- #
12
- # 1.
13
- #
14
- ####################
15
- #
16
- # Import necessary libraries
17
- import datetime
18
- from datetime import datetime
19
- import gradio as gr
20
- import json
21
- import os
22
- import logging
23
- import requests
24
- # 3rd-Party Imports
25
- from tqdm import tqdm
26
-
27
- from App_Function_Libraries.Utils import sanitize_filename
28
- # Local Imports
29
- from Article_Extractor_Lib import scrape_article
30
- from Local_Summarization_Lib import summarize_with_llama, summarize_with_oobabooga, summarize_with_tabbyapi, \
31
- summarize_with_vllm, summarize_with_kobold, save_summary_to_file, summarize_with_local_llm
32
- from Summarization_General_Lib import summarize_with_openai, summarize_with_anthropic, summarize_with_cohere, summarize_with_groq, summarize_with_openrouter, summarize_with_deepseek, summarize_with_huggingface
33
- from SQLite_DB import Database, create_tables, add_media_with_keywords
34
- #
35
- #######################################################################################################################
36
- # Function Definitions
37
- #
38
-
39
- def ingest_article_to_db(url, title, author, content, keywords, summary, ingestion_date, custom_prompt):
40
- try:
41
- # Check if content is not empty or whitespace
42
- if not content.strip():
43
- raise ValueError("Content is empty.")
44
-
45
- db = Database()
46
- create_tables()
47
- keyword_list = keywords.split(',') if keywords else ["default"]
48
- keyword_str = ', '.join(keyword_list)
49
-
50
- # Set default values for missing fields
51
- url = url or 'Unknown'
52
- title = title or 'Unknown'
53
- author = author or 'Unknown'
54
- keywords = keywords or 'default'
55
- summary = summary or 'No summary available'
56
- ingestion_date = ingestion_date or datetime.datetime.now().strftime('%Y-%m-%d')
57
-
58
- # Log the values of all fields before calling add_media_with_keywords
59
- logging.debug(f"URL: {url}")
60
- logging.debug(f"Title: {title}")
61
- logging.debug(f"Author: {author}")
62
- logging.debug(f"Content: {content[:50]}... (length: {len(content)})") # Log first 50 characters of content
63
- logging.debug(f"Keywords: {keywords}")
64
- logging.debug(f"Summary: {summary}")
65
- logging.debug(f"Ingestion Date: {ingestion_date}")
66
- logging.debug(f"Custom Prompt: {custom_prompt}")
67
-
68
- # Check if any required field is empty and log the specific missing field
69
- if not url:
70
- logging.error("URL is missing.")
71
- raise ValueError("URL is missing.")
72
- if not title:
73
- logging.error("Title is missing.")
74
- raise ValueError("Title is missing.")
75
- if not content:
76
- logging.error("Content is missing.")
77
- raise ValueError("Content is missing.")
78
- if not keywords:
79
- logging.error("Keywords are missing.")
80
- raise ValueError("Keywords are missing.")
81
- if not summary:
82
- logging.error("Summary is missing.")
83
- raise ValueError("Summary is missing.")
84
- if not ingestion_date:
85
- logging.error("Ingestion date is missing.")
86
- raise ValueError("Ingestion date is missing.")
87
- if not custom_prompt:
88
- logging.error("Custom prompt is missing.")
89
- raise ValueError("Custom prompt is missing.")
90
-
91
- # Add media with keywords to the database
92
- result = add_media_with_keywords(
93
- url=url,
94
- title=title,
95
- media_type='article',
96
- content=content,
97
- keywords=keyword_str or "article_default",
98
- prompt=custom_prompt or None,
99
- summary=summary or "No summary generated",
100
- transcription_model=None, # or some default value if applicable
101
- author=author or 'Unknown',
102
- ingestion_date=ingestion_date
103
- )
104
- return result
105
- except Exception as e:
106
- logging.error(f"Failed to ingest article to the database: {e}")
107
- return str(e)
108
-
109
-
110
- def scrape_and_summarize_multiple(urls, custom_prompt_arg, api_name, api_key, keywords, custom_article_titles):
111
- urls = [url.strip() for url in urls.split('\n') if url.strip()]
112
- custom_titles = custom_article_titles.split('\n') if custom_article_titles else []
113
-
114
- results = []
115
- errors = []
116
-
117
- # Create a progress bar
118
- progress = gr.Progress()
119
-
120
- for i, url in tqdm(enumerate(urls), total=len(urls), desc="Processing URLs"):
121
- custom_title = custom_titles[i] if i < len(custom_titles) else None
122
- try:
123
- result = scrape_and_summarize(url, custom_prompt_arg, api_name, api_key, keywords, custom_title)
124
- results.append(f"Results for URL {i + 1}:\n{result}")
125
- except Exception as e:
126
- error_message = f"Error processing URL {i + 1} ({url}): {str(e)}"
127
- errors.append(error_message)
128
- results.append(f"Failed to process URL {i + 1}: {url}")
129
-
130
- # Update progress
131
- progress((i + 1) / len(urls), desc=f"Processed {i + 1}/{len(urls)} URLs")
132
-
133
- # Combine results and errors
134
- combined_output = "\n".join(results)
135
- if errors:
136
- combined_output += "\n\nErrors encountered:\n" + "\n".join(errors)
137
-
138
- return combined_output
139
-
140
-
141
- def scrape_and_summarize(url, custom_prompt_arg, api_name, api_key, keywords, custom_article_title):
142
- try:
143
- # Step 1: Scrape the article
144
- article_data = scrape_article(url)
145
- print(f"Scraped Article Data: {article_data}") # Debugging statement
146
- if not article_data:
147
- return "Failed to scrape the article."
148
-
149
- # Use the custom title if provided, otherwise use the scraped title
150
- title = custom_article_title.strip() if custom_article_title else article_data.get('title', 'Untitled')
151
- author = article_data.get('author', 'Unknown')
152
- content = article_data.get('content', '')
153
- ingestion_date = datetime.now().strftime('%Y-%m-%d')
154
-
155
- print(f"Title: {title}, Author: {author}, Content Length: {len(content)}") # Debugging statement
156
-
157
- # Custom prompt for the article
158
- article_custom_prompt = custom_prompt_arg or "Summarize this article."
159
-
160
- # Step 2: Summarize the article
161
- summary = None
162
- if api_name:
163
- logging.debug(f"Article_Summarizer: Summarization being performed by {api_name}")
164
-
165
- # Sanitize filename for saving the JSON file
166
- sanitized_title = sanitize_filename(title)
167
- json_file_path = os.path.join("Results", f"{sanitized_title}_segments.json")
168
-
169
- with open(json_file_path, 'w') as json_file:
170
- json.dump([{'text': content}], json_file, indent=2)
171
-
172
- try:
173
- if api_name.lower() == 'openai':
174
- # def summarize_with_openai(api_key, input_data, custom_prompt_arg)
175
- summary = summarize_with_openai(api_key, json_file_path, article_custom_prompt)
176
-
177
- elif api_name.lower() == "anthropic":
178
- # def summarize_with_anthropic(api_key, input_data, model, custom_prompt_arg, max_retries=3, retry_delay=5):
179
- summary = summarize_with_anthropic(api_key, json_file_path, article_custom_prompt)
180
- elif api_name.lower() == "cohere":
181
- # def summarize_with_cohere(api_key, input_data, model, custom_prompt_arg)
182
- summary = summarize_with_cohere(api_key, json_file_path, article_custom_prompt)
183
-
184
- elif api_name.lower() == "groq":
185
- logging.debug(f"MAIN: Trying to summarize with groq")
186
- # def summarize_with_groq(api_key, input_data, model, custom_prompt_arg):
187
- summary = summarize_with_groq(api_key, json_file_path, article_custom_prompt)
188
-
189
- elif api_name.lower() == "openrouter":
190
- logging.debug(f"MAIN: Trying to summarize with OpenRouter")
191
- # def summarize_with_openrouter(api_key, input_data, custom_prompt_arg):
192
- summary = summarize_with_openrouter(api_key, json_file_path, article_custom_prompt)
193
-
194
- elif api_name.lower() == "deepseek":
195
- logging.debug(f"MAIN: Trying to summarize with DeepSeek")
196
- # def summarize_with_deepseek(api_key, input_data, custom_prompt_arg):
197
- summary = summarize_with_deepseek(api_key, json_file_path, article_custom_prompt)
198
-
199
- elif api_name.lower() == "llama.cpp":
200
- logging.debug(f"MAIN: Trying to summarize with Llama.cpp")
201
- # def summarize_with_llama(api_url, file_path, token, custom_prompt)
202
- summary = summarize_with_llama(json_file_path, article_custom_prompt)
203
-
204
- elif api_name.lower() == "kobold":
205
- logging.debug(f"MAIN: Trying to summarize with Kobold.cpp")
206
- # def summarize_with_kobold(input_data, kobold_api_token, custom_prompt_input, api_url):
207
- summary = summarize_with_kobold(json_file_path, api_key, article_custom_prompt)
208
-
209
- elif api_name.lower() == "ooba":
210
- # def summarize_with_oobabooga(input_data, api_key, custom_prompt, api_url):
211
- summary = summarize_with_oobabooga(json_file_path, api_key, article_custom_prompt)
212
-
213
- elif api_name.lower() == "tabbyapi":
214
- # def summarize_with_tabbyapi(input_data, tabby_model, custom_prompt_input, api_key=None, api_IP):
215
- summary = summarize_with_tabbyapi(json_file_path, article_custom_prompt)
216
-
217
- elif api_name.lower() == "vllm":
218
- logging.debug(f"MAIN: Trying to summarize with VLLM")
219
- # def summarize_with_vllm(api_key, input_data, custom_prompt_input):
220
- summary = summarize_with_vllm(json_file_path, article_custom_prompt)
221
-
222
- elif api_name.lower() == "local-llm":
223
- logging.debug(f"MAIN: Trying to summarize with Local LLM")
224
- summary = summarize_with_local_llm(json_file_path, article_custom_prompt)
225
-
226
- elif api_name.lower() == "huggingface":
227
- logging.debug(f"MAIN: Trying to summarize with huggingface")
228
- # def summarize_with_huggingface(api_key, input_data, custom_prompt_arg):
229
- summarize_with_huggingface(api_key, json_file_path, article_custom_prompt)
230
- # Add additional API handlers here...
231
- except requests.exceptions.ConnectionError as e:
232
- logging.error(f"Connection error while trying to summarize with {api_name}: {str(e)}")
233
-
234
- if summary:
235
- logging.info(f"Article_Summarizer: Summary generated using {api_name} API")
236
- save_summary_to_file(summary, json_file_path)
237
- else:
238
- summary = "Summary not available"
239
- logging.warning(f"Failed to generate summary using {api_name} API")
240
-
241
- else:
242
- summary = "Article Summarization: No API provided for summarization."
243
-
244
- print(f"Summary: {summary}") # Debugging statement
245
-
246
- # Step 3: Ingest the article into the database
247
- ingestion_result = ingest_article_to_db(url, title, author, content, keywords, summary, ingestion_date,
248
- article_custom_prompt)
249
-
250
- return f"Title: {title}\nAuthor: {author}\nIngestion Result: {ingestion_result}\n\nSummary: {summary}\n\nArticle Contents: {content}"
251
- except Exception as e:
252
- logging.error(f"Error processing URL {url}: {str(e)}")
253
- return f"Failed to process URL {url}: {str(e)}"
254
-
255
-
256
- def ingest_unstructured_text(text, custom_prompt, api_name, api_key, keywords, custom_article_title):
257
- title = custom_article_title.strip() if custom_article_title else "Unstructured Text"
258
- author = "Unknown"
259
- ingestion_date = datetime.now().strftime('%Y-%m-%d')
260
-
261
- # Summarize the unstructured text
262
- if api_name:
263
- json_file_path = f"Results/{title.replace(' ', '_')}_segments.json"
264
- with open(json_file_path, 'w') as json_file:
265
- json.dump([{'text': text}], json_file, indent=2)
266
-
267
- if api_name.lower() == 'openai':
268
- summary = summarize_with_openai(api_key, json_file_path, custom_prompt)
269
- # Add other APIs as needed
270
- else:
271
- summary = "Unsupported API."
272
- else:
273
- summary = "No API provided for summarization."
274
-
275
- # Ingest the unstructured text into the database
276
- ingestion_result = ingest_article_to_db('Unstructured Text', title, author, text, keywords, summary, ingestion_date,
277
- custom_prompt)
278
- return f"Title: {title}\nSummary: {summary}\nIngestion Result: {ingestion_result}"
279
-
280
-
281
-
282
- #
283
- #
 
 
 
 
 
 
 
 
284
  #######################################################################################################################
 
1
+ # Article_Summarization_Lib.py
2
+ #########################################
3
+ # Article Summarization Library
4
+ # This library is used to handle summarization of articles.
5
+
6
+ #
7
+ ####
8
+ #
9
+ ####################
10
+ # Function List
11
+ #
12
+ # 1.
13
+ #
14
+ ####################
15
+ #
16
+ # Import necessary libraries
17
+ import datetime
18
+ from datetime import datetime
19
+ import gradio as gr
20
+ import json
21
+ import os
22
+ import logging
23
+ import requests
24
+ # 3rd-Party Imports
25
+ from tqdm import tqdm
26
+
27
+ from App_Function_Libraries.Utils import sanitize_filename
28
+ # Local Imports
29
+ from Article_Extractor_Lib import scrape_article
30
+ from Local_Summarization_Lib import summarize_with_llama, summarize_with_oobabooga, summarize_with_tabbyapi, \
31
+ summarize_with_vllm, summarize_with_kobold, save_summary_to_file, summarize_with_local_llm
32
+ from Summarization_General_Lib import summarize_with_openai, summarize_with_anthropic, summarize_with_cohere, \
33
+ summarize_with_groq, summarize_with_openrouter, summarize_with_deepseek, summarize_with_huggingface, \
34
+ summarize_with_mistral
35
+ from SQLite_DB import Database, create_tables, add_media_with_keywords
36
+ #
37
+ #######################################################################################################################
38
+ # Function Definitions
39
+ #
40
+
41
def ingest_article_to_db(url, title, author, content, keywords, summary, ingestion_date, custom_prompt):
    """Store an article (or free-form text) in the media database.

    Optional fields that are empty are replaced by placeholder values before
    the record is handed to ``add_media_with_keywords``.

    Args:
        url: Source URL; falls back to ``'Unknown'``.
        title: Article title; falls back to ``'Unknown'``.
        author: Article author; falls back to ``'Unknown'``.
        content: Article body. Must contain non-whitespace text.
        keywords: Comma-separated keyword string; falls back to ``'default'``.
        summary: Summary text; falls back to ``'No summary available'``.
        ingestion_date: Date string; defaults to today as ``YYYY-MM-DD``.
        custom_prompt: Prompt that produced the summary. Required.

    Returns:
        The value returned by ``add_media_with_keywords`` on success, or the
        error message as a string on failure. This function never raises.
    """
    try:
        # Validate the two mandatory inputs before touching the database.
        # `not content` also rejects None, which has no .strip().
        if not content or not content.strip():
            logging.error("Content is missing.")
            raise ValueError("Content is empty.")
        if not custom_prompt:
            logging.error("Custom prompt is missing.")
            raise ValueError("Custom prompt is missing.")

        # NOTE(review): the instance is not used here; the call is kept in case
        # constructing Database initialises something - confirm and drop if not.
        Database()
        create_tables()

        # Normalise "a, b,,c " -> "a, b, c"; an empty result uses the default tag.
        keyword_list = [kw.strip() for kw in (keywords or "").split(',') if kw.strip()]
        keyword_str = ', '.join(keyword_list or ["default"])

        # Placeholder values for optional fields.
        url = url or 'Unknown'
        title = title or 'Unknown'
        author = author or 'Unknown'
        summary = summary or 'No summary available'
        # `datetime` is the class here (the file's `from datetime import datetime`
        # shadows the module), so `datetime.datetime.now()` would fail.
        ingestion_date = ingestion_date or datetime.now().strftime('%Y-%m-%d')

        logging.debug("URL: %s", url)
        logging.debug("Title: %s", title)
        logging.debug("Author: %s", author)
        # Only the first 50 characters of the content are logged.
        logging.debug("Content: %s... (length: %d)", content[:50], len(content))
        logging.debug("Keywords: %s", keyword_str)
        logging.debug("Summary: %s", summary)
        logging.debug("Ingestion Date: %s", ingestion_date)
        logging.debug("Custom Prompt: %s", custom_prompt)

        return add_media_with_keywords(
            url=url,
            title=title,
            media_type='article',
            content=content,
            keywords=keyword_str,
            prompt=custom_prompt,
            summary=summary,
            transcription_model=None,  # articles have no transcription model
            author=author,
            ingestion_date=ingestion_date,
        )
    except Exception as e:
        logging.error(f"Failed to ingest article to the database: {e}")
        return str(e)
110
+
111
+
112
def scrape_and_summarize_multiple(urls, custom_prompt_arg, api_name, api_key, keywords, custom_article_titles, system_message=None):
    """Scrape, summarize and ingest several articles, one URL per line.

    Args:
        urls: Newline-separated URLs. Blank lines are ignored.
        custom_prompt_arg: Prompt forwarded to ``scrape_and_summarize``.
        api_name: Name of the summarization backend, forwarded unchanged.
        api_key: API key forwarded unchanged.
        keywords: Keyword string forwarded unchanged.
        custom_article_titles: Optional newline-separated titles. They are
            matched to URLs by position after blank URL lines are dropped;
            a blank title line means "use the scraped title".
        system_message: Optional system prompt forwarded unchanged.

    Returns:
        One text report with a section per URL, followed by an
        "Errors encountered" section when any URL raised. Returns an empty
        string when no URL was supplied.
    """
    url_list = [line.strip() for line in (urls or "").split('\n') if line.strip()]
    if not url_list:
        # Nothing to do: skip creating the progress tracker.
        return ""

    title_list = [line.strip() for line in (custom_article_titles or "").split('\n')]

    results = []
    errors = []
    total = len(url_list)

    # Progress reporting for the Gradio UI.
    progress = gr.Progress()

    for i, url in tqdm(enumerate(url_list), total=total, desc="Processing URLs"):
        position = i + 1
        # Missing or blank title -> None so the scraped title is used.
        custom_title = (title_list[i] if i < len(title_list) else "") or None
        try:
            result = scrape_and_summarize(url, custom_prompt_arg, api_name, api_key, keywords, custom_title, system_message)
            results.append(f"Results for URL {position}:\n{result}")
        except Exception as e:
            errors.append(f"Error processing URL {position} ({url}): {str(e)}")
            results.append(f"Failed to process URL {position}: {url}")

        progress(position / total, desc=f"Processed {position}/{total} URLs")

    combined_output = "\n".join(results)
    if errors:
        combined_output += "\n\nErrors encountered:\n" + "\n".join(errors)

    return combined_output
141
+
142
+
143
def _summarize_article_file(api_name, api_key, json_file_path, prompt, system_message):
    """Run the summarizer selected by ``api_name`` on the saved segments file.

    Returns the summarizer's result, or None when ``api_name`` is not one of
    the known backends.
    """
    # Each entry is a lambda so the backend is only looked up and called when
    # selected. Positional argument order is copied from the previous if/elif
    # chain.
    # NOTE(review): signature notes in that chain hinted some backends may take
    # extra parameters (e.g. a model name) - confirm against the summarizer libs.
    # FIXME - Swap out this table to use the dedicated function....
    handlers = {
        'openai': lambda: summarize_with_openai(api_key, json_file_path, prompt, system_message),
        'anthropic': lambda: summarize_with_anthropic(api_key, json_file_path, prompt, system_message),
        'cohere': lambda: summarize_with_cohere(api_key, json_file_path, prompt, system_message),
        'groq': lambda: summarize_with_groq(api_key, json_file_path, prompt, system_message),
        'openrouter': lambda: summarize_with_openrouter(api_key, json_file_path, prompt, system_message),
        'deepseek': lambda: summarize_with_deepseek(api_key, json_file_path, prompt, system_message),
        'mistral': lambda: summarize_with_mistral(api_key, json_file_path, prompt, system_message),
        'llama.cpp': lambda: summarize_with_llama(json_file_path, prompt, system_message),
        'kobold': lambda: summarize_with_kobold(json_file_path, api_key, prompt, system_message),
        'ooba': lambda: summarize_with_oobabooga(json_file_path, api_key, prompt, system_message),
        'tabbyapi': lambda: summarize_with_tabbyapi(json_file_path, prompt, system_message),
        'vllm': lambda: summarize_with_vllm(json_file_path, prompt, system_message),
        'local-llm': lambda: summarize_with_local_llm(json_file_path, prompt, system_message),
        # The result is returned here; it was previously discarded.
        'huggingface': lambda: summarize_with_huggingface(api_key, json_file_path, prompt, system_message),
    }
    handler = handlers.get(api_name.lower())
    if handler is None:
        return None
    logging.debug("MAIN: Trying to summarize with %s", api_name)
    return handler()


def scrape_and_summarize(url, custom_prompt_arg, api_name, api_key, keywords, custom_article_title, system_message=None):
    """Scrape one article, optionally summarize it, and ingest it into the DB.

    Args:
        url: Article URL passed to ``scrape_article``.
        custom_prompt_arg: Summarization prompt; a generic prompt is used when empty.
        api_name: Summarization backend name (case-insensitive). When empty,
            no summary is generated.
        api_key: API key passed to backends that take one.
        keywords: Keyword string stored with the article.
        custom_article_title: Title override; the scraped title is used when
            this is empty or whitespace.
        system_message: System prompt; a generic one is used when empty.

    Returns:
        A text report containing title, author, ingestion result, summary and
        article contents, or a "Failed to ..." message. Exceptions are logged
        and reported in the returned string rather than raised.
    """
    default_instruction = "Act as a professional summarizer and summarize this article."
    try:
        # Step 1: Scrape the article
        article_data = scrape_article(url)
        logging.debug("Scraped Article Data: %s", article_data)
        if not article_data:
            return "Failed to scrape the article."

        # Prefer the custom title; `or` fallbacks also cover None/empty values
        # coming back from the scraper.
        title = (custom_article_title or "").strip() or article_data.get('title') or 'Untitled'
        author = article_data.get('author') or 'Unknown'
        content = article_data.get('content') or ''
        ingestion_date = datetime.now().strftime('%Y-%m-%d')

        logging.debug("Title: %s, Author: %s, Content Length: %d", title, author, len(content))

        system_message = system_message or default_instruction
        article_custom_prompt = custom_prompt_arg or default_instruction

        # Step 2: Summarize the article
        if api_name:
            logging.debug(f"Article_Summarizer: Summarization being performed by {api_name}")

            # The summarizers read their input from a JSON segments file.
            os.makedirs("Results", exist_ok=True)
            sanitized_title = sanitize_filename(title)
            json_file_path = os.path.join("Results", f"{sanitized_title}_segments.json")
            with open(json_file_path, 'w', encoding='utf-8') as json_file:
                json.dump([{'text': content}], json_file, indent=2)

            summary = None
            try:
                summary = _summarize_article_file(api_name, api_key, json_file_path,
                                                  article_custom_prompt, system_message)
            except requests.exceptions.ConnectionError as e:
                # A connection failure is not fatal: the article is still ingested.
                logging.error(f"Connection error while trying to summarize with {api_name}: {str(e)}")

            if summary:
                logging.info(f"Article_Summarizer: Summary generated using {api_name} API")
                save_summary_to_file(summary, json_file_path)
            else:
                summary = "Summary not available"
                logging.warning(f"Failed to generate summary using {api_name} API")
        else:
            summary = "Article Summarization: No API provided for summarization."

        logging.debug("Summary: %s", summary)

        # Step 3: Ingest the article into the database
        ingestion_result = ingest_article_to_db(url, title, author, content, keywords, summary, ingestion_date,
                                                article_custom_prompt)

        return f"Title: {title}\nAuthor: {author}\nIngestion Result: {ingestion_result}\n\nSummary: {summary}\n\nArticle Contents: {content}"
    except Exception as e:
        logging.error(f"Error processing URL {url}: {str(e)}")
        return f"Failed to process URL {url}: {str(e)}"
262
+
263
+
264
def ingest_unstructured_text(text, custom_prompt, api_name, api_key, keywords, custom_article_title, system_message=None):
    """Summarize a block of free-form text and ingest it into the database.

    Args:
        text: The raw text to store.
        custom_prompt: Summarization prompt; a generic prompt is used when empty.
        api_name: Summarization backend name. Only ``'openai'`` is handled
            here; any other non-empty value yields "Unsupported API.".
        api_key: API key passed to the summarizer.
        keywords: Keyword string stored with the text.
        custom_article_title: Title; ``"Unstructured Text"`` is used when this
            is empty or whitespace.
        system_message: System prompt; a generic one is used when empty.

    Returns:
        A text report with the title, summary and ingestion result.
    """
    default_instruction = "Act as a professional summarizer and summarize this article."
    title = (custom_article_title or "").strip() or "Unstructured Text"
    author = "Unknown"
    ingestion_date = datetime.now().strftime('%Y-%m-%d')

    # Same defaults as scrape_and_summarize; ingestion rejects an empty prompt.
    custom_prompt = custom_prompt or default_instruction
    system_message = system_message or default_instruction

    # Summarize the unstructured text
    if not api_name:
        summary = "No API provided for summarization."
    elif api_name.lower() == 'openai':
        # Sanitized title + os.path.join, as in scrape_and_summarize, so path
        # separators in the title cannot end up in the file path.
        os.makedirs("Results", exist_ok=True)
        json_file_path = os.path.join("Results", f"{sanitize_filename(title)}_segments.json")
        with open(json_file_path, 'w', encoding='utf-8') as json_file:
            json.dump([{'text': text}], json_file, indent=2)
        summary = summarize_with_openai(api_key, json_file_path, custom_prompt, system_message)
    else:
        # Add other APIs as needed
        summary = "Unsupported API."

    # Ingest the unstructured text into the database
    ingestion_result = ingest_article_to_db('Unstructured Text', title, author, text, keywords, summary, ingestion_date,
                                            custom_prompt)
    return f"Title: {title}\nSummary: {summary}\nIngestion Result: {ingestion_result}"
287
+
288
+
289
+
290
+ #
291
+ #
292
  #######################################################################################################################