Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -9,54 +9,94 @@ from pathlib import Path
|
|
9 |
md = MarkItDown()
|
10 |
|
11 |
# Configure Gemini AI
|
12 |
-
genai.configure(api_key=
|
13 |
model = genai.GenerativeModel('gemini-2.0-flash-exp')
|
14 |
|
15 |
def process_with_markitdown(input_path):
|
16 |
"""Process file or URL with MarkItDown and return text content"""
|
|
|
17 |
try:
|
18 |
-
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
except Exception as e:
|
|
|
21 |
return f"Error processing input: {str(e)}"
|
22 |
|
23 |
def save_uploaded_file(uploaded_file):
|
24 |
"""Saves an uploaded file to a temporary location."""
|
|
|
25 |
if uploaded_file is None:
|
|
|
26 |
return "No file uploaded."
|
27 |
|
28 |
try:
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
|
|
|
|
|
|
|
|
|
34 |
with open(temp_filename, 'wb') as f:
|
35 |
-
f.write(
|
36 |
-
|
37 |
-
print("
|
38 |
-
|
39 |
return temp_filename
|
40 |
|
41 |
except Exception as e:
|
|
|
42 |
return f"An error occurred: {str(e)}"
|
43 |
|
44 |
async def summarize_text(text):
|
45 |
"""Summarize the input text using Gemini AI"""
|
46 |
try:
|
47 |
prompt = f"""Please provide a concise summary of the following text. Focus on the main points and key takeaways:
|
48 |
-
|
49 |
{text}
|
50 |
-
|
51 |
Summary:"""
|
52 |
|
53 |
-
|
|
|
54 |
return response.text
|
55 |
except Exception as e:
|
56 |
return f"Error generating summary: {str(e)}"
|
57 |
|
58 |
async def process_input(input_text, uploaded_file=None):
|
59 |
"""Main function to process either URL or uploaded file"""
|
|
|
60 |
try:
|
61 |
if uploaded_file is not None:
|
62 |
# Handle file upload
|
@@ -115,7 +155,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
|
|
115 |
file_upload = gr.File(
|
116 |
label="Drop files here or click to upload",
|
117 |
file_types=[
|
118 |
-
".pdf", ".docx", ".xlsx", ".csv", ".txt",
|
119 |
".html", ".htm", ".xml", ".json"
|
120 |
],
|
121 |
file_count="single",
|
@@ -136,7 +176,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
|
|
136 |
submit_btn.click(
|
137 |
fn=process_input,
|
138 |
inputs=[input_text, file_upload],
|
139 |
-
outputs=output_text
|
|
|
140 |
)
|
141 |
|
142 |
clear_btn.click(
|
|
|
9 |
import os  # local to this setup block; harmless if already imported above

# Shared MarkItDown converter used by process_with_markitdown().
md = MarkItDown()

# Configure Gemini AI.
# SECURITY: the API key must never be committed to the repository. It is read
# from the GOOGLE_API_KEY environment variable (e.g. a Space secret). Any key
# that was previously committed should be treated as leaked and revoked.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"), transport="rest")
model = genai.GenerativeModel('gemini-2.0-flash-exp')
|
14 |
|
15 |
def process_with_markitdown(input_path, timeout=30):
    """Process file or URL with MarkItDown and return text content.

    The conversion runs on a worker thread so the caller can stop waiting
    after ``timeout`` seconds.

    Args:
        input_path: File path or URL passed straight to ``md.convert``.
        timeout: Maximum number of seconds to wait for the conversion.

    Returns:
        The converted text on success. On failure, a string describing the
        problem ("Error: ..." or "Error processing input: ...").
    """
    import concurrent.futures

    print(f"[DEBUG] Starting MarkItDown processing for: {input_path}")

    def convert():
        print("[DEBUG] Attempting MarkItDown conversion")
        result = md.convert(input_path)
        print("[DEBUG] MarkItDown conversion successful")
        if not result or not hasattr(result, 'text_content'):
            print("[DEBUG] No text content in result")
            return "Error: No text content found in document"
        return result.text_content

    # The executor is deliberately NOT used as a context manager: leaving a
    # `with` block calls shutdown(wait=True), which would block until the
    # conversion finishes and make the timeout below ineffective.
    executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
    try:
        future = executor.submit(convert)
        try:
            text = future.result(timeout=timeout)
        except concurrent.futures.TimeoutError:
            print("[DEBUG] MarkItDown processing timed out")
            return f"Error: Processing timed out after {timeout} seconds"
        print("[DEBUG] Successfully got result from MarkItDown")
        return text
    except Exception as e:
        print(f"[DEBUG] Error in process_with_markitdown: {str(e)}")
        return f"Error processing input: {str(e)}"
    finally:
        # Do not wait for a still-running conversion; the worker thread is
        # left to finish (or fail) in the background.
        executor.shutdown(wait=False)
|
45 |
|
46 |
def save_uploaded_file(uploaded_file):
    """Copy an uploaded file into its own temporary directory.

    Each call writes into a freshly created directory, so two uploads that
    share a filename cannot overwrite one another. The original basename is
    kept so the extension stays available to downstream processing.

    Args:
        uploaded_file: Either an object whose ``name`` attribute is the path
            of the uploaded file, a plain path (``str`` / ``os.PathLike``),
            or ``None``.

    Returns:
        The path of the saved copy on success. Otherwise a message string:
        "No file uploaded.", "Error reading file: ..." or
        "An error occurred: ...". The copy is not deleted by this function.
    """
    print("[DEBUG] Starting save_uploaded_file")
    if uploaded_file is None:
        print("[DEBUG] No file uploaded")
        return "No file uploaded."

    try:
        print(f"[DEBUG] Uploaded file object type: {type(uploaded_file)}")

        # Plain paths are used as-is; anything else is expected to expose
        # the on-disk path through `.name`.
        if isinstance(uploaded_file, (str, os.PathLike)):
            file_path = os.fspath(uploaded_file)
        else:
            file_path = uploaded_file.name
        print(f"[DEBUG] Uploaded file name: {file_path}")
        print(f"[DEBUG] Original file path: {file_path}")

        # Read the content directly from the original file
        try:
            with open(file_path, 'rb') as source_file:
                content = source_file.read()
            print(f"[DEBUG] Successfully read {len(content)} bytes from source file")
        except Exception as e:
            print(f"[DEBUG] Error reading source file: {str(e)}")
            return f"Error reading file: {str(e)}"

        # A unique directory per call avoids clobbering same-named uploads.
        temp_dir = tempfile.mkdtemp()
        temp_filename = os.path.join(temp_dir, os.path.basename(file_path))

        with open(temp_filename, 'wb') as f:
            f.write(content)

        print(f"[DEBUG] File saved successfully at: {temp_filename}")
        return temp_filename

    except Exception as e:
        print(f"[DEBUG] Error in save_uploaded_file: {str(e)}")
        return f"An error occurred: {str(e)}"
|
83 |
|
84 |
async def summarize_text(text):
    """Summarize the input text using Gemini AI.

    Args:
        text: The text to summarize; it is embedded verbatim in the prompt.

    Returns:
        The ``text`` attribute of the model response, or a string starting
        with "Error generating summary:" if anything raises.
    """
    import asyncio

    try:
        prompt = (
            "Please provide a concise summary of the following text. "
            "Focus on the main points and key takeaways:\n"
            f"{text}\n"
            "Summary:"
        )

        # generate_content is a blocking call. Running it on the default
        # executor keeps this coroutine from stalling the event loop while
        # the request is in flight.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(None, model.generate_content, prompt)
        return response.text
    except Exception as e:
        return f"Error generating summary: {str(e)}"
|
96 |
|
97 |
async def process_input(input_text, uploaded_file=None):
|
98 |
"""Main function to process either URL or uploaded file"""
|
99 |
+
print("[DEBUG] Starting process_input")
|
100 |
try:
|
101 |
if uploaded_file is not None:
|
102 |
# Handle file upload
|
|
|
155 |
file_upload = gr.File(
|
156 |
label="Drop files here or click to upload",
|
157 |
file_types=[
|
158 |
+
".pdf", ".docx", ".xlsx", ".csv", ".txt",
|
159 |
".html", ".htm", ".xml", ".json"
|
160 |
],
|
161 |
file_count="single",
|
|
|
176 |
submit_btn.click(
|
177 |
fn=process_input,
|
178 |
inputs=[input_text, file_upload],
|
179 |
+
outputs=output_text,
|
180 |
+
api_name="process"
|
181 |
)
|
182 |
|
183 |
clear_btn.click(
|