gnumanth committed on
Commit
929f24d
·
verified ·
1 Parent(s): da3aae1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -16
app.py CHANGED
@@ -9,54 +9,94 @@ from pathlib import Path
9
  md = MarkItDown()
10
 
11
  # Configure Gemini AI
12
- genai.configure(api_key=os.getenv('GEMINI_KEY'))
13
  model = genai.GenerativeModel('gemini-2.0-flash-exp')
14
 
15
  def process_with_markitdown(input_path):
16
  """Process file or URL with MarkItDown and return text content"""
 
17
  try:
18
- result = md.convert(input_path)
19
- return result.text_content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  except Exception as e:
 
21
  return f"Error processing input: {str(e)}"
22
 
23
  def save_uploaded_file(uploaded_file):
24
  """Saves an uploaded file to a temporary location."""
 
25
  if uploaded_file is None:
 
26
  return "No file uploaded."
27
 
28
  try:
29
- # Extract filename and file object from the tuple
30
- filename, file_object = uploaded_file
31
- temp_dir = tempfile.gettempdir()
32
- temp_filename = os.path.join(temp_dir, filename)
 
 
 
 
 
 
 
 
 
 
 
33
 
 
 
 
 
34
  with open(temp_filename, 'wb') as f:
35
- f.write(file_object.read())
36
-
37
- print("****",filename, temp_filename)
38
-
39
  return temp_filename
40
 
41
  except Exception as e:
 
42
  return f"An error occurred: {str(e)}"
43
 
44
  async def summarize_text(text):
45
  """Summarize the input text using Gemini AI"""
46
  try:
47
  prompt = f"""Please provide a concise summary of the following text. Focus on the main points and key takeaways:
48
-
49
  {text}
50
-
51
  Summary:"""
52
 
53
- response = await model.generate_content_async(prompt)
 
54
  return response.text
55
  except Exception as e:
56
  return f"Error generating summary: {str(e)}"
57
 
58
  async def process_input(input_text, uploaded_file=None):
59
  """Main function to process either URL or uploaded file"""
 
60
  try:
61
  if uploaded_file is not None:
62
  # Handle file upload
@@ -115,7 +155,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
115
  file_upload = gr.File(
116
  label="Drop files here or click to upload",
117
  file_types=[
118
- ".pdf", ".docx", ".xlsx", ".csv", ".txt", ".md",
119
  ".html", ".htm", ".xml", ".json"
120
  ],
121
  file_count="single",
@@ -136,7 +176,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
136
  submit_btn.click(
137
  fn=process_input,
138
  inputs=[input_text, file_upload],
139
- outputs=output_text
 
140
  )
141
 
142
  clear_btn.click(
 
9
  md = MarkItDown()
10
 
11
  # Configure Gemini AI
12
+ genai.configure(api_key='AIzaSyBKnIWXodhUTBfJew5NuMgow0OhrR2ugvQ',transport="rest")
13
  model = genai.GenerativeModel('gemini-2.0-flash-exp')
14
 
15
  def process_with_markitdown(input_path):
16
  """Process file or URL with MarkItDown and return text content"""
17
+ print(f"[DEBUG] Starting MarkItDown processing for: {input_path}")
18
  try:
19
+ import concurrent.futures
20
+ from concurrent.futures import ThreadPoolExecutor
21
+
22
+ def convert_with_timeout():
23
+ print("[DEBUG] Attempting MarkItDown conversion")
24
+ result = md.convert(input_path)
25
+ print("[DEBUG] MarkItDown conversion successful")
26
+ if not result or not hasattr(result, 'text_content'):
27
+ print("[DEBUG] No text content in result")
28
+ return "Error: No text content found in document"
29
+ return result.text_content
30
+
31
+ # Use ThreadPoolExecutor with timeout
32
+ with ThreadPoolExecutor() as executor:
33
+ future = executor.submit(convert_with_timeout)
34
+ try:
35
+ result = future.result(timeout=30) # 30 second timeout
36
+ print("[DEBUG] Successfully got result from MarkItDown")
37
+ return result
38
+ except concurrent.futures.TimeoutError:
39
+ print("[DEBUG] MarkItDown processing timed out")
40
+ return "Error: Processing timed out after 30 seconds"
41
+
42
  except Exception as e:
43
+ print(f"[DEBUG] Error in process_with_markitdown: {str(e)}")
44
  return f"Error processing input: {str(e)}"
45
 
46
  def save_uploaded_file(uploaded_file):
47
  """Saves an uploaded file to a temporary location."""
48
+ print("[DEBUG] Starting save_uploaded_file")
49
  if uploaded_file is None:
50
+ print("[DEBUG] No file uploaded")
51
  return "No file uploaded."
52
 
53
  try:
54
+ print(f"[DEBUG] Uploaded file object type: {type(uploaded_file)}")
55
+ print(f"[DEBUG] Uploaded file name: {uploaded_file.name}")
56
+
57
+ # Get the actual file path from the uploaded file
58
+ file_path = uploaded_file.name
59
+ print(f"[DEBUG] Original file path: {file_path}")
60
+
61
+ # Read the content directly from the original file
62
+ try:
63
+ with open(file_path, 'rb') as source_file:
64
+ content = source_file.read()
65
+ print(f"[DEBUG] Successfully read {len(content)} bytes from source file")
66
+ except Exception as e:
67
+ print(f"[DEBUG] Error reading source file: {str(e)}")
68
+ return f"Error reading file: {str(e)}"
69
 
70
+ # Save to temp file
71
+ temp_dir = tempfile.gettempdir()
72
+ temp_filename = os.path.join(temp_dir, os.path.basename(file_path))
73
+
74
  with open(temp_filename, 'wb') as f:
75
+ f.write(content)
76
+
77
+ print(f"[DEBUG] File saved successfully at: {temp_filename}")
 
78
  return temp_filename
79
 
80
  except Exception as e:
81
+ print(f"[DEBUG] Error in save_uploaded_file: {str(e)}")
82
  return f"An error occurred: {str(e)}"
83
 
84
  async def summarize_text(text):
85
  """Summarize the input text using Gemini AI"""
86
  try:
87
  prompt = f"""Please provide a concise summary of the following text. Focus on the main points and key takeaways:
 
88
  {text}
 
89
  Summary:"""
90
 
91
+ # Use the synchronous version since async version isn't working as expected
92
+ response = model.generate_content(prompt)
93
  return response.text
94
  except Exception as e:
95
  return f"Error generating summary: {str(e)}"
96
 
97
  async def process_input(input_text, uploaded_file=None):
98
  """Main function to process either URL or uploaded file"""
99
+ print("[DEBUG] Starting process_input")
100
  try:
101
  if uploaded_file is not None:
102
  # Handle file upload
 
155
  file_upload = gr.File(
156
  label="Drop files here or click to upload",
157
  file_types=[
158
+ ".pdf", ".docx", ".xlsx", ".csv", ".txt",
159
  ".html", ".htm", ".xml", ".json"
160
  ],
161
  file_count="single",
 
176
  submit_btn.click(
177
  fn=process_input,
178
  inputs=[input_text, file_upload],
179
+ outputs=output_text,
180
+ api_name="process"
181
  )
182
 
183
  clear_btn.click(