File size: 6,518 Bytes
8fc9e84
 
 
 
 
 
 
 
 
 
 
155926c
dbed0a3
8fc9e84
 
 
929f24d
8fc9e84
929f24d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8fc9e84
929f24d
8fc9e84
7f5bd14
2c312a9
 
929f24d
2c312a9
929f24d
0f03e2f
47a9313
0f03e2f
929f24d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7f5bd14
929f24d
 
 
 
7f5bd14
929f24d
 
 
7f5bd14
f349c08
0f03e2f
929f24d
f349c08
 
8fc9e84
 
 
ce549a4
 
 
8fc9e84
929f24d
 
8fc9e84
 
 
 
 
 
929f24d
8fc9e84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5a69f4d
 
21931b3
8fc9e84
5a69f4d
 
 
0a358e5
4a5c91c
5a13c15
5a69f4d
 
 
8fc9e84
 
 
 
 
 
 
 
 
 
 
 
929f24d
8fc9e84
 
 
5a69f4d
8fc9e84
 
 
 
 
 
5a69f4d
 
 
 
21931b3
8fc9e84
5a69f4d
8fc9e84
 
 
929f24d
 
8fc9e84
 
 
5a69f4d
8fc9e84
 
 
 
 
 
0a358e5
 
5a13c15
8fc9e84
9588578
8fc9e84
5a69f4d
 
155926c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
import gradio as gr
from markitdown import MarkItDown
import google.generativeai as genai
import tempfile
import os
from pathlib import Path

# Initialize MarkItDown
# Single shared converter instance, reused by process_with_markitdown for
# every request (files and URLs alike).
md = MarkItDown()

# Configure Gemini AI
# API key comes from the GEMINI_KEY environment variable; os.getenv returns
# None when unset, so a missing key surfaces as an error at request time,
# not at import time.
genai.configure(api_key=os.getenv('GEMINI_KEY'))
# NOTE(review): this is a dated preview model name — preview models are
# routinely retired; confirm it is still served before deploying.
model = genai.GenerativeModel('gemini-2.0-flash-lite-preview-02-05')

def process_with_markitdown(input_path):
    """Convert a file path or URL to text with MarkItDown, bounded by a timeout.

    Args:
        input_path: Local file path or http(s) URL accepted by MarkItDown.

    Returns:
        The extracted text content on success, or a string starting with
        "Error" describing the failure (no content, timeout, or exception).
    """
    print(f"[DEBUG] Starting MarkItDown processing for: {input_path}")
    try:
        import concurrent.futures
        from concurrent.futures import ThreadPoolExecutor

        def convert_with_timeout():
            print("[DEBUG] Attempting MarkItDown conversion")
            result = md.convert(input_path)
            print("[DEBUG] MarkItDown conversion successful")
            if not result or not hasattr(result, 'text_content'):
                print("[DEBUG] No text content in result")
                return "Error: No text content found in document"
            return result.text_content

        # BUG FIX: do not use `with ThreadPoolExecutor()` here. Its __exit__
        # calls shutdown(wait=True), which blocks until the worker thread
        # finishes — so after a TimeoutError the function would still hang
        # until the stuck conversion completed, defeating the timeout.
        executor = ThreadPoolExecutor(max_workers=1)
        future = executor.submit(convert_with_timeout)
        try:
            result = future.result(timeout=30)  # 30 second timeout
            print("[DEBUG] Successfully got result from MarkItDown")
            return result
        except concurrent.futures.TimeoutError:
            print("[DEBUG] MarkItDown processing timed out")
            return "Error: Processing timed out after 30 seconds"
        finally:
            # Abandon a possibly-hung worker instead of waiting for it; the
            # daemonic cleanup happens at interpreter exit.
            executor.shutdown(wait=False)

    except Exception as e:
        print(f"[DEBUG] Error in process_with_markitdown: {str(e)}")
        return f"Error processing input: {str(e)}"
        
def save_uploaded_file(uploaded_file):
    """Copy an uploaded file to the system temp directory.

    Args:
        uploaded_file: Object with a ``.name`` attribute holding the path of
            the uploaded file (the shape Gradio's File component provides),
            or None when nothing was uploaded.

    Returns:
        The temp-file path on success, or a string starting with "Error"
        on failure. Every failure message starts with "Error" because
        process_input dispatches on ``startswith('Error')``; the previous
        messages ("No file uploaded.", "An error occurred: ...") slipped
        past that check and were fed to MarkItDown as paths.
    """
    print("[DEBUG] Starting save_uploaded_file")
    if uploaded_file is None:
        print("[DEBUG] No file uploaded")
        return "Error: No file uploaded."

    try:
        print(f"[DEBUG] Uploaded file object type: {type(uploaded_file)}")
        print(f"[DEBUG] Uploaded file name: {uploaded_file.name}")

        # Get the actual file path from the uploaded file
        file_path = uploaded_file.name
        print(f"[DEBUG] Original file path: {file_path}")

        # Read the content directly from the original file
        try:
            with open(file_path, 'rb') as source_file:
                content = source_file.read()
                print(f"[DEBUG] Successfully read {len(content)} bytes from source file")
        except Exception as e:
            print(f"[DEBUG] Error reading source file: {str(e)}")
            return f"Error reading file: {str(e)}"

        # Save a copy under the temp directory, keeping the original basename
        temp_dir = tempfile.gettempdir()
        temp_filename = os.path.join(temp_dir, os.path.basename(file_path))

        with open(temp_filename, 'wb') as f:
            f.write(content)

        print(f"[DEBUG] File saved successfully at: {temp_filename}")
        return temp_filename

    except Exception as e:
        print(f"[DEBUG] Error in save_uploaded_file: {str(e)}")
        return f"Error: An unexpected error occurred: {str(e)}"

async def summarize_text(text):
    """Produce a concise summary of *text* via the Gemini model.

    Returns the model's summary text, or a string starting with
    "Error generating summary:" if the API call fails.
    """
    instruction = ("Please provide a concise summary of the following text. "
                   "Focus on the main points and key takeaways:")
    prompt = f"{instruction}\n{text}\nSummary:"
    try:
        # generate_content is the synchronous API; the async signature here
        # exists so Gradio can await this handler.
        response = model.generate_content(prompt)
        return response.text
    except Exception as exc:
        return f"Error generating summary: {str(exc)}"

async def process_input(input_text, uploaded_file=None):
    """Summarize a URL, pasted text, or uploaded file.

    Args:
        input_text: A URL (http/https), raw text to summarize, or None/empty.
        uploaded_file: Optional Gradio file object; takes priority over
            ``input_text`` when present.

    Returns:
        The generated summary, or a string starting with "Error" describing
        the failure.
    """
    print("[DEBUG] Starting process_input")
    try:
        if uploaded_file is not None:
            # Handle file upload
            temp_path = save_uploaded_file(uploaded_file)
            if temp_path.startswith('Error'):
                return temp_path

            text = process_with_markitdown(temp_path)

            # Best-effort cleanup of the temp copy; a failed delete must not
            # fail the whole request, but we only swallow filesystem errors.
            try:
                os.remove(temp_path)
            except OSError:
                pass

        # BUG FIX: guard against input_text being None (no file AND empty
        # textbox) — calling .startswith on None raised TypeError.
        elif input_text and input_text.startswith(('http://', 'https://')):
            # Handle URL
            text = process_with_markitdown(input_text)

        else:
            # Handle direct text input (normalize None to empty string)
            text = input_text or ""

        if text.startswith('Error'):
            return text

        # Generate summary using Gemini AI
        summary = await summarize_text(text)
        return summary

    except Exception as e:
        return f"Error processing input: {str(e)}"

def clear_inputs():
    """Return reset values for the text box, file upload, and summary output."""
    empty_text, no_file = "", None
    return [empty_text, no_file, empty_text]

# Create Gradio interface with drag-and-drop
# Declarative UI layout: a URL/text box, a file-drop zone, submit/clear
# buttons, the summary output, and clickable URL examples.
with gr.Blocks(theme=gr.themes.Soft()) as iface:
    gr.Markdown(
        """
        # Summarizeit
        > Summarize any document! Using Gemini 2.0 Flash model.
        
        Enter a URL, paste text, or drag & drop a file to get a summary.
        """
    )
    
    # Free-form input: either a URL or raw text to summarize.
    with gr.Row():
        input_text = gr.Textbox(
            label="Enter URL or text",
            placeholder="Enter a URL or paste text here...",
            scale=2
        )
    
    # File upload restricted to formats MarkItDown can convert.
    with gr.Row():
        file_upload = gr.File(
            label="Drop files here or click to upload",
            file_types=[
                ".pdf", ".docx", ".xlsx", ".csv", ".txt", 
                ".html", ".htm", ".xml", ".json"
            ],
            file_count="single",
            scale=2
        )
    
    with gr.Row():
        submit_btn = gr.Button("Summarize", variant="primary")
        clear_btn = gr.Button("Clear")
    
    output_text = gr.Textbox(
        label="Summary",
        lines=10,
        show_copy_button=True
    )
    
    # Set up event handlers
    # process_input receives (input_text, file_upload); the file, when
    # present, takes priority. api_name exposes this as a named API route.
    submit_btn.click(
        fn=process_input,
        inputs=[input_text, file_upload],
        outputs=output_text,
        api_name="process"
    )
    
    # clear_inputs returns one value per output component, in order.
    clear_btn.click(
        fn=clear_inputs,
        outputs=[input_text, file_upload, output_text]
    )
    
    # Add examples
    # Example URLs populate input_text only; they are not pre-run.
    gr.Examples(
        examples=[
            ["https://h3manth.com"],
            ["https://www.youtube.com/watch?v=bSHp7WVpPgc"],
            ["https://en.wikipedia.org/wiki/Three-body_problem"]
        ],
        inputs=input_text
    )

if __name__ == "__main__":
    # BUG FIX: `iface.launch(True)` bound True to launch()'s first positional
    # parameter, `inline`, which only controls notebook embedding. The intent
    # here is almost certainly a public share link; pass the keyword
    # explicitly so the meaning is unambiguous.
    iface.launch(share=True)