"""Gradio front end that opens a Tkinter window for cropping a PDF region.

The Gradio interface accepts a PDF upload, then ``main`` opens a
``PDFViewer`` window in which the user drags a rectangle over a page and
saves that region as a PNG file.
"""
import subprocess
import sys

# Install specific version of Gradio.  Going through ``sys.executable -m pip``
# installs into the interpreter that is running this script rather than
# whichever ``pip`` happens to be first on PATH.
subprocess.run([sys.executable, "-m", "pip", "install", "gradio==1.7.7"])

import gradio as gr
import fitz
import tkinter as tk
from tkinter import filedialog
from PIL import Image, ImageTk


class PDFViewer:
    """Tkinter window showing one PDF page at a time with region selection.

    The mouse wheel moves between pages.  Dragging with the left mouse
    button draws a red rectangle; releasing the button renders the selected
    region, asks for a PNG file name and then closes the window.

    Pages are rendered with an identity matrix, so one canvas pixel
    corresponds to one PDF point and canvas coordinates are used directly
    as the clip rectangle.

    Attributes set by this class:
        doc: the opened ``fitz`` document (the caller closes it).
        page_num / page: index and object of the page being displayed.
        page_width / page_height: integer size of the current page.
        root / canvas / scrollbar: the Tkinter widgets.
        photo: reference to the displayed ``PhotoImage`` (kept on the
            instance so it is not garbage collected while shown).
        start_x / start_y: canvas coordinates of the drag origin, or None.
        rect: canvas id of the selection rectangle, or None.
        saved_path: path the selection was saved to, or None.
    """

    def __init__(self, pdf_path):
        """Open *pdf_path*, build the window and display the first page."""
        self.doc = fitz.open(pdf_path)
        self.page_num = 0
        self.page = self.doc[self.page_num]

        # Get the size of the first page
        self.page_width = int(self.page.rect.width)
        self.page_height = int(self.page.rect.height)

        # Selection state; initialised before any event can fire.
        self.start_x = None
        self.start_y = None
        self.rect = None
        self.saved_path = None
        self.photo = None

        # Create a Tkinter window
        self.root = tk.Tk()
        self.root.title("PDF Viewer")
        self.root.attributes("-topmost", True)  # Put the window at the top

        # The scrollbar is packed before the canvas so that it sits beside
        # the canvas instead of underneath it.
        self.scrollbar = tk.Scrollbar(
            self.root, orient="vertical", command=self.on_scroll
        )
        self.scrollbar.pack(side="right", fill="y")

        # Create a canvas to display the PDF page
        self.canvas = tk.Canvas(
            self.root,
            width=self.page_width,
            height=self.page_height,
            yscrollcommand=self.scrollbar.set,
        )
        self.canvas.pack(side="left", fill="both", expand=True)

        # Bind mouse wheel event for page navigation
        self.canvas.bind("<MouseWheel>", self.on_mousewheel)
        if self.root.tk.call("tk", "windowingsystem") == "x11":
            # Tk 8.6 on X11 reports the wheel as mouse buttons 4 and 5.
            self.canvas.bind("<Button-4>", self.on_mousewheel)
            self.canvas.bind("<Button-5>", self.on_mousewheel)

        # Bind left mouse button click and drag events
        self.canvas.bind("<ButtonPress-1>", self.on_button_press)
        self.canvas.bind("<B1-Motion>", self.on_move_press)
        self.canvas.bind("<ButtonRelease-1>", self.on_button_release)

        # Display the first page
        self.display_page()

    def _render(self, clip=None):
        """Render the current page (or the *clip* region of it) as a PIL image."""
        pix = self.page.get_pixmap(matrix=fitz.Matrix(1, 1), clip=clip)
        mode = "RGBA" if pix.alpha else "RGB"
        return Image.frombytes(mode, (pix.width, pix.height), pix.samples)

    def display_page(self):
        """Draw page ``self.page_num`` on the canvas, replacing its contents."""
        # Clear canvas; this also removes any selection rectangle, so the
        # stored id must be dropped with it.
        self.canvas.delete("all")
        self.rect = None

        # Get the size of the page
        self.page = self.doc[self.page_num]
        self.page_width = int(self.page.rect.width)
        self.page_height = int(self.page.rect.height)

        # Display the PDF page on the canvas
        self.photo = ImageTk.PhotoImage(image=self._render())
        self.canvas.create_image(0, 0, anchor=tk.NW, image=self.photo)

        # Update the scrollable area so the scrollbar covers the whole page
        self.canvas.configure(
            scrollregion=(0, 0, self.page_width, self.page_height)
        )
        self.canvas.yview_moveto(0)

    def on_scroll(self, *args):
        """Update canvas view when scrollbar is moved."""
        self.canvas.yview(*args)

    def on_mousewheel(self, event):
        """Go to the next/previous page when the mouse wheel is moved."""
        # A negative delta (or button 5 on X11) means "wheel down".
        forward = event.delta < 0 or getattr(event, "num", None) == 5
        target = self.page_num + (1 if forward else -1)
        target = max(0, min(target, len(self.doc) - 1))
        if target != self.page_num:
            self.page_num = target
            self.display_page()

    def on_button_press(self, event):
        """Record the starting point of the selection and start a rectangle."""
        self.start_x = self.canvas.canvasx(event.x)
        self.start_y = self.canvas.canvasy(event.y)

        # Delete any previously drawn rectangle
        if self.rect:
            self.canvas.delete(self.rect)

        # Draw a new rectangle starting from the clicked point
        self.rect = self.canvas.create_rectangle(
            self.start_x, self.start_y, self.start_x, self.start_y,
            outline='red',
        )

    def on_move_press(self, event):
        """Update the size of the rectangle as the mouse moves."""
        if self.rect is None or self.start_x is None:
            return
        cur_x = self.canvas.canvasx(event.x)
        cur_y = self.canvas.canvasy(event.y)
        self.canvas.coords(self.rect, self.start_x, self.start_y, cur_x, cur_y)

    def on_button_release(self, event):
        """Save the selected area as an image and close the window.

        A release without a recorded press, or a selection smaller than one
        pixel in either direction, is ignored and leaves the window open.
        """
        if self.start_x is None or self.start_y is None:
            return

        end_x = self.canvas.canvasx(event.x)
        end_y = self.canvas.canvasy(event.y)
        start_x, start_y = self.start_x, self.start_y
        self.start_x = self.start_y = None

        # Normalise the corners and keep them inside the page.
        # NOTE(review): assumes the page rectangle starts at (0, 0), as the
        # direct use of canvas coordinates for the clip already implies.
        page_rect = self.page.rect
        x1 = max(0, min(start_x, end_x))
        y1 = max(0, min(start_y, end_y))
        x2 = min(page_rect.width, max(start_x, end_x))
        y2 = min(page_rect.height, max(start_y, end_y))

        if x2 - x1 < 1 or y2 - y1 < 1:
            # Nothing to render (plain click or drag outside the page).
            if self.rect:
                self.canvas.delete(self.rect)
                self.rect = None
            return

        # Render the selected region and convert it to a PIL image
        img = self._render(clip=fitz.Rect(x1, y1, x2, y2))

        # Save the selected area as an image.  The dialog is parented to the
        # viewer so it is not hidden behind the always-on-top window.
        save_path = filedialog.asksaveasfilename(
            parent=self.root,
            defaultextension=".png",
            filetypes=[("PNG files", "*.png")],
        )
        if save_path:
            img.save(save_path)
            self.saved_path = save_path

        # Destroy the Tkinter window
        self.root.destroy()


# Define the function to be called when the PDF file path is provided
def main(pdf_file):
    """Open the viewer for the uploaded PDF and report the outcome.

    Args:
        pdf_file: the uploaded file object handed over by Gradio; only its
            ``name`` attribute (the path on disk) is used.

    Returns:
        "File Saved" when the user saved a selection, otherwise
        "No file saved".
    """
    # NOTE(review): Tkinter is run from whatever thread Gradio calls this
    # function on - confirm this works on the deployment platform.
    pdf_path = getattr(pdf_file, "name", None)
    if not pdf_path:
        return "No file saved"

    viewer = PDFViewer(pdf_path)
    try:
        viewer.root.mainloop()
    finally:
        # Release the document handle once the window is gone.
        viewer.doc.close()
    return "File Saved" if viewer.saved_path else "No file saved"


pdf_file = gr.inputs.File(label="Select a PDF file")

# Create the Gradio interface
interface = gr.Interface(
    fn=main,
    inputs=pdf_file,
    outputs="text",
    title="PDF Region Extraction",
    description="Select a region from a PDF file to extract.",
)

interface.launch()