"""Small Tkinter tool that exports the ``_source`` field of JSON records.

The user picks a JSON file; the ``_source`` value of every record is loaded
into a pandas DataFrame and written to an Excel or CSV file of their choice.
"""

import json
import tkinter as tk
from pathlib import Path
from tkinter import filedialog, messagebox

import pandas as pd


def _parse_chunk(lines):
    """Parse a batch of stripped, non-empty text lines into JSON values.

    The whole batch is first parsed in one call as a JSON array (one element
    per line). If that fails, only lines starting with ``{`` are parsed, one
    at a time; a trailing comma is dropped so that array dumps written one
    element per line (``{...},``) are accepted as well.

    Raises:
        json.JSONDecodeError: if a ``{`` line is still not valid JSON.
    """
    try:
        return json.loads("[" + ",".join(lines) + "]")
    except json.JSONDecodeError:
        return [
            json.loads(line.rstrip(","))
            for line in lines
            if line.startswith("{")
        ]


def _save_dataframe(df, output_path):
    """Write *df* to *output_path* as CSV when the suffix is .csv, else Excel."""
    if Path(output_path).suffix.lower() == ".csv":
        df.to_csv(output_path, index=False)
    else:
        df.to_excel(output_path, index=False)


def _load_dataframe(input_path):
    """Read *input_path* as CSV when the suffix is .csv, else as Excel."""
    if Path(input_path).suffix.lower() == ".csv":
        return pd.read_csv(input_path)
    return pd.read_excel(input_path)


def read_large_json(file_path: str, chunk_size: int = 1000) -> list:
    """Read a line-oriented JSON file and return the parsed values as a list.

    Lines are stripped, blank lines are skipped, and the remaining lines are
    parsed in batches of *chunk_size* lines.

    Args:
        file_path: Path of the JSON file to read.
        chunk_size: Number of lines parsed together in one batch.

    Returns:
        The parsed values in file order.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if a line starting with ``{`` is not valid JSON.
    """
    records = []
    batch = []
    # utf-8-sig reads plain UTF-8 and also tolerates a leading BOM, which
    # would otherwise make the first line unparseable.
    with open(file_path, "r", encoding="utf-8-sig") as file:
        for raw_line in file:
            line = raw_line.strip()
            if not line:
                continue
            batch.append(line)
            if len(batch) >= chunk_size:
                records.extend(_parse_chunk(batch))
                batch = []
    if batch:
        records.extend(_parse_chunk(batch))
    return records


def browse_files():
    """Let the user pick a JSON file and put its path into the entry widget."""
    file_path = filedialog.askopenfilename(filetypes=[("JSON files", "*.json")])
    if not file_path:
        # Dialog cancelled: keep whatever path is already in the entry.
        return
    entry_file_path.delete(0, tk.END)
    entry_file_path.insert(0, file_path)


def start_scraping():
    """Export the ``_source`` field of every record in the selected JSON file.

    Reads the path from the entry widget, builds a DataFrame, asks where to
    save it, and writes Excel or CSV depending on the chosen extension.
    Failures are reported in an error dialog instead of being raised.
    """
    file_path = entry_file_path.get().strip()
    if not file_path:
        messagebox.showerror("Error", "Please select a JSON file first.")
        return

    try:
        data = read_large_json(file_path)
        df = pd.DataFrame([entry["_source"] for entry in data])
    except Exception as exc:  # GUI boundary: show the problem to the user
        messagebox.showerror("Error", str(exc))
        return

    output_file_path = filedialog.asksaveasfilename(
        defaultextension=".xlsx",
        filetypes=[("Excel files", "*.xlsx"), ("CSV files", "*.csv")],
    )
    if not output_file_path:
        # Save dialog cancelled: nothing to write.
        return

    try:
        _save_dataframe(df, output_file_path)
    except Exception as exc:  # GUI boundary: show the problem to the user
        messagebox.showerror("Error", str(exc))
        return

    messagebox.showinfo("Success", f"File has been saved to {output_file_path}")


def show_file():
    """Ask for a CSV or Excel file and load it into a DataFrame.

    Returns:
        The loaded DataFrame, or ``None`` if the dialog was cancelled or the
        file could not be read (the error is shown in a dialog).
    """
    selected_path = filedialog.askopenfilename(
        filetypes=[("CSV files", "*.csv"), ("Excel files", "*.xlsx")]
    )
    if not selected_path:
        return None
    try:
        return _load_dataframe(selected_path)
    except Exception as exc:  # GUI boundary: show the problem to the user
        messagebox.showerror("Error", str(exc))
        return None


if __name__ == "__main__":
    # Built at module level (not inside a function) so that entry_file_path
    # stays a module global, which the button callbacks above rely on.
    app = tk.Tk()
    app.title("Twitter Scraper")

    frame = tk.Frame(app)
    frame.pack(padx=10, pady=10)

    label_file_path = tk.Label(frame, text="JSON File Path:")
    label_file_path.grid(row=0, column=0, padx=5, pady=5)

    entry_file_path = tk.Entry(frame, width=50)
    entry_file_path.grid(row=0, column=1, padx=5, pady=5)

    button_browse = tk.Button(frame, text="Browse", command=browse_files)
    button_browse.grid(row=0, column=2, padx=5, pady=5)

    button_scrape = tk.Button(frame, text="Scrape", command=start_scraping)
    button_scrape.grid(row=1, column=0, columnspan=3, pady=10)

    app.mainloop()