# Spaces: No application file
import tkinter as tk | |
from tkinter import filedialog, messagebox | |
import json | |
import pandas as pd | |
def _parse_json_lines(lines):
    """Parse a batch of stripped, non-empty text lines into JSON values.

    The fast path treats the batch as the elements of one JSON array.  If
    that fails (for example because one of the lines is not valid JSON),
    each line that starts with "{" is parsed on its own and every other
    line is skipped.

    Raises:
        json.JSONDecodeError: If a line starting with "{" is not valid JSON.
    """
    try:
        return json.loads("[" + ",".join(lines) + "]")
    except json.JSONDecodeError:
        return [json.loads(line) for line in lines if line.startswith("{")]


def read_large_json(file_path, chunk_size=1000):
    """Read a line-delimited JSON file and return all parsed records.

    Lines are collected into batches of ``chunk_size`` and each batch is
    parsed in one ``json.loads`` call, falling back to line-by-line parsing
    of object lines when the batch as a whole is not valid JSON.

    Args:
        file_path: Path of the file to read; one JSON value per line.
        chunk_size: Number of non-empty lines parsed per batch.

    Returns:
        A list with the parsed values, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a line starting with "{" is not valid JSON.
    """
    data = []
    chunk = []
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(file_path, "r", encoding="utf-8") as file:
        for line in file:
            stripped = line.strip()
            if not stripped:
                # A blank line would make the joined array invalid and push
                # the whole batch onto the slow per-line fallback.
                continue
            chunk.append(stripped)
            if len(chunk) >= chunk_size:
                data.extend(_parse_json_lines(chunk))
                chunk = []
    if chunk:
        # Leftover lines that did not fill a complete batch.
        data.extend(_parse_json_lines(chunk))
    return data
def browse_files():
    """Let the user pick a JSON file and put its path into the path entry.

    Opens a file-open dialog filtered to ``*.json``.  When a file is chosen,
    the text of ``entry_file_path`` is replaced with the selected path.  When
    the dialog is cancelled, the entry is left unchanged so a previously
    selected path is not wiped.
    """
    file_path = filedialog.askopenfilename(filetypes=[("JSON files", "*.json")])
    if not file_path:
        # Dialog cancelled: keep whatever path is already in the entry.
        return
    entry_file_path.delete(0, tk.END)
    entry_file_path.insert(0, file_path)
def start_scraping():
    """Convert the selected JSON file into an Excel or CSV table.

    Reads the path from ``entry_file_path``, loads the records with
    ``read_large_json``, keeps the ``_source`` value of every record, and
    writes the resulting DataFrame to a file chosen in a save dialog.  The
    output is written as CSV when the chosen name ends in ``.csv`` and as an
    Excel workbook otherwise.

    Failures are shown to the user in an error dialog rather than raised,
    because this function runs as a Tk button callback.
    """
    file_path = entry_file_path.get()
    if not file_path:
        messagebox.showerror("Error", "Please select a JSON file first.")
        return

    try:
        data = read_large_json(file_path)
        flattened_data = [entry["_source"] for entry in data]
        df = pd.DataFrame(flattened_data)

        # Each file type is its own (label, pattern) pair; strings cannot be
        # combined with "|".
        output_file_path = filedialog.asksaveasfilename(
            defaultextension=".xlsx",
            filetypes=[("Excel files", "*.xlsx"), ("CSV files", "*.csv")],
        )
        if not output_file_path:
            # Save dialog cancelled: nothing to write.
            return

        if output_file_path.lower().endswith(".csv"):
            df.to_csv(output_file_path, index=False)
            file_kind = "CSV"
        else:
            df.to_excel(output_file_path, index=False)
            file_kind = "Excel"
        messagebox.showinfo(
            "Success", f"{file_kind} file has been saved to {output_file_path}"
        )
    except Exception as e:
        # Top-level UI boundary: report any failure to the user.
        messagebox.showerror("Error", str(e))
def show_file():
    """Ask the user for a CSV or Excel file and load it with pandas.

    Returns:
        The loaded ``pandas.DataFrame``, or ``None`` when the dialog is
        cancelled.

    NOTE(review): the original discarded the loaded data and is not wired to
    any widget in the visible code; how the table should be displayed is not
    specified — confirm the intended presentation.
    """
    # A distinct local name avoids shadowing the global path-entry widget.
    selected_path = filedialog.askopenfilename(
        filetypes=[
            ("CSV and Excel files", ("*.csv", "*.xlsx")),
            ("CSV files", "*.csv"),
            ("Excel files", "*.xlsx"),
        ]
    )
    if not selected_path:
        return None

    # Read the selected path (not the ``filedialog`` module) with the reader
    # that matches the file extension.
    if selected_path.lower().endswith(".csv"):
        return pd.read_csv(selected_path)
    return pd.read_excel(selected_path)
# Main window.
app = tk.Tk()
app.title("Twitter Scraper")

# Container that holds every widget, padded away from the window border.
frame = tk.Frame(app)
frame.pack(padx=10, pady=10)

# Row 0: caption, path entry, and the Browse button, left to right.
label_file_path = tk.Label(frame, text="JSON File Path:")
entry_file_path = tk.Entry(frame, width=50)
button_browse = tk.Button(frame, text="Browse", command=browse_files)
for column, widget in enumerate((label_file_path, entry_file_path, button_browse)):
    widget.grid(row=0, column=column, padx=5, pady=5)

# Row 1: the Scrape button spans all three columns.
button_scrape = tk.Button(frame, text="Scrape", command=start_scraping)
button_scrape.grid(row=1, column=0, columnspan=3, pady=10)

# Hand control to Tk's event loop.
app.mainloop()