throaway2854 committed on
Commit
f21bf1a
·
verified ·
1 Parent(s): a5c4d00

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +239 -103
app.py CHANGED
@@ -1,107 +1,243 @@
1
  import gradio as gr
2
- import json
3
  import os
4
  import zipfile
5
- from datetime import datetime
6
-
7
# In-memory dataset registry shared by every handler in this module:
# dataset name -> list of records (add_to_dataset appends
# {"image": <path>, "prompt": <text>} dicts).
datasets = dict()
9
-
10
def save_dataset(dataset_name):
    """Persist one in-memory dataset as ``<dataset_name>/dataset.jsonl``.

    Looks ``dataset_name`` up in the module-level ``datasets`` mapping and
    returns without doing anything when it is absent. Otherwise the
    ``<dataset_name>/images`` directory is created if needed and every item
    is written as one JSON document per line, overwriting any earlier file.
    """
    if dataset_name not in datasets:
        return
    records = datasets[dataset_name]
    os.makedirs(f"{dataset_name}/images", exist_ok=True)
    with open(f"{dataset_name}/dataset.jsonl", "w") as out:
        out.writelines(json.dumps(record) + "\n" for record in records)
18
-
19
def load_dataset(dataset_name, file):
    """Load an uploaded JSONL file into the in-memory ``datasets`` registry.

    Args:
        dataset_name: Key under which the parsed records are stored; a
            directory of the same name is created if it does not exist.
        file: The upload handed over by the UI. Either a filesystem path or
            an object exposing the path as ``.name``; falsy when nothing was
            uploaded.

    Returns:
        A status message for display.
    """
    if not file:
        return "No file uploaded."
    # Read the upload itself. The previous code opened
    # "<dataset_name>/dataset.jsonl", which does not exist for a fresh upload.
    path = file if isinstance(file, (str, os.PathLike)) else file.name
    os.makedirs(dataset_name, exist_ok=True)
    with open(path, "r", encoding="utf-8") as f:
        # Skip blank lines (e.g. a trailing newline) instead of failing on them.
        datasets[dataset_name] = [json.loads(line) for line in f if line.strip()]
    return f"Dataset '{dataset_name}' loaded successfully."
27
-
28
def add_to_dataset(dataset_name, image, prompt):
    """Save an image to disk and append an image/prompt record to a dataset.

    Args:
        dataset_name: Target dataset; created in ``datasets`` on first use.
        image: Object with a ``save(path)`` method (e.g. a PIL image), or
            ``None`` when nothing was uploaded.
        prompt: Text stored alongside the image path.

    Returns:
        A status message for display.
    """
    if not dataset_name:
        # An empty name would otherwise resolve to the absolute path "/images".
        return "Please enter a dataset name."
    if image is None:
        return "No image provided."
    images_dir = f"{dataset_name}/images"
    # The directory was previously only created by save_dataset, so the first
    # add to a new dataset failed inside image.save().
    os.makedirs(images_dir, exist_ok=True)
    image_path = f"{images_dir}/{datetime.now().timestamp()}.png"
    image.save(image_path)
    datasets.setdefault(dataset_name, []).append({"image": image_path, "prompt": prompt})
    return f"Added to dataset '{dataset_name}'."
36
-
37
def edit_dataset(dataset_name, index, new_prompt):
    """Replace the prompt of one record in a dataset.

    Args:
        dataset_name: Key into the module-level ``datasets`` mapping.
        index: Zero-based position of the record; ``None`` when the number
            field was left empty.
        new_prompt: Replacement prompt text.

    Returns:
        A status message; ``"Invalid index."`` when the dataset is unknown
        or the index is missing or out of range.
    """
    # Guard None first: comparing None with an int raises TypeError.
    if index is None or dataset_name not in datasets:
        return "Invalid index."
    index = int(index)
    records = datasets[dataset_name]
    if not 0 <= index < len(records):
        return "Invalid index."
    records[index]["prompt"] = new_prompt
    return f"Edited prompt at index {index} in dataset '{dataset_name}'."
43
-
44
def delete_from_dataset(dataset_name, index):
    """Remove one record from a dataset and delete its image file.

    Args:
        dataset_name: Key into the module-level ``datasets`` mapping.
        index: Zero-based position of the record; ``None`` when the number
            field was left empty.

    Returns:
        A status message; ``"Invalid index."`` when the dataset is unknown
        or the index is missing or out of range.
    """
    # Guard None first: comparing None with an int raises TypeError.
    if index is None or dataset_name not in datasets:
        return "Invalid index."
    index = int(index)
    records = datasets[dataset_name]
    if not 0 <= index < len(records):
        return "Invalid index."
    try:
        os.remove(records[index]["image"])
    except FileNotFoundError:
        # The image is already gone (e.g. the record came from an uploaded
        # JSONL); the record must still be removable.
        pass
    records.pop(index)
    return f"Deleted item at index {index} from dataset '{dataset_name}'."
51
-
52
def generate_dataset(dataset_name):
    """Write a dataset to disk and bundle its directory into a zip file.

    Calls ``save_dataset`` to refresh ``<dataset_name>/dataset.jsonl`` and
    then zips every file found under the ``<dataset_name>`` directory into
    ``<dataset_name>.zip`` in the current working directory.

    Args:
        dataset_name: Key into the module-level ``datasets`` mapping.

    Returns:
        The zip file name, or ``None`` when the dataset does not exist. The
        result feeds a file output, which would interpret an error string
        as a path, so "nothing to download" is signalled with ``None``.
    """
    if dataset_name not in datasets:
        return None
    save_dataset(dataset_name)
    zip_filename = f"{dataset_name}.zip"
    with zipfile.ZipFile(zip_filename, 'w') as zipf:
        for root, _, files in os.walk(dataset_name):
            for file in files:
                zipf.write(os.path.join(root, file))
    return zip_filename
63
-
64
- # Gradio interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
# UI layout: the left column adds, edits and deletes records; the right
# column uploads a JSONL file, builds the zip download and shows a dataset.
with gr.Blocks() as demo:
    gr.Markdown("# Dataset Builder")

    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown("## Create/Edit Dataset")
            dataset_name = gr.Textbox(label="Dataset Name")
            # add_to_dataset calls image.save(), so request a PIL image
            # rather than the default numpy array.
            image = gr.Image(label="Upload Image", type="pil")
            prompt = gr.Textbox(label="Prompt")
            add_button = gr.Button("Add to Dataset")
            add_output = gr.Textbox()
            add_button.click(add_to_dataset, inputs=[dataset_name, image, prompt], outputs=add_output)

            # One index field is shared by the edit and delete actions.
            index = gr.Number(label="Index to Edit/Delete", precision=0)
            new_prompt = gr.Textbox(label="New Prompt")
            edit_button = gr.Button("Edit Prompt")
            edit_output = gr.Textbox()
            edit_button.click(edit_dataset, inputs=[dataset_name, index, new_prompt], outputs=edit_output)

            delete_button = gr.Button("Delete from Dataset")
            delete_output = gr.Textbox()
            delete_button.click(delete_from_dataset, inputs=[dataset_name, index], outputs=delete_output)

        with gr.Column(scale=1):
            gr.Markdown("## Upload Dataset")
            upload_dataset_name = gr.Textbox(label="Dataset Name")
            upload_file = gr.File(label="Upload JSONL File")
            upload_button = gr.Button("Upload Dataset")
            upload_output = gr.Textbox()
            upload_button.click(load_dataset, inputs=[upload_dataset_name, upload_file], outputs=upload_output)

            gr.Markdown("## Generate/Download Dataset")
            # Uses the dataset name typed in the left column.
            generate_button = gr.Button("Generate Dataset")
            generate_output = gr.File()
            generate_button.click(generate_dataset, inputs=[dataset_name], outputs=generate_output)

            gr.Markdown("## View Dataset")
            view_dataset_name = gr.Textbox(label="Dataset Name")
            view_button = gr.Button("View Dataset")
            view_output = gr.JSON()
            # Unknown names show an empty list rather than raising.
            view_button.click(lambda name: datasets.get(name, []), inputs=[view_dataset_name], outputs=view_output)

demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
 
2
  import os
3
  import zipfile
4
+ import json
5
+ from io import BytesIO
6
+ import base64
7
+ from PIL import Image
8
+ import uuid
9
+ import tempfile
10
+
11
def save_dataset_to_zip(dataset_name, dataset):
    """Package a dataset as an in-memory zip archive.

    The archive holds one top-level folder named ``dataset_name`` (the layout
    ``load_dataset_from_zip`` looks for) containing ``images/<uuid>.png`` for
    every entry and ``annotations.jsonl`` with one
    ``{"file_name": ..., "text": ...}`` object per line. Everything is built
    in memory, so no temporary directory is left behind on disk.

    Args:
        dataset_name: Name of the top-level folder inside the archive. When
            empty, files are placed at the archive root.
        dataset: Iterable of dicts with an ``'image'`` value and a
            ``'prompt'`` value. The image may be a base64 data-URL string,
            an object with a PIL-style ``save`` method, or an array accepted
            by ``Image.fromarray``.

    Returns:
        A ``BytesIO`` positioned at offset 0 that contains the zip data.
    """
    prefix = f"{dataset_name}/" if dataset_name else ""
    annotations = []
    zip_buffer = BytesIO()
    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for entry in dataset:
            image_data = entry['image']
            if isinstance(image_data, str):
                # "data:<mime>;base64,<payload>"; tolerate a bare payload too.
                _, has_header, payload = image_data.partition(",")
                raw = base64.b64decode(payload if has_header else image_data)
                image = Image.open(BytesIO(raw))
            elif hasattr(image_data, 'save'):
                image = image_data
            else:
                image = Image.fromarray(image_data)

            # Forward slashes keep archive member names and the recorded
            # file_name portable across platforms.
            rel_name = f"images/{uuid.uuid4().hex}.png"
            png_bytes = BytesIO()
            image.save(png_bytes, format='PNG')
            zipf.writestr(prefix + rel_name, png_bytes.getvalue())
            annotations.append({'file_name': rel_name, 'text': entry['prompt']})

        jsonl = "".join(json.dumps(ann) + '\n' for ann in annotations)
        zipf.writestr(prefix + 'annotations.jsonl', jsonl)
    zip_buffer.seek(0)
    return zip_buffer
47
+
48
def load_dataset_from_zip(zip_file):
    """Read a dataset archive into a list of image/prompt entries.

    Two layouts are accepted: ``annotations.jsonl`` at the archive root (the
    dataset is then named after the zip file), or inside a top-level folder
    (the dataset is named after that folder). Each annotation line is a JSON
    object with ``file_name`` (image path relative to the dataset folder) and
    ``text``. Images are embedded as base64 data URLs, so the extraction
    directory is removed before returning.

    Args:
        zip_file: Path to the archive, or an object exposing the path as
            ``.name``.

    Returns:
        ``(dataset_name, dataset)`` where ``dataset`` is a list of
        ``{'image': <data URL>, 'prompt': <text>}`` dicts. The list is empty
        when no ``annotations.jsonl`` is found.

    Raises:
        ValueError: If an annotation's ``file_name`` resolves outside the
            extracted dataset folder.
    """
    import mimetypes  # local import: only needed by this function

    zip_path = zip_file if isinstance(zip_file, (str, os.PathLike)) else zip_file.name
    dataset = []
    with tempfile.TemporaryDirectory() as temp_dir:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(temp_dir)

        # Default to the flat layout; switch to a sub-folder when the root has
        # no annotations file. Sorting makes the choice deterministic.
        dataset_name = os.path.splitext(os.path.basename(zip_path))[0]
        dataset_path = temp_dir
        if not os.path.isfile(os.path.join(temp_dir, 'annotations.jsonl')):
            subdirs = sorted(
                name for name in os.listdir(temp_dir)
                if os.path.isdir(os.path.join(temp_dir, name))
            )
            annotated = [
                name for name in subdirs
                if os.path.isfile(os.path.join(temp_dir, name, 'annotations.jsonl'))
            ]
            candidates = annotated or subdirs
            if candidates:
                dataset_name = candidates[0]
                dataset_path = os.path.join(temp_dir, dataset_name)

        annotations_path = os.path.join(dataset_path, 'annotations.jsonl')
        if os.path.exists(annotations_path):
            root = os.path.realpath(dataset_path)
            with open(annotations_path, 'r', encoding='utf-8') as f:
                for line in f:
                    if not line.strip():
                        continue
                    ann = json.loads(line)
                    # Archives written on Windows may record backslashes.
                    file_name = ann['file_name'].replace('\\', '/')
                    image_path = os.path.realpath(os.path.join(root, file_name))
                    # file_name comes from the upload; never read outside
                    # the extracted dataset folder.
                    if os.path.commonpath([root, image_path]) != root:
                        raise ValueError(f"Unsafe image path in annotations: {ann['file_name']!r}")
                    with open(image_path, 'rb') as img_f:
                        encoded = base64.b64encode(img_f.read()).decode()
                    mime_type = mimetypes.guess_type(image_path)[0] or "image/png"
                    dataset.append({
                        'image': f"data:{mime_type};base64,{encoded}",
                        'prompt': ann['text'],
                    })
    return dataset_name, dataset
76
+
77
def display_dataset_html(dataset):
    """Render dataset entries as an HTML list of index, thumbnail and prompt.

    Args:
        dataset: Sequence of dicts with ``'image'`` (used as the ``<img>``
            source) and ``'prompt'`` values; may be empty or ``None``.

    Returns:
        An HTML string with one row per entry, or a placeholder ``<div>``
        when there is nothing to show.
    """
    import html  # local import: only needed for escaping here

    if not dataset:
        return "<div>No entries in dataset.</div>"

    rows = []
    for idx, entry in enumerate(dataset):
        # Both values are user-supplied; escape them so they cannot break out
        # of the attribute or inject markup into the page.
        image_src = html.escape(str(entry['image']), quote=True)
        prompt = html.escape(str(entry['prompt']))
        rows.append(f"""
        <div style="display: flex; align-items: center; margin-bottom: 10px;">
            <div style="width: 50px;">{idx}</div>
            <img src="{image_src}" alt="Image {idx}" style="max-height: 100px; margin-right: 10px;"/>
            <div>{prompt}</div>
        </div>
        """)
    return "".join(rows)
93
+
94
# UI: a dataset selector and preview shared by four tabs (create/upload,
# add entry, edit/delete entry, download). All dataset contents live in
# per-session gr.State values, so handlers receive and return them explicitly.
with gr.Blocks() as demo:
    gr.Markdown("<h1 style='text-align: center; margin-bottom: 20px;'>Dataset Builder</h1>")
    # Mapping of dataset name -> list of {'image', 'prompt'} entries.
    datasets = gr.State({})
    # Name chosen in the dropdown; "" while nothing is selected.
    current_dataset_name = gr.State("")
    dataset_selector = gr.Dropdown(label="Select Dataset", interactive=True)
    dataset_html = gr.HTML()
    message_box = gr.Textbox(interactive=False, label="Message")

    with gr.Tab("Create / Upload Dataset"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("### Create a New Dataset")
                dataset_name_input = gr.Textbox(label="New Dataset Name")
                create_button = gr.Button("Create Dataset")
            with gr.Column():
                gr.Markdown("### Upload Existing Dataset")
                upload_input = gr.File(label="Upload Dataset Zip", file_types=['.zip'])
                upload_button = gr.Button("Upload Dataset")

        def create_dataset(name, datasets):
            """Register an empty dataset and select it in the dropdown."""
            if not name:
                return datasets, gr.update(), "Please enter a dataset name."
            if name in datasets:
                return datasets, gr.update(), f"Dataset '{name}' already exists."
            datasets[name] = []
            return datasets, gr.update(choices=list(datasets.keys()), value=name), f"Dataset '{name}' created."

        # The state is returned as an output so the update is stored by the
        # framework rather than relying on in-place mutation.
        create_button.click(
            create_dataset,
            inputs=[dataset_name_input, datasets],
            outputs=[datasets, dataset_selector, message_box]
        )

        def upload_dataset(zip_file, datasets):
            """Load a dataset zip and register it under the name it reports."""
            if zip_file is None:
                return datasets, gr.update(), "Please upload a zip file."
            try:
                dataset_name, dataset = load_dataset_from_zip(zip_file)
            except (zipfile.BadZipFile, OSError, ValueError, KeyError, IndexError) as exc:
                # Report a malformed archive in the message box instead of
                # failing the event.
                return datasets, gr.update(), f"Could not read dataset zip: {exc}"
            if dataset_name in datasets:
                return datasets, gr.update(), f"Dataset '{dataset_name}' already exists."
            datasets[dataset_name] = dataset
            return (
                datasets,
                gr.update(choices=list(datasets.keys()), value=dataset_name),
                f"Dataset '{dataset_name}' uploaded.",
            )

        upload_button.click(
            upload_dataset,
            inputs=[upload_input, datasets],
            outputs=[datasets, dataset_selector, message_box]
        )

        def select_dataset(dataset_name, datasets):
            """Remember the selected dataset and refresh the preview."""
            # State and HTML outputs take plain values; gr.State has no
            # update() helper.
            if dataset_name in datasets:
                return dataset_name, display_dataset_html(datasets[dataset_name]), ""
            return "", "<div>Select a dataset.</div>", ""

        dataset_selector.change(
            select_dataset,
            inputs=[dataset_selector, datasets],
            outputs=[current_dataset_name, dataset_html, message_box]
        )

    with gr.Tab("Add Entry"):
        with gr.Row():
            # PIL images can be encoded to PNG without extra conversion.
            image_input = gr.Image(label="Upload Image", type="pil")
            prompt_input = gr.Textbox(label="Prompt")
        add_button = gr.Button("Add Entry")

        def add_entry(image_data, prompt, current_dataset_name, datasets):
            """Append an image/prompt pair to the selected dataset."""
            if not current_dataset_name or current_dataset_name not in datasets:
                return datasets, gr.update(), "No dataset selected."
            if image_data is None or not prompt:
                return datasets, gr.update(), "Please provide both an image and a prompt."
            # Store the image as a PNG data URL: the preview uses it as an
            # <img> source and the exporter decodes it from base64.
            png_bytes = BytesIO()
            image_data.save(png_bytes, format="PNG")
            data_url = "data:image/png;base64," + base64.b64encode(png_bytes.getvalue()).decode()
            datasets[current_dataset_name].append({'image': data_url, 'prompt': prompt})
            html_content = display_dataset_html(datasets[current_dataset_name])
            return datasets, html_content, f"Entry added to dataset '{current_dataset_name}'."

        add_button.click(
            add_entry,
            inputs=[image_input, prompt_input, current_dataset_name, datasets],
            outputs=[datasets, dataset_html, message_box]
        )

    with gr.Tab("Edit / Delete Entry"):
        index_input = gr.Number(label="Entry Index", precision=0)
        new_prompt_input = gr.Textbox(label="New Prompt (for Edit)")
        with gr.Row():
            edit_button = gr.Button("Edit Entry")
            delete_button = gr.Button("Delete Entry")

        def edit_entry(index, new_prompt, current_dataset_name, datasets):
            """Replace the prompt of the entry at ``index``."""
            if not current_dataset_name or current_dataset_name not in datasets:
                return datasets, gr.update(), "No dataset selected."
            if index is None or not new_prompt:
                return datasets, gr.update(), "Please provide both index and new prompt."
            index = int(index)
            entries = datasets[current_dataset_name]
            if not 0 <= index < len(entries):
                return datasets, gr.update(), "Invalid index."
            entries[index]['prompt'] = new_prompt
            return datasets, display_dataset_html(entries), f"Entry {index} updated."

        edit_button.click(
            edit_entry,
            inputs=[index_input, new_prompt_input, current_dataset_name, datasets],
            outputs=[datasets, dataset_html, message_box]
        )

        def delete_entry(index, current_dataset_name, datasets):
            """Remove the entry at ``index`` from the selected dataset."""
            if not current_dataset_name or current_dataset_name not in datasets:
                return datasets, gr.update(), "No dataset selected."
            if index is None:
                return datasets, gr.update(), "Please provide an index."
            index = int(index)
            entries = datasets[current_dataset_name]
            if not 0 <= index < len(entries):
                return datasets, gr.update(), "Invalid index."
            del entries[index]
            return datasets, display_dataset_html(entries), f"Entry {index} deleted."

        delete_button.click(
            delete_entry,
            inputs=[index_input, current_dataset_name, datasets],
            outputs=[datasets, dataset_html, message_box]
        )

    with gr.Tab("Download Dataset"):
        download_button = gr.Button("Download Dataset")
        download_output = gr.File(label="Download Zip")

        def download_dataset(current_dataset_name, datasets):
            """Build the selected dataset's zip and hand its path to the UI."""
            if not current_dataset_name or current_dataset_name not in datasets:
                return None, "No dataset selected."
            zip_buffer = save_dataset_to_zip(current_dataset_name, datasets[current_dataset_name])
            # A file output needs a filesystem path, not raw bytes. basename()
            # keeps a name containing separators inside the temp directory.
            safe_name = os.path.basename(current_dataset_name) or "dataset"
            zip_path = os.path.join(tempfile.mkdtemp(), f"{safe_name}.zip")
            with open(zip_path, "wb") as zip_out:
                zip_out.write(zip_buffer.getvalue())
            return zip_path, f"Dataset '{current_dataset_name}' is ready for download."

        download_button.click(
            download_dataset,
            inputs=[current_dataset_name, datasets],
            outputs=[download_output, message_box]
        )

    # On page load, fill the dropdown from the session's datasets. The state
    # must be listed as an input because the handler takes one argument.
    demo.load(
        lambda datasets: gr.update(choices=list(datasets.keys())),
        inputs=[datasets],
        outputs=dataset_selector
    )

demo.launch()