File size: 2,835 Bytes
8e0293d
 
 
 
 
 
 
 
 
 
52e6894
 
 
 
 
 
 
 
 
 
ec3e9d1
 
 
8e0293d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ec3e9d1
 
8e0293d
 
 
 
 
 
 
7c6e75e
8e0293d
 
 
488ebed
 
 
 
 
8e0293d
 
488ebed
8e0293d
 
ec3e9d1
 
 
 
7c6e75e
 
 
8e0293d
 
7c6e75e
ec3e9d1
7c6e75e
 
 
8e0293d
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import json

import gradio as gr


def process_metadata(file):
    """Convert a Visual Layer JSON export into JSONL text plus a category map.

    The export is expected to contain a top-level ``"media_items"`` list. Each
    item has a ``"file_name"`` and a ``"metadata_items"`` list; only entries
    whose ``"type"`` is ``"object_label"`` are used, and from those the
    ``"properties"["bbox"]`` and ``"properties"["category_name"]`` values.

    Args:
        file: The uploaded file. Either a path string, an object exposing the
            path as ``.name`` (e.g. a temp-file wrapper), or ``None`` when no
            file is selected.

    Returns:
        A ``(jsonl_text, mapping_text)`` tuple. ``jsonl_text`` holds one JSON
        object per media item, newline-separated, shaped as
        ``{"file_name": ..., "objects": {"bbox": [...], "category": [...]}}``.
        ``mapping_text`` lists ``"<category_name>: <index>"`` lines, where
        indices follow the sorted order of the category names. Both strings
        are empty when ``file`` is ``None``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If an expected key is missing from the export.
    """
    # Nothing uploaded (or the upload was cleared): return empty outputs
    # instead of failing on ``None.name``.
    if file is None:
        return "", ""

    # Accept both a plain path string and an object carrying the path in .name.
    path = file if isinstance(file, str) else file.name

    # JSON is UTF-8 by specification; don't depend on the platform locale.
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)

    media_items = data["media_items"]

    # Extract the object-label properties of every media item once, so the
    # category scan and the output pass work from the same filtered data.
    labels_per_item = [
        [
            metadata["properties"]
            for metadata in item["metadata_items"]
            if metadata["type"] == "object_label"
        ]
        for item in media_items
    ]

    # Assign indices by sorted name so the mapping is deterministic regardless
    # of the order in which categories appear in the export.
    unique_categories = {
        props["category_name"] for labels in labels_per_item for props in labels
    }
    category_mapping = {cat: idx for idx, cat in enumerate(sorted(unique_categories))}
    mapping_str = "\n".join(f"{cat}: {idx}" for cat, idx in category_mapping.items())

    results = []
    for item, labels in zip(media_items, labels_per_item):
        output = {
            "file_name": item["file_name"],
            "objects": {
                "bbox": [props["bbox"] for props in labels],
                # Every category was registered above, so index directly
                # rather than silently falling back to class 0.
                "category": [category_mapping[props["category_name"]] for props in labels],
            },
        }
        results.append(json.dumps(output))

    return "\n".join(results), mapping_str


def save_text(text):
    """Write the converted text to ``metadata.jsonl`` and return that path.

    The file is created in (or overwritten in) the current working directory.

    Args:
        text: The JSONL content to write. ``None`` is treated as empty text.

    Returns:
        The relative path ``"metadata.jsonl"`` of the file just written.
    """
    output_path = "metadata.jsonl"
    # Write an empty file rather than raising TypeError when there is no text.
    content = "" if text is None else text
    # Pin the encoding so the download does not depend on the host's locale.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(content)
    return output_path


# Build the Gradio UI. Components are declared inside three gr.Row containers,
# in order: the file upload, the two result textboxes, and the download button.
with gr.Blocks(title="VL-HF Annotations Converter") as iface:
    gr.Markdown("# VL-HF Annotations Converter")
    gr.Markdown(
        "Upload a JSON annotations file exported from Visual Layer and get a JSONL file to be used in Hugging Face AutoTrain."
    )

    # Input: the JSON export; its value is passed to process_metadata below.
    with gr.Row():
        input_file = gr.File(label="Upload JSON file exported from Visual Layer")

    # Outputs: the converted JSONL text and the category-name -> index mapping.
    # NOTE(review): scale=3 presumably makes the JSONL box wider than the
    # mapping box — confirm against the Gradio layout docs.
    with gr.Row():
        output_text = gr.Textbox(label="Converted JSONL output", lines=10, scale=3)
        mapping_text = gr.Textbox(label="Category Mapping", lines=10)

    # The initial value is the same relative path that save_text writes to.
    # NOTE(review): nothing visible here creates that file before the first
    # conversion — confirm how the button behaves while it does not exist.
    with gr.Row():
        download_btn = gr.DownloadButton(
            label="Download JSONL", value="metadata.jsonl", interactive=True
        )

    # Wire the events: a change of the uploaded file runs process_metadata,
    # filling both textboxes; the chained step then passes the JSONL text to
    # save_text and hands the returned file path to the download button.
    # `output` holds the chained event object and is not used again in the
    # visible source.
    output = input_file.change(
        fn=process_metadata, inputs=[input_file], outputs=[output_text, mapping_text]
    ).then(  # Chained so the save step follows the conversion step
        fn=save_text, inputs=[output_text], outputs=[download_btn]
    )

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()