File size: 1,585 Bytes
c4edf1f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
import os
import json
# Directory holding the "<name>.jpg" images and their "<name>.txt" captions.
folder_path = "d:\\Dropbox\\YandexDisk\\Dataset\\Human_Captions_done\\cleaned\\"
# Directory that receives the generated "output.json".
base_folder = "d:\\Dropbox\\YandexDisk\\Dataset\\"
# Directory holding optional "<name>.txt" tag files for the same images.
tags_folder_path = "d:\\Dropbox\\YandexDisk\\Dataset\\Human_Captions_basetxt\\"


def _read_text(path):
    """Return the text stored at *path*, or None when the file does not exist.

    The file is decoded as UTF-8 (a leading BOM is tolerated) instead of the
    platform locale encoding, so non-ASCII captions read the same on every
    machine.
    """
    try:
        with open(path, "r", encoding="utf-8-sig") as f:
            return f.read()
    except FileNotFoundError:
        return None


def build_caption_records(images_dir, tags_dir):
    """Build one conversation record per captioned ".jpg" image in *images_dir*.

    An image is included only when a "<name>.txt" caption sits next to it in
    *images_dir*. When *tags_dir* also contains "<name>.txt", its stripped
    contents are appended to the user prompt; otherwise a tag-less prompt is
    used.

    Args:
        images_dir: Directory containing the images and their caption files.
        tags_dir: Directory containing the optional tag files.

    Returns:
        A list of dicts with the keys "id" (running index as a string),
        "image" (single-element list with the image path) and "conversations"
        (a user prompt followed by the assistant caption).

    Raises:
        OSError: If *images_dir* cannot be listed or a file cannot be read.
    """
    records = []
    # os.listdir returns entries in arbitrary order; sorting keeps the ids
    # stable from one run to the next.
    for filename in sorted(os.listdir(images_dir)):
        if not filename.endswith(".jpg"):
            continue
        image_name = os.path.splitext(filename)[0]

        caption = _read_text(os.path.join(images_dir, f"{image_name}.txt"))
        if caption is None:
            # Images without a caption file are skipped.
            continue

        tags = _read_text(os.path.join(tags_dir, f"{image_name}.txt"))
        if tags is None:
            prompt = "<ImageHere> Make a caption that describe this image"
        else:
            tags_content = tags.strip()
            prompt = f"<ImageHere> Make a caption that describe this image. Here is the tags for this image: {tags_content}"

        records.append(
            {
                # Ids count only the records actually emitted.
                "id": str(len(records)),
                "image": [os.path.join(images_dir, filename)],
                "conversations": [
                    {"from": "user", "value": prompt},
                    {"from": "assistant", "value": caption},
                ],
            }
        )
    return records


def main():
    """Scan the configured folders and write "output.json" into base_folder."""
    json_data = build_caption_records(folder_path, tags_folder_path)
    with open(os.path.join(base_folder, "output.json"), "w", encoding="utf-8") as f:
        json.dump(json_data, f, indent=4)


# Guarded so that importing this module does not touch the filesystem;
# running it as a script behaves as before.
if __name__ == "__main__":
    main()