import json
import re

import pandas as pd


def qfabric_semiconverted_to_geochat_dataset_format(json_file): |
|
with open(json_file) as f: |
|
data = json.load(f) |
|
for conversation_group in data: |
|
for item in conversation_group["conversations"]: |
|
|
|
item["value"] = re.sub(r"This is a satellite image :", "", item["value"]) |
|
item["value"] = re.sub(r"This is a satellite image:", "", item["value"]) |
|
item["value"] = re.sub(r"This is a high resolution, optical satellite image .*?:\s*", "", item["value"]) |
|
item["value"] = re.sub(r"This is a high resolution, optical satellite image.*?:\s*", "", item["value"]) |
|
item["value"] = re.sub(r"This is a satellite image from .*?:\s*", "", item["value"]) |
|
item["value"] = re.sub(r"This is a satellite image from.*?:\s*", "", item["value"]) |
|
|
|
item["value"] = re.sub(r'What is <identify>|this area {<', lambda x: '[identify]' if 'What is [identify]' in x.group() else '{<', item["value"]) |
|
|
|
item["value"] = re.sub(r'<video>', '', item["value"]) |
|
|
|
item["value"] = re.sub(r'this region {<', '{<', item["value"]) |
|
|
|
if '[identify]' in item["value"]: |
|
|
|
identify_index = item["value"].find('[identify]') |
|
identify_word_index = item["value"].find('Identify ', identify_index + 8) |
|
|
|
|
|
closing_brace_index = item["value"].find('>}', identify_index) |
|
return data |
|
|
|
# Ordered (compiled pattern, replacement) rewrites applied to every fMoW
# conversation value.  The order is significant: later rules see the output of
# earlier ones.
_FMOW_SUBSTITUTIONS = (
    (re.compile(r"This is a satellite image :"), ""),
    (re.compile(r"This is a satellite image:"), ""),
    (re.compile(r"This is a high resolution, optical satellite image .*?:\s*"), ""),
    (re.compile(r"This is a high resolution, optical satellite image.*?:\s*"), ""),
    (re.compile(r"This is a satellite image from .*?:\s*"), ""),
    (re.compile(r"This is a satellite image from.*?:\s*"), ""),
    (re.compile(r"This is a sequence of low-resolution, optical satellite images capturing the same location at different times: "), ""),
    (re.compile(r"This is a sequence of high-resolution, optical satellite images capturing the same location at different times:"), ""),
    (re.compile(r"This is a sequence of satellite images capturing the same location at different times:"), ""),
    (re.compile(r"This is a sequence of satellite images from .*? the same location at different times:"), ""),
    (re.compile(r'This is a high resolution,? optical satellite image .*:\s*<image>\n'), '\n'),
    (re.compile(r'^This is a high[- ]resolution,? .*?image:\s*<image>\n', flags=re.IGNORECASE | re.DOTALL), '\n'),
    (re.compile(r'<video>'), ''),
    (re.compile(r'this region {<'), '{<'),
    (re.compile(r'Which of the following classes does this sequence of images belong to'), 'Which of the following classes does this image belong to'),
    (re.compile(r'Please answer using only one of the following classes:'), 'Please use one of the following classes:'),
)


def fmow_to_geochat_dataset_format(json_file):
    """Load an fMoW JSON file and convert it to one entry per image.

    The conversation text of every entry is cleaned first (prompt preambles
    removed, ``<video>`` tags dropped, "sequence of images" wording changed to
    single-image wording).  Then every entry whose ``"video"`` list has more
    than one element is replaced by one shallow copy per element, carrying:

    * ``"video"``: a one-element list with that frame,
    * ``"image"``: that frame,
    * ``"linked_id"``: the ``"id"`` of the entry it was split from,
    * ``"img_idx_from_video_lst_id"``: the frame's index in the original list.

    Entries with at most one video are kept; a single-video entry additionally
    gets ``"image"`` set to that video.

    Args:
        json_file: Path to a JSON file holding a list of entries, each with a
            ``"conversations"`` list and (optionally) a ``"video"`` list.

    Returns:
        A new list: the kept entries in their original order, followed by the
        per-frame copies in the order their source entries appeared.  The
        copies are shallow, so copies of one source entry share the same
        ``"conversations"`` list.
    """
    with open(json_file, encoding="utf-8") as f:
        data = json.load(f)

    # Clean each conversation exactly once, before splitting, because the
    # per-frame copies below share their conversation dicts.
    for conversation_group in data:
        for item in conversation_group["conversations"]:
            value = item["value"]
            for pattern, replacement in _FMOW_SUBSTITUTIONS:
                value = pattern.sub(replacement, value)
            item["value"] = value

    # Build the result in fresh lists instead of appending to / popping from
    # ``data`` while iterating it: the in-place version skipped consecutive
    # multi-video entries on removal and read an unbound or stale
    # ``original_videos`` for single-video entries.
    kept_entries = []
    split_entries = []
    for entry in data:
        videos = entry.get("video", [])
        if len(videos) > 1:
            for idx, frame in enumerate(videos):
                new_entry = entry.copy()
                new_entry['video'] = [frame]
                new_entry['image'] = frame
                new_entry['linked_id'] = entry['id']
                new_entry['img_idx_from_video_lst_id'] = idx
                split_entries.append(new_entry)
        else:
            new_entry = entry.copy()
            if videos:
                new_entry['image'] = videos[0]
            kept_entries.append(new_entry)

    return kept_entries + split_entries


# Ordered (compiled pattern, replacement) rewrites that strip or shorten the
# prompt preamble of an xBD conversation value.  Order is significant.
_XBD_PROMPT_SUBSTITUTIONS = (
    (re.compile(r"This is a satellite image :"), ""),
    (re.compile(r"This is a satellite image:"), ""),
    (re.compile(r"This is a high resolution, optical satellite image .*?:\s*"), ""),
    (re.compile(r"This is a high resolution, optical satellite image.*?:\s*"), ""),
    (re.compile(r"This is a satellite image from .*?:\s*"), ""),
    (re.compile(r"These are two satellite images from .*? capturing the same location at different times: "), ""),
    (re.compile(r"These are two low-resolution, optical satellite images capturing the same location at different times:"), ""),
    (re.compile(r"These are two high-resolution, optical satellite images capturing the same location at different times:"), ""),
    (re.compile(r"These are two high-resolution, optical satellite images from .*? capturing the same location at different times:"), ""),
    (re.compile(r"These are two satellite images capturing the same location at different times:"), ""),
    (re.compile(r"These are two satellite images from .*? capturing the same location at different times:"), ""),
    (re.compile(r'These are two high-resolution,? optical satellite images .*:\s*<image>\n'), '<image>\n'),
    (re.compile(r'These are two high resolution,? optical satellite images .*:\s*<image>\n'), '<image>\n'),
    (re.compile(r'^This is a high[- ]resolution,? .*?image:\s*<image>\n', flags=re.IGNORECASE | re.DOTALL), '<image>\n'),
)

# Literal two-image questions and their single-image rewording.
_XBD_QUESTION_REWRITES = {
    'Are there any buildings in the first image which have been damaged in the second image? Answer with one word.': 'Are there any damaged buildings in the image? Answer with one word.',
    'Have any buildings in the first image been damaged in the second image? Answer with one word.': 'Have any buildings been damaged in the area? Answer with one word.',
    'What disaster has occurred between the first and second image?': 'What disaster has occurred here?',
    'Identify the buildings in the first image which were severely damaged or destroyed in the second image. Include a bounding box of the form [x_min, y_min, x_max, y_max] for each identified building in your response. If there are no such buildings, do not output a bounding box.': 'Identify the severely damaged or destroyed buildings in the image. Include a bounding box of the form [x_min, y_min, x_max, y_max] for each identified building in your response. If there are no such buildings, do not output a bounding box.',
}

# Rewrites applied after the task-specific and question rewrites; the last two
# turn "[a, b, c, d]" boxes into the "{<a><b><c><d>|<0>}" form.
_XBD_FINAL_SUBSTITUTIONS = (
    (re.compile(r'this region {<'), '{<'),
    (re.compile(r'Which of the following classes does this sequence of images belong to'), 'Which of the following classes does this image belong to'),
    (re.compile(r'Please answer using only one of the following classes:'), 'Please use one of the following classes:'),
    (re.compile(r'\[(\d+), (\d+), (\d+), (\d+)\]'), r'{<\1><\2><\3><\4>|<0>}'),
    (re.compile(r'\[(x_min), (y_min), (x_max), (y_max)\]'), r'{<\1><\2><\3><\4>|<0>}'),
)


def _xbd_clean_value(value, classification, localization):
    """Apply all xBD text rewrites to one conversation value and return it."""
    for pattern, replacement in _XBD_PROMPT_SUBSTITUTIONS:
        value = pattern.sub(replacement, value)

    if classification:
        value = re.sub(r'<video> \n', '<image> \n [identify] ', value)
        # NOTE: the trailing "." is an unescaped regex wildcard, so any
        # character following "image" (e.g. "?") is replaced by "." as well.
        value = re.sub(r' in the second image.', '.', value)
    elif localization:
        value = re.sub(r'<video> \n', '<image> \n [refer] ', value)
        value = re.sub(r'Image 1', 'the image', value)
    else:
        value = re.sub(r'<video> \n', '<image> \n ', value)

    # Plain literal replacement; no regex needed for fixed sentences.
    for old, new in _XBD_QUESTION_REWRITES.items():
        value = value.replace(old, new)

    for pattern, replacement in _XBD_FINAL_SUBSTITUTIONS:
        value = pattern.sub(replacement, value)
    return value


def xbd_to_geochat_dataset_format(json_file):
    """Load an xBD JSON file and convert each entry to a single-image entry.

    Every input entry yields exactly one shallow copy with an ``"image"`` key:
    ``entry["video"][0]`` when ``entry["task"]`` starts with
    ``"localization"``, otherwise ``entry["video"][1]``.  (Previously the
    ``if`` / ``if`` / ``else`` chain also pushed localization entries through
    the ``else`` branch, emitting them a second time with ``video[1]``.)

    The conversation values are then rewritten: prompt preambles are removed,
    ``"<video> \\n"`` becomes ``"<image> \\n "`` followed by ``"[identify] "``
    for classification tasks or ``"[refer] "`` for localization tasks (a task
    also counts as localization when its name contains "identify"), two-image
    questions are reworded for one image, and ``[a, b, c, d]`` boxes are
    rewritten as ``{<a><b><c><d>|<0>}``.

    Args:
        json_file: Path to a JSON file holding a list of entries, each with
            ``"task"``, ``"video"`` and ``"conversations"`` keys.

    Returns:
        A new list with one converted entry per input entry, in input order.
        The copies are shallow, so the conversation dicts of the loaded data
        are the ones that get rewritten.
    """
    with open(json_file, encoding="utf-8") as f:
        data = json.load(f)

    new_data = []
    for entry in data:
        new_entry = entry.copy()
        frame_index = 0 if entry["task"].startswith("localization") else 1
        new_entry['image'] = entry['video'][frame_index]
        new_data.append(new_entry)

    for conversation_group in new_data:
        task = conversation_group["task"]
        localization = task.startswith("localization") or "identify" in task.lower()
        classification = task.startswith("classification")
        for item in conversation_group["conversations"]:
            item["value"] = _xbd_clean_value(item["value"], classification, localization)

    return new_data


def s2looking_to_geochat_dataset_format(json_file):
    """Load an S2Looking JSON file and emit two single-image entries per element.

    For each input element and for ``i`` in ``(0, 1)`` a new dict is built with:

    * ``"id"``: the element id suffixed with ``"_<i>"``,
    * ``"metadata"``: ``elem["metadata"][i]``,
    * ``"original_input_polygon"`` and ``"task"``: copied from the element,
    * ``"image"``: ``elem["video"][i]``,
    * ``"geovlm_id"``: ``i``,
    * ``"original_conversation"``: the element's ``"conversations"`` object,
    * ``"conversations"``: a fixed human question asking to identify all
      buildings, followed by an empty ``gpt`` answer.

    The former regex clean-up pass and the "drop multi-video entries" loop
    were removed: they only ever ran on the two fixed conversation strings
    (which none of the patterns match) and on items without a ``"video"`` key,
    so they could not change the result.

    Args:
        json_file: Path to a JSON file holding a list of elements with
            ``"id"``, ``"metadata"``, ``"original_input_polygon"``,
            ``"task"``, ``"video"`` and ``"conversations"`` keys.

    Returns:
        A new list with two entries per input element, in input order.
    """
    with open(json_file, encoding="utf-8") as f:
        data = json.load(f)

    question = "<image>\n [refer] Identify all buildings in the image."

    new_dataset = []
    for elem in data:
        for i in range(2):
            new_dataset.append({
                'id': elem['id'] + '_' + str(i),
                'metadata': elem['metadata'][i],
                'original_input_polygon': elem['original_input_polygon'],
                'task': elem['task'],
                'image': elem['video'][i],
                'geovlm_id': i,
                'original_conversation': elem['conversations'],
                # Fresh dicts per entry so entries never share mutable turns.
                'conversations': [
                    {"from": "human", "value": question},
                    {"from": "gpt", "value": ""},
                ],
            })

    return new_dataset


def check_file(file_path):
    """Print a report of suspicious conversation turns in a converted JSON file.

    Two independent checks are run on every item of every entry's
    ``"conversations"`` list:

    * a turn whose ``"from"`` is not ``'gpt'`` and whose ``"value"`` lacks
      ``<image>`` is reported as missing the image tag;
    * a turn in which any ``.``-separated sentence starts with ``This is`` or
      ``These are`` is reported as still containing a prompt preamble.

    Args:
        file_path: Path to a JSON file holding a list of entries, each with a
            ``"conversations"`` list of dicts with ``"from"`` and ``"value"``.

    Returns:
        None. Findings are written to standard output.
    """
    with open(file_path, 'r', encoding="utf-8") as file:
        data = json.load(file)

    for conversation_group in data:
        for item in conversation_group["conversations"]:
            value = item["value"]
            if '<image>' not in value and item["from"] != 'gpt':
                print(f"Missing <image> in: {item}")
            if any(
                sentence.strip().startswith(('This is', 'These are'))
                for sentence in value.split('.')
            ):
                print(f"Starts with 'This is' or 'These are' in: {item}")


if __name__ == "__main__":
    # Script entry point: convert the two fMoW validation files, write the
    # converted datasets next to them, then run check_file on the outputs.

    fmow_0 = "/scr/geovlm/fmow_low_res_val.json"
    fmow_1 = "/scr/geovlm/fmow_high_res_val.json"

    # Paths of the other datasets; they are not converted by this run.
    qfabric_0 = '/scr/geovlm/QFabric/test_geochat_seqlen_5_256.json'
    qfabric_1 = '/scr/geovlm/QFabric/test_geochat_seqlen_2_256.json'

    xbd_0 = '/scr/geovlm/xbd_test_auxiliary_multi_image.json'
    xbd_1 = '/scr/geovlm/xbd_test_canon_classification.json'
    xbd_2 = '/scr/geovlm/xbd_test_canon_localization.json'

    print("Running conversion on all datasets, storing updated datasets in variables")

    from tqdm import tqdm

    dataset_formats = [
        (fmow_to_geochat_dataset_format, fmow_0),
        (fmow_to_geochat_dataset_format, fmow_1),
    ]

    # Convert every listed dataset.  The previous loop only converted paths
    # containing "xbd_test_auxiliary", which matches neither fMoW file, so the
    # list stayed empty and the two-way unpacking below raised ValueError.
    formatted_datasets = [
        format_func(dataset)
        for format_func, dataset in tqdm(dataset_formats, desc="Converting datasets")
    ]

    fmow_0_formatted, fmow_1_formatted = formatted_datasets

    with open('/scr/geovlm/geochat_fmow_RECENT_format_low_res.json', 'w') as file:
        json.dump(fmow_0_formatted, file)

    with open('/scr/geovlm/geochat_fmow_RECENT_format_high_res.json', 'w') as file:
        json.dump(fmow_1_formatted, file)

    check_file('/scr/geovlm/geochat_fmow_RECENT_format_low_res.json')
    check_file('/scr/geovlm/geochat_fmow_RECENT_format_high_res.json')