Spaces:

jirvin16
/

TEOChat

Sleeping

App Files Files Community

TEOChat / videollava /eval /ben_utils.py

jirvin16

Initial commit

134cb11 about 1 month ago

raw

history blame contribute delete

3.91 kB

	import json
	import numpy as np
	from tqdm import tqdm
	from pathlib import Path

	from videollava.constants import DEFAULT_IMAGE_TOKEN

	from infer_utils import run_inference_single


	def run_ben_inference(
	    model,
	    dataset_path,
	    processor,
	    tokenizer,
	    conv_mode,
	    use_video_data=False,
	    open_prompt=None,
	    repeat_frames=None,
	    prompt_strategy="interleave",
	    chronological_prefix=True,
	    data_frac=1,
	    data_size=None,
	    delete_system_prompt=False,
	    last_image=False,
	    start_ind=None,
	    end_ind=None,
	    print_prompt=False,
	    **kwargs
	):
	    """Run single-image VQA inference over a dataset split and collect answers.

	    Three sibling JSON files are read from the directory of ``dataset_path``,
	    named by replacing ``.json`` in its file name with ``_questions.json``,
	    ``_answers.json`` and ``_images.json``. For every active image, every
	    active question attached to it is sent to ``run_inference_single`` with
	    the suffix " Answer with one word or number.".

	    Args:
	        model, processor, tokenizer, conv_mode: Forwarded unchanged to
	            ``run_inference_single``.
	        dataset_path: Path whose parent directory holds the JSON files and
	            the image folder. If the path contains ``"LRBEN"`` images are
	            read from ``Images_LR``, otherwise from ``Data``.
	        use_video_data, repeat_frames, prompt_strategy, chronological_prefix,
	        delete_system_prompt, last_image, print_prompt: Forwarded unchanged
	            to ``run_inference_single``.
	        open_prompt: Accepted for interface compatibility; not used here.
	        data_frac: If below 1 (and ``data_size`` is None), randomly keep this
	            fraction of the active images.
	        data_size: If given, randomly keep at most this many active images.
	            Takes precedence over ``data_frac``.
	        start_ind, end_ind: Optional slice bounds applied to the list of
	            image ids after any random subsampling.
	        **kwargs: Ignored; each entry is reported with a warning.

	    Returns:
	        dict mapping ``"<image id>_<question id>"`` to a dict with keys
	        ``"predicted"`` (the value returned by ``run_inference_single``),
	        ``"ground_truth"`` and ``"task"``.

	    Notes:
	        * Subsampling draws from NumPy's global RNG; seed it in the caller
	          if a reproducible subset is needed.
	        * NOTE(review): ``data_size``/``data_frac`` are interpreted as a
	          number/fraction of active images. The previous code indexed the
	          top-level JSON dict and could not run; confirm this is the
	          intended unit.
	        * NOTE(review): images are looked up with
	          ``imagesJSON['images'][image_id]``, which presumes an image's
	          ``id`` equals its position in that list — verify against the data.
	    """
	    for k, v in kwargs.items():
	        print("WARNING: Unused argument:", k, v)

	    dataset_path = Path(dataset_path)
	    data_dir = dataset_path.parent
	    questions_path = data_dir / dataset_path.name.replace(".json", "_questions.json")
	    answers_path = data_dir / dataset_path.name.replace(".json", "_answers.json")
	    images_path = data_dir / dataset_path.name.replace(".json", "_images.json")

	    with open(questions_path, encoding="utf-8") as json_data:
	        questionsJSON = json.load(json_data)

	    with open(answers_path, encoding="utf-8") as json_data:
	        answersJSON = json.load(json_data)

	    with open(images_path, encoding="utf-8") as json_data:
	        imagesJSON = json.load(json_data)

	    if 'LRBEN' in str(dataset_path):
	        image_folder = 'Images_LR'
	    else:
	        image_folder = 'Data'

	    # Get the image IDs of test images
	    images_ids = [img['id'] for img in imagesJSON['images'] if img['active']]

	    # Randomly subsample the image ids. imagesJSON itself must stay intact
	    # because it is indexed by 'images' (and then by image id) further down.
	    sample_size = None
	    if data_size is not None:
	        sample_size = min(data_size, len(images_ids))
	    elif data_frac < 1:
	        sample_size = int(len(images_ids) * data_frac)
	    if sample_size is not None:
	        idx = np.random.choice(len(images_ids), sample_size, replace=False)
	        images_ids = [images_ids[i] for i in idx]

	    if start_ind is not None and end_ind is not None:
	        print("Subsetting data from index", start_ind, "to", end_ind)
	        images_ids = images_ids[start_ind:end_ind]
	    elif start_ind is not None:
	        print("Subsetting data from index", start_ind, "to end")
	        images_ids = images_ids[start_ind:]
	    elif end_ind is not None:
	        print("Subsetting data from start to index", end_ind)
	        images_ids = images_ids[:end_ind]

	    # Store all predicted answers
	    answers = {}
	    # Read image corresponding to each ID and get its associated question and answer
	    for image_id in tqdm(images_ids):

	        image_paths = [str(data_dir / image_folder / (str(image_id) + '.tif'))]

	        for questionid in imagesJSON['images'][image_id]['questions_ids']:
	            question = questionsJSON['questions'][questionid]
	            if not question['active']:
	                continue
	            inp = question["question"] + " Answer with one word or number."
	            inp = DEFAULT_IMAGE_TOKEN + '\n' + inp
	            type_str = question["type"]
	            # Only the first listed answer is used as ground truth.
	            answer_str = answersJSON['answers'][question["answers_ids"][0]]['answer']

	            outputs = run_inference_single(
	                model=model,
	                processor=processor,
	                tokenizer=tokenizer,
	                conv_mode=conv_mode,
	                inp=inp,
	                image_paths=image_paths,
	                metadata=None,
	                use_video_data=use_video_data,
	                repeat_frames=repeat_frames,
	                prompt_strategy=prompt_strategy,
	                chronological_prefix=chronological_prefix,
	                delete_system_prompt=delete_system_prompt,
	                last_image=last_image,
	                print_prompt=print_prompt
	            )

	            answers[f"{image_id}_{questionid}"] = {
	                "predicted": outputs,
	                "ground_truth": answer_str,
	                "task": type_str
	            }

	    return answers