Spaces:

KalbeDigitalLab
/

nutrigenme-paper-extractor

Sleeping

App Files Files Community

nutrigenme-paper-extractor / table_detector.py

fadliaulawi

Initial commit

fb4710e about 2 months ago

raw history blame contribute delete

No virus

2.11 kB

	from img2table.ocr import DocTR
	from torchvision import transforms
	from transformers import AutoModelForObjectDetection

	import torch

	def box_cxcywh_to_xyxy(x):
	x_c, y_c, w, h = x.unbind(-1)
	b = [(x_c - 0.5 * w), (y_c - 0.5 * h), (x_c + 0.5 * w), (y_c + 0.5 * h)]
	return torch.stack(b, dim=1)

	def rescale_bboxes(out_bbox, size):
	width, height = size
	boxes = box_cxcywh_to_xyxy(out_bbox)
	boxes = boxes * torch.tensor(
	[width, height, width, height], dtype=torch.float32
	)
	return boxes

	def outputs_to_objects(outputs, img_size, id2label):
	m = outputs.logits.softmax(-1).max(-1)
	pred_labels = list(m.indices.detach().cpu().numpy())[0]
	pred_scores = list(m.values.detach().cpu().numpy())[0]
	pred_bboxes = outputs["pred_boxes"].detach().cpu()[0]
	pred_bboxes = [
	elem.tolist() for elem in rescale_bboxes(pred_bboxes, img_size)
	]

	objects = []
	for label, score, bbox in zip(pred_labels, pred_scores, pred_bboxes):
	class_label = id2label[int(label)]
	if not class_label == "no object":
	objects.append(
	{
	"label": class_label,
	"score": float(score),
	"bbox": [float(elem) for elem in bbox],
	}
	)

	return objects

	class MaxResize(object):
	def __init__(self, max_size=800):
	self.max_size = max_size

	def __call__(self, image):
	width, height = image.size
	current_max_size = max(width, height)
	scale = self.max_size / current_max_size
	resized_image = image.resize(
	(int(round(scale * width)), int(round(scale * height)))
	)

	return resized_image

	detection_transform = transforms.Compose(
	[
	MaxResize(800),
	transforms.ToTensor(),
	transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
	]
	)

	device = "cuda" if torch.cuda.is_available() else "cpu"
	model = AutoModelForObjectDetection.from_pretrained("microsoft/table-transformer-detection", revision="no_timm").to(device)

	ocr = DocTR()