Upload utils.py
utils.py ADDED
@@ -0,0 +1,116 @@
# import random
import torch
import math
from torch.nn.utils.rnn import pad_sequence


def find_pad_idx(boxes):
    # Return the index of the first all-zero (padding) box; if no padding
    # box exists, fall back to the last index.
    i = 0
    for i, box in enumerate(boxes):
        if int(box.sum().item()) == 0:
            return i
    return i


def apply_mask_on_token_bbox(boxes, tokenized_words, only_actual_words=False, span=4, proportion_to_mask=0.15, special_token=103):
    # '''
    # Code taken from here: https://www.geeksforgeeks.org/python-non-overlapping-random-ranges/
    # Note: a more robust solution is to be coded
    # '''
    # length_to_be_masked = int(proportion_to_mask * len(boxes))

    # if only_actual_words:
    #     tot = find_pad_idx(tokenized_words)
    # else:
    #     tot = len(boxes)

    # res = set()
    # for _ in range(length_to_be_masked):
    #     temp = random.randint(0, tot - span)
    #     while any(((temp >= idx) and (temp <= idx + span)) for idx in res):
    #         temp = random.randint(0, tot - span)
    #     res.add(temp)

    #     ## Applying the mask on the token
    #     tokenized_words[temp] = special_token

    #     ## Applying the mask on the box
    #     boxes[temp, 0] = torch.min(boxes[temp: temp + span, 0])
    #     boxes[temp, 1] = torch.min(boxes[temp: temp + span, 1])
    #     boxes[temp, 2] = torch.max(boxes[temp: temp + span, 2])
    #     boxes[temp, 3] = torch.max(boxes[temp: temp + span, 3])
    #     boxes[temp, 4] = boxes[temp, 2] - boxes[temp, 0]
    #     boxes[temp, 5] = boxes[temp, 3] - boxes[temp, 1]

    # return res, boxes, tokenized_words
    pass  # placeholder so the module parses; the masking logic above is commented out


def convert_ans_to_token(answer, label2id, max_seq_length=512):
    ## Simple trick to pad a sequence to the desired length
    dummy_array = torch.zeros(max_seq_length, dtype=torch.int32)
    actual_ans_array = []

    answer = answer.split(" ")
    for token in answer:
        actual_ans_array.append(label2id[token]['id'])

    actual_ans_array = torch.tensor(actual_ans_array, dtype=torch.int32)
    # pad_sequence pads the shorter answer with zeros up to max_seq_length
    actual_ans_array = pad_sequence([actual_ans_array, dummy_array], batch_first=True)[0]

    return actual_ans_array


def convert_ques_to_token(question, tokenizer, pad_token_id=0, max_seq_len=512):
    question_array = []
    question = question.split(" ")

    # Tokenize word by word so the tokenizer does not insert special tokens
    for token in question:
        question_array.extend(tokenizer(token, add_special_tokens=False).input_ids)

    # Pad with pad_token_id up to max_seq_len, then truncate longer questions
    if len(question_array) < max_seq_len:
        question_array.extend([pad_token_id] * (max_seq_len - len(question_array)))

    question_array = torch.tensor(question_array, dtype=torch.int32)
    return question_array[:max_seq_len]


## To be taken from here:
## https://logicatcore.github.io/scratchpad/lidar/sensor-fusion/jupyter/2021/04/20/3D-Oriented-Bounding-Box.html

def rotate(origin, point, angle):
    """
    Rotate a point counterclockwise by a given angle around a given origin.
    The angle should be given in radians.

    Modified from the answer here:
    https://stackoverflow.com/questions/34372480/rotate-point-about-another-point-in-degrees-python
    """
    # angle = np.deg2rad(angle)
    ox, oy = origin
    px, py = point

    qx = ox + math.cos(angle) * (px - ox) - math.sin(angle) * (py - oy)
    qy = oy + math.sin(angle) * (px - ox) + math.cos(angle) * (py - oy)
    return int(qx), int(qy)


def convert_token_to_ques(ques, tokenizer):
    # Decode token ids back into the question string, dropping special tokens
    decoded_ques = tokenizer.decode(ques, skip_special_tokens=True)
    return decoded_ques


def convert_token_to_answer(ans, id2label):
    # Drop the zero (padding) ids, then map the remaining ids back to labels
    non_zero_argument = torch.nonzero(ans, as_tuple=False).view(-1)

    actual_answer = ans[non_zero_argument].cpu().numpy()
    decoded_answer = []

    for token in actual_answer:
        decoded_answer.append(id2label[token])

    decoded_answer = " ".join(decoded_answer)
    return decoded_answer
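
A minimal usage sketch of the tokenization helpers, assuming a Hugging Face tokenizer and a toy label vocabulary (the "bert-base-uncased" checkpoint and the label2id/id2label mappings below are illustrative assumptions, not part of this file):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")  # assumed checkpoint

# Round-trip a question through token ids and back
ques_ids = convert_ques_to_token("what is the date", tokenizer)  # int32 tensor, length 512
print(convert_token_to_ques(ques_ids, tokenizer))                # -> "what is the date"

# Toy answer vocabulary; the real label2id/id2label come from the dataset
label2id = {"11": {"id": 5}, "march": {"id": 9}}
id2label = {5: "11", 9: "march"}

ans_ids = convert_ans_to_token("11 march", label2id)  # ids zero-padded to 512
print(convert_token_to_answer(ans_ids, id2label))     # -> "11 march"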
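
And a quick sanity check for rotate (angle in radians; the int() casts truncate to integer pixel coordinates):

# Rotating (2, 0) by 90 degrees counterclockwise about the origin
# should land at (0, 2), up to the integer truncation.
print(rotate(origin=(0, 0), point=(2, 0), angle=math.pi / 2))  # -> (0, 2)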