iakarshu committed on
Commit
739e9ea
·
1 Parent(s): 4f80374

Upload utils.py

Browse files
Files changed (1) hide show
  1. utils.py +116 -0
utils.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import random
2
+ import torch
3
+ import math
4
+ from torch.nn.utils.rnn import pad_sequence
5
+
6
+
7
def find_pad_idx(boxes):
    """Return the index of the first all-zero (padding) row in `boxes`.

    Args:
        boxes: an indexable iterable of tensors (e.g. a 2-D tensor of
            bounding boxes); a padding entry is assumed to sum to exactly 0.

    Returns:
        int: index of the first row whose elements sum to 0. If no such
        row exists, the index of the last row is returned (preserving the
        original fall-through behavior). An empty input returns 0 instead
        of raising UnboundLocalError, which the original code did.
    """
    last = 0  # guard: defined even when `boxes` is empty
    for last, row in enumerate(boxes):
        # Use the enumerated row directly instead of re-indexing `boxes[i]`.
        if int(row.sum().item()) == 0:
            return last
    return last
12
+
13
+
14
+
15
+ def apply_mask_on_token_bbox(boxes, tokenized_words, only_actual_words = False, span = 4, proportion_to_mask = 0.15, special_token = 103):
16
+
17
+ # '''
18
+ # code taken from here: https://www.geeksforgeeks.org/python-non-overlapping-random-ranges/
19
+
20
+ # Note: A more robust solution is to be coded
21
+ # '''
22
+ # length_to_be_masked = int(proportion_to_mask*len(boxes))
23
+
24
+ # if only_actual_words:
25
+ # tot = find_pad_idx(tokenized_words)
26
+ # else:
27
+ # tot = len(boxes)
28
+
29
+ # res = set()
30
+ # for _ in range(length_to_be_masked):
31
+ # temp = random.randint(0, tot - span)
32
+ # while any(((temp >= idx) and (temp <= idx + span)) for idx in res):
33
+ # temp = random.randint(0, tot - span)
34
+ # res.add(temp)
35
+
36
+ # ## Applying the mask on token
37
+ # tokenized_words[temp] = special_token
38
+
39
+ # ## Applying the masking on the box
40
+ # boxes[temp, 0] = torch.min(boxes[temp: temp+span, 0])
41
+ # boxes[temp, 1] = torch.min(boxes[temp: temp+span, 1])
42
+ # boxes[temp, 2] = torch.max(boxes[temp: temp+span, 2])
43
+ # boxes[temp, 3] = torch.max(boxes[temp: temp+span, 3])
44
+ # boxes[temp, 4] = boxes[temp, 2] - boxes[temp, 0]
45
+ # boxes[temp, 5] = boxes[temp, 3] - boxes[temp, 1]
46
+
47
+ # return res,boxes, tokenized_words
48
+
49
+
50
def convert_ans_to_token(answer, label2id, max_seq_length = 512):
    """Convert a space-separated answer string into a fixed-length id tensor.

    Args:
        answer: answer string; tokens are separated by single spaces.
        label2id: mapping of token -> {'id': int} (project vocabulary);
            presumably built elsewhere — TODO confirm the nested-dict shape.
        max_seq_length: length of the returned tensor.

    Returns:
        torch.int32 tensor of shape (max_seq_length,) with the token ids,
        right-padded with zeros and truncated if the answer is longer
        (matching the clipping done in convert_ques_to_token).
    """
    ids = [label2id[token]['id'] for token in answer.split(" ")]
    actual_ans_array = torch.tensor(ids, dtype=torch.int32)

    ## Simple trick to pad a sequence to the desired length: pad against an
    ## int32 dummy so no implicit float->int cast happens inside pad_sequence.
    dummy_array = torch.zeros(max_seq_length, dtype=torch.int32)
    actual_ans_array = pad_sequence([actual_ans_array, dummy_array], batch_first=True)[0]

    return actual_ans_array[:max_seq_length]
64
+
65
+
66
def convert_ques_to_token(question, tokenizer, pad_token_id = 0, max_seq_len = 512):
    """Tokenize a question word by word into a fixed-length id tensor.

    Each space-separated word is run through `tokenizer` without special
    tokens; the concatenated ids are right-padded with `pad_token_id` and
    clipped to `max_seq_len`.

    Returns:
        torch.int32 tensor of shape (max_seq_len,) — or shorter only if
        clipping never applies, i.e. always exactly max_seq_len here.
    """
    ids = []
    for word in question.split(" "):
        ids += tokenizer(word, add_special_tokens=False).input_ids

    shortfall = max_seq_len - len(ids)
    if shortfall > 0:
        ids += [pad_token_id] * shortfall

    return torch.tensor(ids, dtype=torch.int32)[:max_seq_len]
79
+
80
+
81
+ ## To be taken from here
82
+ ## https://logicatcore.github.io/scratchpad/lidar/sensor-fusion/jupyter/2021/04/20/3D-Oriented-Bounding-Box.html
83
+
84
def rotate(origin, point, angle):
    """Rotate `point` counterclockwise by `angle` radians around `origin`.

    Returns the rotated (x, y) truncated to ints (pixel coordinates).

    Adapted from:
    https://stackoverflow.com/questions/34372480/rotate-point-about-another-point-in-degrees-python
    and https://logicatcore.github.io/scratchpad/lidar/sensor-fusion/jupyter/2021/04/20/3D-Oriented-Bounding-Box.html
    """
    ox, oy = origin
    px, py = point

    # Translate to the origin, apply the 2-D rotation matrix, translate back.
    dx, dy = px - ox, py - oy
    cos_a, sin_a = math.cos(angle), math.sin(angle)

    rotated_x = ox + cos_a * dx - sin_a * dy
    rotated_y = oy + sin_a * dx + cos_a * dy
    return int(rotated_x), int(rotated_y)
98
+
99
+
100
def convert_token_to_ques(ques, tokenizer):
    """Decode a sequence of token ids back into the question string.

    Special tokens (pad/cls/sep, ...) are dropped by the tokenizer itself
    via skip_special_tokens=True.
    """
    return tokenizer.decode(ques, skip_special_tokens=True)
103
+
104
+
105
def convert_token_to_answer(ans, id2label):
    """Map a padded tensor of answer token ids back to a text answer.

    Zero entries are treated as padding and dropped (note: this also drops
    any mid-sequence token whose id happens to be 0); the surviving ids are
    looked up in `id2label` and joined with single spaces.
    """
    keep = torch.nonzero(ans, as_tuple=False).view(-1)
    token_ids = ans[keep].cpu().numpy()

    words = [id2label[token_id] for token_id in token_ids]
    return " ".join(words)
116
+