|
import numpy as np |
|
import pickle |
|
from os.path import join as pjoin |
|
|
|
# Maps a part-of-speech tag (or one of the custom "*_VIP" categories) to its
# position in the one-hot vector. The position of each tag in the tuple below
# is its index, so the order must not change.
POS_enumerator = {
    tag: index
    for index, tag in enumerate((
        'VERB',
        'NOUN',
        'DET',
        'ADP',
        'NUM',
        'AUX',
        'PRON',
        'ADJ',
        'ADV',
        'Loc_VIP',
        'Body_VIP',
        'Obj_VIP',
        'Act_VIP',
        'Desc_VIP',
        'OTHER',
    ))
}
|
|
|
# Direction / location words.
Loc_list = (
    'left', 'right',
    'clockwise', 'counterclockwise', 'anticlockwise',
    'forward', 'back', 'backward',
    'up', 'down',
    'straight', 'curve',
)

# Body-part words.
Body_list = (
    'arm', 'chin', 'foot', 'feet', 'face', 'hand', 'mouth',
    'leg', 'waist', 'eye', 'knee', 'shoulder', 'thigh',
)

# Object words.
Obj_List = (
    'stair', 'dumbbell', 'chair', 'window', 'floor',
    'car', 'ball', 'handrail', 'baseball', 'basketball',
)

# Action words. Note that 'dance' is listed twice; this has no effect on
# membership tests but is kept so the tuple contents stay unchanged.
Act_list = (
    'walk', 'run', 'swing', 'pick', 'bring', 'kick', 'put',
    'squat', 'throw', 'hop', 'dance', 'jump', 'turn',
    'stumble', 'dance', 'stop', 'sit', 'lift', 'lower', 'raise',
    'wash', 'stand', 'kneel', 'stroll',
    'rub', 'bend', 'balance', 'flap', 'jog', 'shuffle',
    'lean', 'rotate', 'spin', 'spread', 'climb',
)

# Descriptive words (manner and mood).
Desc_list = (
    'slowly', 'carefully', 'fast', 'careful', 'slow', 'quickly',
    'happy', 'angry', 'sad', 'happily', 'angrily', 'sadly',
)

# Category name (a key of POS_enumerator) -> the words that belong to it.
# Lookups walk this dict in insertion order and stop at the first match.
VIP_dict = {
    'Loc_VIP': Loc_list,
    'Body_VIP': Body_list,
    'Obj_VIP': Obj_List,
    'Act_VIP': Act_list,
    'Desc_VIP': Desc_list,
}
|
|
|
|
|
class WordVectorizer(object):
    """Look up a word embedding and a one-hot part-of-speech vector.

    Items are addressed with strings of the form ``"word/POS"``; see
    ``__getitem__``.
    """

    def __init__(self, meta_root, prefix):
        """Load the embedding table and vocabulary files.

        Three files are read from ``meta_root``: ``<prefix>_data.npy`` (the
        embedding rows), ``<prefix>_words.pkl`` (the words to expose) and
        ``<prefix>_idx.pkl`` (word -> row index into the embedding table).

        Args:
            meta_root: Directory containing the three files.
            prefix: Common file-name prefix of the three files.
        """
        vectors = np.load(pjoin(meta_root, '%s_data.npy' % prefix))

        # NOTE: unpickling can execute arbitrary code; only load these files
        # from a trusted location.
        # The files are opened in ``with`` blocks so the handles are closed
        # deterministically instead of being left to the garbage collector.
        with open(pjoin(meta_root, '%s_words.pkl' % prefix), 'rb') as words_file:
            words = pickle.load(words_file)
        with open(pjoin(meta_root, '%s_idx.pkl' % prefix), 'rb') as idx_file:
            self.word2idx = pickle.load(idx_file)

        self.word2vec = {w: vectors[self.word2idx[w]] for w in words}

    def _get_pos_ohot(self, pos):
        """Return a one-hot vector of length ``len(POS_enumerator)`` for ``pos``.

        Tags that are not keys of ``POS_enumerator`` are mapped to 'OTHER'.
        """
        pos_vec = np.zeros(len(POS_enumerator))
        pos_vec[POS_enumerator.get(pos, POS_enumerator['OTHER'])] = 1
        return pos_vec

    def __len__(self):
        """Return the number of words that have an embedding."""
        return len(self.word2vec)

    def __getitem__(self, item):
        """Return ``(word_vec, pos_vec)`` for an item of the form ``"word/POS"``.

        Args:
            item: String containing exactly one ``'/'`` separating the word
                from its part-of-speech tag.

        Returns:
            A tuple of the word's embedding and its one-hot POS vector. A word
            without an embedding yields the 'unk' embedding and the 'OTHER'
            tag. A known word that appears in one of the ``VIP_dict`` lists
            gets that category instead of the tag supplied in ``item``.

        Raises:
            ValueError: If ``item`` does not split into exactly two parts.
            KeyError: If the word is unknown and there is no 'unk' embedding.
        """
        word, pos = item.split('/')

        if word not in self.word2vec:
            return self.word2vec['unk'], self._get_pos_ohot('OTHER')

        # The first VIP category (in VIP_dict order) containing the word wins.
        vip_pos = next(
            (key for key, values in VIP_dict.items() if word in values),
            None,
        )
        pos_vec = self._get_pos_ohot(pos if vip_pos is None else vip_pos)
        return self.word2vec[word], pos_vec
|
|
|
|
|
class WordVectorizerV2(WordVectorizer):
    """WordVectorizer variant that also reports vocabulary indices.

    Adds a reverse index -> word mapping and makes item lookup return the
    word's index alongside the embedding and POS vectors.
    """

    def __init__(self, meta_root, prefix):
        """Load the base vectorizer, then build the inverse of ``word2idx``."""
        super(WordVectorizerV2, self).__init__(meta_root, prefix)
        self.idx2word = {index: token for token, index in self.word2idx.items()}

    def __getitem__(self, item):
        """Return ``(word_vec, pos_vec, word_index)`` for a ``"word/POS"`` item.

        A word without an embedding is reported with the index of 'unk'.
        """
        vectors = super(WordVectorizerV2, self).__getitem__(item)
        token, _tag = item.split('/')
        # Fall back to the 'unk' entry when the word has no embedding.
        lookup_key = token if token in self.word2vec else 'unk'
        return vectors[0], vectors[1], self.word2idx[lookup_key]

    def itos(self, idx):
        """Translate an index back to its word.

        An index equal to the number of entries in ``idx2word`` is returned
        as "pad" (presumably the padding index used by callers -- confirm).
        """
        return "pad" if idx == len(self.idx2word) else self.idx2word[idx]