# NOTE: leftover non-code header text ("Spaces: Running Running"); not part of the script.
import os | |
import numpy as np | |
import pandas as pd | |
import torch | |
from sklearn.manifold import TSNE | |
from tqdm import tqdm | |
def load_feats(path='./feats'):
    """Load every ``<digits>.pt`` file found in a directory.

    A file is picked up when the text after its last dot is ``pt`` and the
    text before its first dot consists only of digits (e.g. ``12.pt``).

    Args:
        path: Directory that holds the ``.pt`` files.

    Returns:
        dict mapping the integer taken from the file name to the object
        returned by ``torch.load`` for that file.
    """
    print('==> loading feats')
    feats = {}
    for pt in os.listdir(path):
        parts = pt.split('.')
        if parts[-1] == 'pt' and parts[0].isdigit():
            # Load from the directory that was actually listed; the file
            # names come from `path`, so the data must be read from there too.
            # NOTE: torch.load unpickles data -- only point this at trusted files.
            feats[int(parts[0])] = torch.load(os.path.join(path, pt))
    return feats
def calc_tsne(feat):
    """Project the ``'all'`` entry of ``feat`` to 2-D with t-SNE.

    Args:
        feat: Mapping whose ``'all'`` entry is converted with ``.numpy()``
            and handed to ``TSNE.fit_transform`` (presumably a 2-D
            ``torch.Tensor`` of shape (n_samples, n_features) -- TODO confirm).

    Returns:
        The array returned by ``TSNE.fit_transform`` (two components per row).
    """
    # The iteration count is left at the library default (1000, the value
    # that used to be passed explicitly): the keyword was renamed from
    # `n_iter` to `max_iter` in scikit-learn 1.5, so naming either one
    # breaks on some releases.
    tsne = TSNE(n_components=2, random_state=0, perplexity=30)
    # detach()/cpu() are no-ops for plain CPU tensors but keep .numpy() from
    # failing on tensors that live on an accelerator or track gradients.
    return tsne.fit_transform(feat['all'].detach().cpu().numpy())
def test_open(fp='./feats_tsne.parquet'):
    """Read the parquet file at ``fp`` and print its first rows.

    Args:
        fp: Path of the parquet file to read.
    """
    preview = pd.read_parquet(fp).head()
    print(preview)
if __name__ == '__main__':
    # Script entry point: load the features, run t-SNE once per prompt and
    # write all 2-D coordinates to a single parquet file.
    feats = load_feats()
    columns = ['x', 'y', 'prompt_id', 'modelVersion_id']
    print('==> applying t-SNE')
    # Collect one frame per prompt and concatenate once at the end. Growing a
    # DataFrame with pd.concat inside the loop re-copies every earlier row on
    # each iteration, and seeding it with an empty frame makes the resulting
    # column dtypes depend on the pandas version.
    frames = []
    for prompt_id, feat in tqdm(feats.items()):
        # Every key except 'all' and 'tsne' is taken as a model-version id.
        # .item() is called on it, so keys are presumably 0-d tensors --
        # TODO confirm against the code that writes the .pt files.
        model_version_ids = [
            int(key.item())
            for key in feat
            if key != 'all' and key != 'tsne'
        ]
        res = calc_tsne(feat)
        tmp = pd.DataFrame(res, columns=['x', 'y'])
        tmp['prompt_id'] = prompt_id
        # Relies on the t-SNE rows lining up one-to-one with the id list;
        # pandas raises ValueError if the lengths differ.
        tmp['modelVersion_id'] = model_version_ids
        frames.append(tmp)
    if frames:
        df = pd.concat(frames, ignore_index=True)
    else:
        # No feature files found: still write a file with the expected schema.
        df = pd.DataFrame(columns=columns)
    df.to_parquet('./feats_tsne.parquet')
    # test_open()