Spaces:
Runtime error
Runtime error
## LIBRARIES ### | |
## Data | |
import numpy as np | |
import pandas as pd | |
import torch | |
import json | |
from tqdm import tqdm | |
from math import floor | |
from datasets import load_dataset | |
from collections import defaultdict | |
from transformers import AutoTokenizer | |
pd.options.display.float_format = '${:,.2f}'.format | |
# Analysis | |
# App & Visualization | |
import streamlit as st | |
from bokeh.models import CustomJS, ColumnDataSource, HoverTool, BoxSelectTool, Callback, Select, TextInput, DataTable, TableColumn | |
from bokeh.events import SelectionGeometry | |
from bokeh.plotting import figure, output_file, show | |
from bokeh.transform import factor_cmap | |
from bokeh.palettes import Category20c_20 | |
from bokeh.layouts import column, row | |
# utils | |
from random import sample | |
def datasets_explorer_viz(df):
    """Render the interactive dataset explorer inside the Streamlit page.

    Builds a Bokeh scatter plot of the rows in ``df`` (coloured and
    legend-grouped by ``task``), a search text box, and a side table that
    lists the ``task`` / ``dataset_id`` of whatever points are currently
    box-selected in the plot. The whole layout is handed to Streamlit.

    Args:
        df: Table passed straight to ``ColumnDataSource``. The code reads
            the columns ``x``, ``y``, ``task`` and ``dataset_id``.
            NOTE(review): ``x``/``y`` look like 2-D projection coordinates
            (the JS callback calls the source ``umap_source``) -- confirm.

    Returns:
        None. The layout is displayed as a side effect.
    """
    umap_source = ColumnDataSource(df)

    # Search box. The callback only logs the typed value to the browser
    # console; it does not filter the plot or the table.
    text_input = TextInput(value="", title="Search")
    text_input.js_on_change("value", CustomJS(code="""
        console.log('text_input: value=' + this.value, this.toString())
    """))

    tooltips = [("dataset_id", "@dataset_id"), ("task", "@task")]

    # Pass the factors as a plain list, in first-appearance order, so the
    # colour assigned to each task stays the same as before.
    task_factors = list(df['task'].unique())
    color = factor_cmap('task', palette=Category20c_20, factors=task_factors)

    # `width`/`height` replace `plot_width`/`plot_height`, which were
    # removed in Bokeh 3.0; Bokeh 2.4 accepts the new names as well.
    p = figure(
        width=1000,
        height=1000,
        tools="hover,wheel_zoom,pan,box_select",
        title="Dataset explorer",
        tooltips=tooltips,
        toolbar_location="above",
    )
    p.scatter(
        'x',
        'y',
        size=3,
        source=umap_source,
        alpha=0.8,
        marker='circle',
        fill_color=color,
        line_color=color,
        legend_field='task',
    )
    p.legend.location = "bottom_right"
    #p.legend.click_policy="mute"
    p.legend.label_text_font_size = "8pt"

    # The table starts empty and is filled client-side from the selection.
    table_source = ColumnDataSource(data=dict())
    columns = [
        # TableColumn(field="x", title="X data"),
        # TableColumn(field="y", title="Y data"),
        TableColumn(field="task", title="Task"),
        TableColumn(field="dataset_id", title="Dataset ID"),
    ]
    data_table = DataTable(source=table_source, columns=columns, width=300)

    # Whenever the selected indices change, rebuild the table columns from
    # the selected rows of the scatter source.
    umap_source.selected.js_on_change('indices', CustomJS(
        args=dict(umap_source=umap_source, table_source=table_source),
        code="""
        const inds = cb_obj.indices;
        const tableData = table_source.data;
        const umapData = umap_source.data;
        //tableData['x'] = []
        //tableData['y'] = []
        tableData['task'] = []
        tableData['dataset_id'] = []
        for (let i = 0; i < inds.length; i++) {
            // tableData['x'].push(umapData['x'][inds[i]])
            // tableData['y'].push(umapData['y'][inds[i]])
            tableData['task'].push(umapData['task'][inds[i]])
            tableData['dataset_id'].push(umapData['dataset_id'][inds[i]])
        }
        table_source.data = tableData;
        table_source.change.emit();
        """,
    ))

    layout = row(column(text_input, p), data_table)

    # bokeh's show() writes a standalone HTML file and tries to open a local
    # browser, so nothing appears in the Streamlit page. Embed the layout
    # through Streamlit instead.
    st.bokeh_chart(layout)
if __name__ == "__main__":
    # --- Streamlit page setup: wide layout and browser-tab title ---
    st.set_page_config(page_title="Datasets Explorer", layout="wide")

    # --- Read the datasets table from disk and hand it to the explorer ---
    datasets_explorer_viz(pd.read_parquet('./assets/data/datasets_df.parquet'))