Spaces: bigscience-data/process-pipeline-visualizer

Runtime error

SaulLu committed on Jun 17, 2022

Commit

2e8bd01

•

1 Parent(s): 4810cf9

update ds

Files changed (2) hide show

app.py CHANGED Viewed

@@ -2,21 +2,21 @@ import os
 import pprint as pp
 from collections import OrderedDict, defaultdict
 import diff_viewer
 import pandas as pd
 import streamlit as st
 from datasets import load_dataset, get_dataset_config_names
-CHECK_DATASET_DIR_PATH_BEFORE_CLEAN_SELECT = os.getenv("CHECK_DATASET_DIR_PATH_BEFORE_CLEAN_SELECT")
-LOGS_DATASET_DIR_PATH_BEFORE_CLEAN_SELECT = os.getenv("LOGS_DATASET_DIR_PATH_BEFORE_CLEAN_SELECT")
-HF_API_TOKEN = os.getenv("HF_API_TOKEN")
 OPERATION_TYPES = [
     "Applied filter",
     "Applied deduplication function",
     "Applied map function",
 ]
-MAX_LEN_DS_CHECKS = os.getenv("MAX_LEN_DS_CHECKS")
 def get_ds(config):
@@ -261,9 +261,19 @@ st.set_page_config(page_title="Dataset explorer", page_icon=":hugging_face:", la
 st.write(
     "The purpose of this application is to sequentially view the changes made to a dataset."
 )
 col_option_clean, col_option_ds = st.columns(2)
-CHECK_CONFIGS = get_dataset_config_names(CHECK_DATASET_DIR_PATH_BEFORE_CLEAN_SELECT, use_auth_token=HF_API_TOKEN)
 CLEANING_VERSIONS = set()
 dataset_names = set()

 import pprint as pp
 from collections import OrderedDict, defaultdict
+import json
 import diff_viewer
 import pandas as pd
 import streamlit as st
 from datasets import load_dataset, get_dataset_config_names
+CHECK_DATASET_DIR_PATH_BEFORE_CLEAN_SELECT = st.secrets["CHECK_DATASET_DIR_PATH_BEFORE_CLEAN_SELECT"]
+LOGS_DATASET_DIR_PATH_BEFORE_CLEAN_SELECT = st.secrets["LOGS_DATASET_DIR_PATH_BEFORE_CLEAN_SELECT"]
+HF_API_TOKEN = st.secrets["HF_API_TOKEN"]
 OPERATION_TYPES = [
     "Applied filter",
     "Applied deduplication function",
     "Applied map function",
 ]
+MAX_LEN_DS_CHECKS = st.secrets["MAX_LEN_DS_CHECKS"]
 def get_ds(config):
 st.write(
     "The purpose of this application is to sequentially view the changes made to a dataset."
 )
+st.write(CHECK_DATASET_DIR_PATH_BEFORE_CLEAN_SELECT)
+ds_log = load_dataset(LOGS_DATASET_DIR_PATH_BEFORE_CLEAN_SELECT, 'clean_v1_dsname_lm_en_multi_un_2', use_auth_token=HF_API_TOKEN)
+st.write(ds_log)
 col_option_clean, col_option_ds = st.columns(2)
+with open("dataset_configs.json", "r") as f:
+    CHECK_CONFIGS = json.load(f)
+# CHECK_CONFIGS = get_dataset_config_names(CHECK_DATASET_DIR_PATH_BEFORE_CLEAN_SELECT, use_auth_token=HF_API_TOKEN)
 CLEANING_VERSIONS = set()
 dataset_names = set()

dataset_configs.json ADDED Viewed

The diff for this file is too large to render. See raw diff