Spaces:
Runtime error
Runtime error
update ds
Browse files
- app.py +16 -6
- dataset_configs.json +0 -0
app.py
CHANGED
@@ -2,21 +2,21 @@ import os
|
|
2 |
import pprint as pp
|
3 |
from collections import OrderedDict, defaultdict
|
4 |
|
|
|
5 |
import diff_viewer
|
6 |
import pandas as pd
|
7 |
import streamlit as st
|
8 |
from datasets import load_dataset, get_dataset_config_names
|
9 |
|
10 |
-
CHECK_DATASET_DIR_PATH_BEFORE_CLEAN_SELECT =
|
11 |
-
LOGS_DATASET_DIR_PATH_BEFORE_CLEAN_SELECT =
|
12 |
-
HF_API_TOKEN =
|
13 |
-
|
14 |
OPERATION_TYPES = [
|
15 |
"Applied filter",
|
16 |
"Applied deduplication function",
|
17 |
"Applied map function",
|
18 |
]
|
19 |
-
MAX_LEN_DS_CHECKS =
|
20 |
|
21 |
|
22 |
def get_ds(config):
|
@@ -261,9 +261,19 @@ st.set_page_config(page_title="Dataset explorer", page_icon=":hugging_face:", la
|
|
261 |
st.write(
|
262 |
"The purpose of this application is to sequentially view the changes made to a dataset."
|
263 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
264 |
col_option_clean, col_option_ds = st.columns(2)
|
265 |
|
266 |
-
|
|
|
|
|
267 |
|
268 |
CLEANING_VERSIONS = set()
|
269 |
dataset_names = set()
|
|
|
2 |
import pprint as pp
|
3 |
from collections import OrderedDict, defaultdict
|
4 |
|
5 |
+
import json
|
6 |
import diff_viewer
|
7 |
import pandas as pd
|
8 |
import streamlit as st
|
9 |
from datasets import load_dataset, get_dataset_config_names
|
10 |
|
11 |
+
CHECK_DATASET_DIR_PATH_BEFORE_CLEAN_SELECT = st.secrets["CHECK_DATASET_DIR_PATH_BEFORE_CLEAN_SELECT"]
|
12 |
+
LOGS_DATASET_DIR_PATH_BEFORE_CLEAN_SELECT = st.secrets["LOGS_DATASET_DIR_PATH_BEFORE_CLEAN_SELECT"]
|
13 |
+
HF_API_TOKEN = st.secrets["HF_API_TOKEN"]
|
|
|
14 |
OPERATION_TYPES = [
|
15 |
"Applied filter",
|
16 |
"Applied deduplication function",
|
17 |
"Applied map function",
|
18 |
]
|
19 |
+
MAX_LEN_DS_CHECKS = st.secrets["MAX_LEN_DS_CHECKS"]
|
20 |
|
21 |
|
22 |
def get_ds(config):
|
|
|
261 |
st.write(
|
262 |
"The purpose of this application is to sequentially view the changes made to a dataset."
|
263 |
)
|
264 |
+
|
265 |
+
|
266 |
+
st.write(CHECK_DATASET_DIR_PATH_BEFORE_CLEAN_SELECT)
|
267 |
+
ds_log = load_dataset(LOGS_DATASET_DIR_PATH_BEFORE_CLEAN_SELECT, 'clean_v1_dsname_lm_en_multi_un_2', use_auth_token=HF_API_TOKEN)
|
268 |
+
st.write(ds_log)
|
269 |
+
|
270 |
+
|
271 |
+
|
272 |
col_option_clean, col_option_ds = st.columns(2)
|
273 |
|
274 |
+
with open("dataset_configs.json", "r") as f:
|
275 |
+
CHECK_CONFIGS = json.load(f)
|
276 |
+
# CHECK_CONFIGS = get_dataset_config_names(CHECK_DATASET_DIR_PATH_BEFORE_CLEAN_SELECT, use_auth_token=HF_API_TOKEN)
|
277 |
|
278 |
CLEANING_VERSIONS = set()
|
279 |
dataset_names = set()
|
dataset_configs.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|