# Scraped page header (not part of the program): "Spaces: Sleeping Sleeping"
# import module
import datasets
import pandas as pd
import streamlit as st
# Placeholder values so these names exist at module level for load();
# each one is reassigned from its Streamlit widget further down the script.
access_token = ""
dataset = ""
split = ""
skip = 0
def load():
    """Fetch a 50-example window of the selected dataset and render it.

    Reads the module-level ``dataset``, ``split``, ``skip`` and
    ``access_token`` values. For every example in the window
    ``[skip, skip + 50)`` it shows a two-row table (the non-English text,
    then the English text) followed by an audio player for the clip.

    If ``dataset`` is not one of the known repositories, an error message is
    shown and nothing is loaded.
    """
    # Per-dataset column names: (audio, English text, other-language text).
    columns_by_dataset = {
        "nlewins/onetalk_questions_full_audio": (
            "audio_transcription", "en", "transcription",
        ),
        "nlewins/LSK_full_with_audio": (
            "audio_transcription", "en", "transcription",
        ),
        "nlewins/fleurs_ceb_to_en": (
            "audio", "transcription_en", "transcription",
        ),
    }
    columns = columns_by_dataset.get(dataset)
    if columns is None:
        # Without this guard the column names below would be unbound and the
        # loop would fail with an UnboundLocalError.
        st.error(f"Unsupported dataset: {dataset!r}")
        return
    column_with_audio, column_with_english_text, column_with_other_text = columns

    # A token typed into the UI wins; otherwise use the app's stored secret.
    token = access_token or st.secrets["hf_token"]
    # Honour the requested split; a blank value falls back to "test", which
    # is what this function previously hard-coded.
    window = datasets.ReadInstruction(
        split or "test", from_=skip, to=skip + 50, unit="abs"
    )
    ds = datasets.load_dataset(dataset, token=token, split=window)

    for example in ds:
        df = pd.DataFrame(
            [example[column_with_other_text], example[column_with_english_text]]
        )
        st.table(df.values)
        audio = example[column_with_audio]
        st.audio(audio["array"], sample_rate=audio["sampling_rate"])
# Title
st.title("One Talk dataset explorer")

# Input widgets; their current values overwrite the module-level placeholders
# that load() reads.
access_token = st.text_input("Access token", type="password")
dataset = st.text_input("Dataset", value="nlewins/LSK_full_with_audio")
split = st.text_input("Split", value="test")
# min_value keeps the slice start from going negative.
skip = st.number_input("Skip", min_value=0, value=250)

# Clicking the button reruns the script, which reaches load() below. load is
# deliberately NOT also registered as on_click: a callback runs before the
# rerun, so the dataset would be fetched and rendered twice per click.
st.button("Go")
st.divider()
load()