# Streamlit viewer for comparing text generations produced for the same prompt.
#
# The script loads the "SaulLu/bloom-generations" dataset, lets the user pick a
# prompt in the left column, and shows one generation recorded for that prompt
# in the right column together with the remaining fields of the selected row.
#
# NOTE(review): the copy of this file that was reviewed had lost its
# indentation, and several string literals were split across physical lines
# (which is not valid Python) with what appears to be stripped HTML markup.
# Those literals are rebuilt below with minimal markup (<br>, <p>, <h3>);
# confirm the tag choice and styling against the intended design.

import html

import streamlit as st
from datasets import load_dataset

st.set_page_config(
    page_icon="🧊",
    layout="wide",
)

st.write(
    "This is an application for viewing different generations for the same prompt. "
    "The generations vary depending on the checkpoint used and also the parameters used for the generation."
)

HF_API_TOKEN = st.secrets["HF_API_TOKEN"]
# NOTE(review): not referenced anywhere in the code reviewed here.
PROMPT_COLOR = "#CA437E"


def safe_text(text):
    """Return *text* as an HTML fragment suitable for ``st.markdown``.

    The text is HTML-escaped first, so characters such as ``<`` and ``&``
    coming from the dataset are displayed literally instead of being parsed
    as markup when rendered with ``unsafe_allow_html=True``. Newlines are then
    converted to ``<br>`` so line breaks stay visible, and the result is
    wrapped in a paragraph element.

    Args:
        text: The raw string to display.

    Returns:
        The escaped text wrapped in ``<p>...</p>``.
    """
    # Escape before inserting <br>, otherwise the tags themselves get escaped.
    escaped = html.escape(text)
    with_breaks = escaped.replace("\n", "<br>")
    return f"<p>{with_breaks}</p>"


def prompt_markup_format(text):
    """Return *text* prefixed with the prompt font marker.

    NOTE(review): ``<*font`` is not a valid HTML tag name and no closing tag
    is emitted. The literal is left unchanged because this helper is not
    called in the code reviewed here -- confirm the intended markup.
    """
    return f'<*font color="black">{text}'


def generation_markup_format(text):
    """Return *text* formatted as a string, without adding any markup."""
    return f"{text}"


# NOTE(review): newer `datasets` releases deprecate `use_auth_token` in favour
# of `token`; switch once the version pinned for this app is confirmed.
ds = load_dataset("SaulLu/bloom-generations", use_auth_token=HF_API_TOKEN)
ds = ds["train"]

possible_prompts = ds.unique("prompt")
if not possible_prompts:
    # Without any prompt the selection widgets below cannot be built
    # (the index input would get max_value=-1 < min_value=0).
    st.warning("The dataset does not contain any prompt.")
    st.stop()

col_1, col_2 = st.columns(2)

with col_1:
    st.markdown("<h3>Prompt</h3>", unsafe_allow_html=True)
    chosen_prompt = st.selectbox("Choose a prompt", possible_prompts)
    st.markdown(safe_text(chosen_prompt), unsafe_allow_html=True)

# Keep only the rows whose prompt is the selected one.
sub_ds = ds.filter(
    lambda exs: [prompt == chosen_prompt for prompt in exs["prompt"]],
    batched=True,
)

# NOTE(review): the original indentation was not recoverable; everything from
# here on is placed in the right-hand column. Move the configuration section
# out of the `with` block if it was meant to span the full page width.
with col_2:
    st.markdown("<h3>Generation</h3>", unsafe_allow_html=True)
    index_sample = st.number_input(
        "Index of the chosen generation",
        min_value=0,
        max_value=len(sub_ds) - 1,
        value=0,
        step=1,
    )

    sample = sub_ds[index_sample]
    markdown_text = generation_markup_format(safe_text(sample["generation"]))
    st.markdown(markdown_text, unsafe_allow_html=True)

    st.markdown("<h3>Generation configuration</h3>", unsafe_allow_html=True)
    # Every field of the row except the two texts already displayed above.
    config = {
        key: value
        for key, value in sample.items()
        if key not in ("prompt", "generation")
    }
    # Explicit call instead of a bare `config` expression, so the output does
    # not depend on Streamlit's "magic" display of top-level expressions.
    st.write(config)