|
import os |
|
|
|
import streamlit as st |
|
|
|
from defaults import ( |
|
PROJECT_NAME, |
|
ARGILLA_URL, |
|
DIBT_PARENT_APP_URL, |
|
DATASET_URL, |
|
DATASET_REPO_ID, |
|
) |
|
|
|
|
|
def project_sidebar(): |
|
if PROJECT_NAME == "DEFAULT_DOMAIN": |
|
st.warning( |
|
"Please set up the project configuration in the parent app before proceeding." |
|
) |
|
st.stop() |
|
|
|
st.sidebar.subheader(f"A Data Growing Project in the domain of {PROJECT_NAME}") |
|
st.sidebar.markdown( |
|
""" |
|
This space helps you create a dataset seed for building diverse domain-specific datasets for aligning models. |
|
""" |
|
) |
|
st.sidebar.link_button(f"π Dataset Repo", DATASET_URL) |
|
st.sidebar.link_button(f"π€ Argilla Space", ARGILLA_URL) |
|
hub_username = DATASET_REPO_ID.split("/")[0] |
|
project_name = DATASET_REPO_ID.split("/")[1] |
|
st.session_state["project_name"] = project_name |
|
st.session_state["hub_username"] = hub_username |
|
st.session_state["hub_token"] = st.sidebar.text_input( |
|
"Hub Token", type="password", value=os.environ.get("HF_TOKEN", None) |
|
) |
|
if st.session_state["hub_token"] is not None: |
|
os.environ["HF_TOKEN"] = st.session_state["hub_token"] |
|
st.sidebar.link_button( |
|
"π€ Get your Hub Token", "https://huggingface.co/settings/tokens" |
|
) |
|
if all( |
|
( |
|
st.session_state.get("project_name"), |
|
st.session_state.get("hub_username"), |
|
st.session_state.get("hub_token"), |
|
) |
|
): |
|
st.success(f"Using the dataset repo {hub_username}/{project_name} on the Hub") |
|
|
|
st.sidebar.divider() |
|
|
|
st.sidebar.link_button("π§βπΎ New Project", DIBT_PARENT_APP_URL) |
|
|
|
if st.session_state["hub_token"] is None: |
|
st.error("Please provide a Hub token to generate answers") |
|
st.stop() |
|
|
|
|
|
def create_seed_terms(topics: list[str], perspectives: list[str]) -> list[str]: |
|
"""Create seed terms for self intruct to start from.""" |
|
|
|
return [ |
|
f"{topic} from a {perspective} perspective" |
|
for topic in topics |
|
for perspective in perspectives |
|
] |
|
|
|
|
|
def create_application_instruction( |
|
domain: str, system_prompt: str, examples: list[dict[str, str]] |
|
) -> str: |
|
"""Create the instruction for Self-Instruct task.""" |
|
system_prompt = f"""AI assistant in the domain of {domain}. {system_prompt}""" |
|
examples_str = "" |
|
for example in examples: |
|
question = example["question"] |
|
answer = example["answer"] |
|
if len(answer) and len(question): |
|
examples_str += f"""\n- Question: {question}\n- Answer: {answer}\n""" |
|
examples_str += f"""\n- Question: {question}\n- Answer: {answer}\n""" |
|
if len(examples_str): |
|
system_prompt += """Below are some examples of questions and answers \ |
|
that the AI assistant would generate:""" |
|
system_prompt += "\nExamples:" |
|
system_prompt += f"\n{examples_str}" |
|
return system_prompt |
|
|