Spaces:

giobin
/

MAIA_human_assessment

Sleeping

File size: 6,363 Bytes

d9cbdf1
138d0d5
 
d9cbdf1
8823c0c
138d0d5
 
57bf5d5
8823c0c
138d0d5
 
00eae37
 
 
138d0d5
 
 
 
 
 
 
 
 
 
ff62d04
8823c0c
138d0d5
 
d88bb9d
138d0d5
 
ccaeded
138d0d5
8823c0c
138d0d5
 
 
 
8823c0c
138d0d5
 
8823c0c
138d0d5
 
 
 
 
 
 
fadec32
 
 
 
 
 
138d0d5
 
4a58e4d
 
 
138d0d5
 
8823c0c
138d0d5
 
 
 
8823c0c
 
4a58e4d
138d0d5
 
8823c0c
138d0d5
 
 
d88bb9d
 
138d0d5
 
 
 
8823c0c
31b5d12
138d0d5
31b5d12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8823c0c
 
ccaeded
4a58e4d
138d0d5
 
 
 
 
 
 
 
8823c0c
138d0d5
 
8823c0c
60f83a0
19ec3d7
ccaeded
19ec3d7
8823c0c
60f83a0
4a58e4d
 
e107527
 
4a58e4d
60f83a0
ccaeded
19ec3d7
a0aa451
 
 
 
 
 
 
 
 
 
 
8823c0c
a0aa451
138d0d5
8823c0c
 
 
 
00eae37
8823c0c
4a58e4d
8823c0c
 
 
 
 
e107527
31b5d12
 
4a58e4d

import streamlit as st
import pandas as pd
from huggingface_hub import HfApi, hf_hub_download

# Constants
HF_REPO = "giobin/MAIA_human_assessment_annotations"
CSV_FILENAME = "user_selections.csv"

# Function to assign samples to users
def assign_samples(csv_path, start=100, stop=150):
    """Build the annotator -> samples mapping from the source CSV.

    Rows whose ``question_category`` ends with ``"_B"`` are discarded. The
    remaining rows are split by ``pool_pos`` (1, 2 or 3) and the positional
    window ``[start:stop)`` of each split becomes the work package shared by
    the annotators of that group.

    Args:
        csv_path: Path (or buffer) accepted by ``pandas.read_csv``. The file
            must provide the ``pool_pos`` and ``question_category`` columns.
        start: First row (positional, inclusive) of the window taken from
            each pool. Defaults to 100.
        stop: End row (positional, exclusive) of the window. Defaults to 150,
            i.e. 50 samples per pool.

    Returns:
        dict mapping each annotator name to the DataFrame slice assigned to
        them. Annotators of the same group share the same DataFrame object.
    """
    df = pd.read_csv(csv_path)

    # na=False: a missing category counts as "does not end with _B" instead
    # of producing NaN, which would make the negation below raise.
    keep = ~df["question_category"].str.endswith("_B", na=False)

    def _pool_window(pool_pos):
        # Rows of one pool, without the "_B" categories, cut to the window.
        return df[(df["pool_pos"] == pool_pos) & keep].iloc[start:stop]

    group_1, group_2, group_3 = (_pool_window(pos) for pos in (1, 2, 3))
    return {
        "Bernardo": group_1,
        "Alessandro": group_1,
        "Alessio": group_1,
        "Lenci": group_2,
        "Lucia": group_2,
        "Davide": group_2,
        "Giovanni": group_3,
        "Raffaella": group_3,
    }

# Function to load existing annotations from Hugging Face Hub
def load_existing_annotations():
    """Return the annotations already stored in the Hugging Face dataset repo.

    Downloads ``CSV_FILENAME`` from ``HF_REPO`` using the ``HF_TOKEN`` secret
    and parses it with pandas.

    Returns:
        The stored annotations as a DataFrame. If anything in the download or
        parsing raises, an empty DataFrame with the columns ``username`` and
        ``id`` is returned instead.
    """
    # NOTE(review): every failure (missing file, missing secret, network
    # error, ...) is mapped to "no annotations yet"; callers cannot tell these
    # cases apart.
    try:
        local_copy = hf_hub_download(
            repo_id=HF_REPO,
            filename=CSV_FILENAME,
            repo_type="dataset",
            token=st.secrets["HF_TOKEN"],
        )
        stored = pd.read_csv(local_copy)
    except Exception:
        return pd.DataFrame(columns=["username", "id"])
    else:
        return stored

# Load the sample pool and the annotations collected so far
csv_file = "static/mc.csv"
assignments = assign_samples(csv_file)
existing_annotations = load_existing_annotations()

# Annotators that can be picked in the selection form (assignment order)
valid_users = list(assignments)

# Seed the session state once; on later reruns existing values are kept
for _state_key, _initial in (("username", None), ("index", 0), ("results", [])):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial

# User selection
def update_name():
    """Adopt the name chosen in the selection form and restart at sample 0."""
    state = st.session_state
    # "selected_user" is the key of the selectbox in the user form.
    state.username = state.selected_user
    state.index = 0  # progress starts over for the chosen annotator

# Login gate: until a name is chosen, render only the selection form and halt
if st.session_state.username is None:
    with st.form(key="user_form"):
        st.write("### Seleziona il tuo nome")
        selected_user = st.selectbox(label="Nome:", options=valid_users, key="selected_user")
        submit_button = st.form_submit_button(label="Inizia il task", on_click=update_name)
    st.stop()

# Samples assigned to this annotator, minus the ones they already labeled
full_dataset = assignments[st.session_state.username].reset_index(drop=True)
_rows_by_user = existing_annotations["username"] == st.session_state.username
user_labeled_ids = existing_annotations.loc[_rows_by_user, "id"].tolist()
_still_unlabeled = ~full_dataset["id"].isin(user_labeled_ids)
dataset = full_dataset[_still_unlabeled].reset_index(drop=True)

# Nothing left to label: show the completion message and halt the script
if len(dataset) <= st.session_state.index:
    st.write("### Ottimo. Hai completato il tuo task! 🎉")
    st.stop()

# Function to push updated annotations to Hugging Face Hub
def push_to_hf_hub(csv_path):
    """Upload the annotations CSV to the Hugging Face dataset repository.

    The repository ``HF_REPO`` is created if it does not exist yet, then the
    file at ``csv_path`` is uploaded as ``CSV_FILENAME``. Failures are not
    propagated: any exception is caught and reported with ``print``.

    Args:
        csv_path: Local path of the CSV file to upload.
    """
    api = HfApi()
    try:
        # Arguments shared by both Hub calls; the secret lookup stays inside
        # the try so a missing token is reported like any other failure.
        repo_kwargs = {
            "repo_id": HF_REPO,
            "repo_type": "dataset",
            "token": st.secrets["HF_TOKEN"],
        }
        api.create_repo(exist_ok=True, **repo_kwargs)
        api.upload_file(
            path_or_fileobj=csv_path,
            path_in_repo=CSV_FILENAME,
            **repo_kwargs,
        )
        print(f"Dataset updated: https://huggingface.co/datasets/{HF_REPO}")
    except Exception as e:
        print(f"Error pushing to HF: {e}")

# Function to save user choice
def save_choice():
    """Record the submitted answer for the current sample (form callback).

    Reads the radio (``selected_answer``) and checkbox (``not_enough_info``)
    values from the session state. If an answer was selected, a result row is
    appended to ``st.session_state.results``, the sample index is advanced and
    both widgets are reset. If no answer was selected, nothing is recorded and
    the same sample stays on screen.

    Once the index reaches the end of ``dataset``, the collected results are
    merged with ``existing_annotations`` (latest row per ``username``/``id``
    wins), written to ``user_selections.csv`` and pushed to the Hub.
    """
    state = st.session_state
    sample = dataset.iloc[state.index]
    selected_answer = state.get("selected_answer", None)
    not_enough_info = state.get("not_enough_info", False)

    if selected_answer is not None:
        state.results.append({
            "username": state.username,
            "id": sample["id"],
            "video_id": sample["video_id"],
            "answer1": sample["answer1"],
            "answer2": sample["answer2"],
            "selected_answer": selected_answer,
            "target": sample["target"],
            "not_enough_info": not_enough_info
        })
        state.index += 1

        # Keyed widgets keep their value across reruns, so without this reset
        # the next sample would be shown with the previous answer and checkbox
        # already filled in. Callbacks run before the widgets are re-created,
        # which is why assigning to their keys is allowed here.
        state.selected_answer = None
        state.not_enough_info = False

    # Save results and push to Hugging Face Hub if all samples are labeled
    if state.index >= len(dataset):
        st.write("### Ottimo. Hai completato il tuo task! 🎉")
        result_df = pd.DataFrame(state.results)
        csv_path = "user_selections.csv"
        if not existing_annotations.empty:
            # keep="last": a fresh annotation replaces an older stored one
            # for the same (username, id) pair.
            result_df = pd.concat([existing_annotations, result_df]).drop_duplicates(subset=["username", "id"], keep="last")
        result_df.to_csv(csv_path, index=False)
        push_to_hf_hub(csv_path)
        st.stop()

# Display current sample
# Row of the filtered dataset at the annotator's current position.
sample = dataset.iloc[st.session_state.index]

# Page title and user information
# Raw HTML is used to centre the headings, hence unsafe_allow_html=True.
st.markdown("<h1 style='text-align: center; font-size: 50px;'>MAIA Sample</h1>", unsafe_allow_html=True)
st.markdown(f"<h3 style='text-align: center;'>User: {st.session_state.username}</h3>", unsafe_allow_html=True)
st.write("\n\n")

# Instructions
# User-facing text (in Italian) describing the annotation procedure; it is
# rendered as Markdown and must be kept verbatim.
st.markdown("""
### Istruzioni:
- Osserva attentamente il frame del video (senza premere play) e cerca di capire il contesto della scena
- Valuta le opzioni (A e B) e seleziona quella che ritieni più attinente al video. Per selezionare l'alternativa usa l'immagine, la tua conoscenza e/o qualsiasi ragionamento utile.
- Se il frame non contiene sufficienti informazioni per decidere l’alternativa appena selezionata, seleziona il checkbox sottostante. 
- Clicca 'Continua' per procedere.
""")
# Separator between the instructions and the sample shown below.
st.write("---")


def convert_youtube_shorts_url(url):
    """Convert a YouTube Shorts URL to a standard YouTube watch URL.

    The video ID is the path segment that directly follows
    ``youtube.com/shorts/``; a trailing slash, query string or fragment after
    it is ignored.

    Args:
        url: The video URL as a string.

    Returns:
        ``https://www.youtube.com/watch?v=<id>`` for a Shorts URL. Any other
        URL, and a Shorts URL with no ID after the marker, is returned
        unchanged.
    """
    _, marker, tail = url.partition("youtube.com/shorts/")
    if not marker:
        return url

    # Cut at the first delimiter that can follow the ID. Taking the text after
    # the last "/" instead breaks on a trailing slash or a "/" in the query.
    video_id = tail
    for delimiter in "/?#&":
        video_id = video_id.split(delimiter, 1)[0]

    if not video_id:
        return url
    return f"https://www.youtube.com/watch?v={video_id}"

# Embed the sample's video; Shorts links are rewritten to the watch form first
fixed_url = convert_youtube_shorts_url(sample["video_url"])
st.video(fixed_url)


def _format_option(option):
    """Return the radio label: option 0 shows answer1 as "A", otherwise answer2 as "B"."""
    if option == 0:
        return f"A: {sample['answer1']}"
    return f"B: {sample['answer2']}"


# Annotation form: nothing is processed until the submit button is pressed,
# at which point save_choice runs as the callback
with st.form(key="annotation_form"):
    # Mutually exclusive choice between the two candidate descriptions;
    # index=None means no option is pre-selected
    selected_answer = st.radio(
        label="Seleziona la descrizione corretta:",
        options=[0, 1],
        index=None,
        format_func=_format_option,
        key="selected_answer",
    )

    # Independent flag: the frame alone was not enough to decide
    not_enough_info = st.checkbox(
        label="Il frame non contiene sufficienti informazioni per scegliere",
        key="not_enough_info",
    )

    # Submitting stores the answer and moves on to the next sample
    submit_button = st.form_submit_button(label="Continua", on_click=save_choice)