File size: 6,363 Bytes
d9cbdf1
138d0d5
 
d9cbdf1
8823c0c
138d0d5
 
57bf5d5
8823c0c
138d0d5
 
00eae37
 
 
138d0d5
 
 
 
 
 
 
 
 
 
ff62d04
8823c0c
138d0d5
 
d88bb9d
138d0d5
 
ccaeded
138d0d5
8823c0c
138d0d5
 
 
 
8823c0c
138d0d5
 
8823c0c
138d0d5
 
 
 
 
 
 
fadec32
 
 
 
 
 
138d0d5
 
4a58e4d
 
 
138d0d5
 
8823c0c
138d0d5
 
 
 
8823c0c
 
4a58e4d
138d0d5
 
8823c0c
138d0d5
 
 
d88bb9d
 
138d0d5
 
 
 
8823c0c
31b5d12
138d0d5
31b5d12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8823c0c
 
ccaeded
4a58e4d
138d0d5
 
 
 
 
 
 
 
8823c0c
138d0d5
 
8823c0c
60f83a0
19ec3d7
ccaeded
19ec3d7
8823c0c
60f83a0
4a58e4d
 
e107527
 
4a58e4d
60f83a0
ccaeded
19ec3d7
a0aa451
 
 
 
 
 
 
 
 
 
 
8823c0c
a0aa451
138d0d5
8823c0c
 
 
 
00eae37
8823c0c
4a58e4d
8823c0c
 
 
 
 
e107527
31b5d12
 
4a58e4d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import streamlit as st
import pandas as pd
from huggingface_hub import HfApi, hf_hub_download

# Constants
# Hub repository id the annotations are downloaded from and uploaded to
# (always accessed with repo_type="dataset").
HF_REPO = "giobin/MAIA_human_assessment_annotations"
# Name of the annotations CSV inside that repository.
CSV_FILENAME = "user_selections.csv"

# Function to assign samples to users
def assign_samples(csv_path, start=100, stop=150):
    """Split the sample pool into per-annotator assignments.

    Reads the CSV at ``csv_path`` and, for each ``pool_pos`` value 1, 2 and 3,
    keeps the rows whose ``question_category`` does not end with ``"_B"``,
    then takes the positional slice ``[start:stop]`` of what is left.

    Args:
        csv_path: Anything ``pandas.read_csv`` accepts. The data must provide
            ``pool_pos`` and ``question_category`` columns.
        start: Position of the first row kept in each filtered pool
            (default 100).
        stop: Position one past the last row kept in each filtered pool
            (default 150, i.e. 50 samples per pool).

    Returns:
        dict mapping annotator name to the DataFrame assigned to them.
        Annotators of the same group share the same DataFrame object.
    """
    df = pd.read_csv(csv_path)
    # Build the "_B" exclusion mask once. na=False treats rows with a missing
    # category as "not _B"; on object-dtype columns the mask would otherwise
    # contain NaN, which breaks the ``~`` negation.
    keep = ~df["question_category"].str.endswith("_B", na=False)

    def pool(position):
        # Rows of one pool position, "_B" categories removed, sliced by position.
        return df[(df["pool_pos"] == position) & keep].iloc[start:stop]

    group_1, group_2, group_3 = pool(1), pool(2), pool(3)
    return {
        "Bernardo": group_1,
        "Alessandro": group_1,
        "Alessio": group_1,
        "Lenci": group_2,
        "Lucia": group_2,
        "Davide": group_2,
        "Giovanni": group_3,
        "Raffaella": group_3,
    }

# Function to load existing annotations from Hugging Face Hub
def load_existing_annotations():
    """Fetch previously saved annotations from the Hugging Face Hub.

    Downloads ``CSV_FILENAME`` from the ``HF_REPO`` dataset repository using
    the ``HF_TOKEN`` Streamlit secret and parses it with pandas.

    Returns:
        The stored annotations as a DataFrame. If the download or the parse
        fails for any reason, an empty DataFrame with ``username`` and ``id``
        columns is returned instead so the app can still start.
    """
    try:
        file_path = hf_hub_download(HF_REPO, CSV_FILENAME, repo_type="dataset", token=st.secrets["HF_TOKEN"])
        return pd.read_csv(file_path)
    except Exception as e:
        # Deliberately best effort, but report the cause instead of hiding it:
        # a silent failure here is indistinguishable from "no annotations yet".
        print(f"Error loading annotations from HF: {e}")
        return pd.DataFrame(columns=["username", "id"])

# Build the per-annotator assignments and fetch what is already annotated
csv_file = "static/mc.csv"
assignments = assign_samples(csv_file)
existing_annotations = load_existing_annotations()

# Annotator names offered in the login form
valid_users = list(assignments)

# Seed session state; only keys that are still missing get a value
for _state_key, _initial in (("username", None), ("index", 0), ("results", [])):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial

# User selection
def update_name():
    """Store the name picked in the form as the active user and restart at sample 0."""
    state = st.session_state
    state.username = state.selected_user
    # Progress always starts from the first sample for a newly chosen user
    state.index = 0

# No user chosen yet: show the name picker and halt the script here
if st.session_state.username is None:
    with st.form("user_form"):
        st.write("### Seleziona il tuo nome")
        selected_user = st.selectbox("Nome:", valid_users, key="selected_user")
        submit_button = st.form_submit_button("Inizia il task", on_click=update_name)
    st.stop()

# Samples assigned to this user, minus the ones they have already annotated
current_user = st.session_state.username
full_dataset = assignments[current_user].reset_index(drop=True)
is_current_user = existing_annotations["username"] == current_user
user_labeled_ids = existing_annotations.loc[is_current_user, "id"].tolist()
already_labeled = full_dataset["id"].isin(user_labeled_ids)
dataset = full_dataset[~already_labeled].reset_index(drop=True)

# Nothing left to annotate: show the completion message and halt
if len(dataset) <= st.session_state.index:
    st.write("### Ottimo. Hai completato il tuo task! 🎉")
    st.stop()

# Function to push updated annotations to Hugging Face Hub
def push_to_hf_hub(csv_path):
    """Upload the CSV at ``csv_path`` to the ``HF_REPO`` dataset repository.

    The repository is created if it does not exist yet, and the file is stored
    under ``CSV_FILENAME``. Any failure is caught and printed; nothing is
    raised to the caller.
    """
    hub = HfApi()
    try:
        # Read inside the try so a missing secret is reported like any other failure
        token = st.secrets["HF_TOKEN"]
        hub.create_repo(HF_REPO, repo_type="dataset", exist_ok=True, token=token)
        hub.upload_file(
            path_or_fileobj=csv_path,
            path_in_repo=CSV_FILENAME,
            repo_id=HF_REPO,
            repo_type="dataset",
            token=token,
        )
        print(f"Dataset updated: https://huggingface.co/datasets/{HF_REPO}")
    except Exception as err:
        print(f"Error pushing to HF: {err}")

# Function to save user choice
def save_choice():
    """Record the answer for the current sample and upload once all are done.

    Used as the ``on_click`` callback of the annotation form. Reads the
    ``selected_answer`` and ``not_enough_info`` widget values from session
    state. When an answer is selected, a result row is appended to
    ``st.session_state.results`` and the sample index advances; when no answer
    is selected, nothing is recorded and the index stays where it is.

    After the last sample is answered, all results of this session are merged
    with the annotations stored on the Hub (one row per ``username``/``id``,
    newest wins), written to a local CSV and pushed to the Hub.
    """
    sample = dataset.iloc[st.session_state.index]
    selected_answer = st.session_state.get("selected_answer", None)
    not_enough_info = st.session_state.get("not_enough_info", False)

    if selected_answer is not None:
        st.session_state.results.append({
            "username": st.session_state.username,
            "id": sample["id"],
            "video_id": sample["video_id"],
            "answer1": sample["answer1"],
            "answer2": sample["answer2"],
            "selected_answer": selected_answer,
            "target": sample["target"],
            "not_enough_info": not_enough_info
        })
        st.session_state.index += 1

    # Save results and push to Hugging Face Hub if all samples are labeled
    if st.session_state.index >= len(dataset):
        st.write("### Ottimo. Hai completato il tuo task! 🎉")
        result_df = pd.DataFrame(st.session_state.results)
        # existing_annotations was loaded when the page was rendered; other
        # annotators may have pushed since then. Merge against a fresh copy so
        # the upload does not overwrite their rows.
        latest = load_existing_annotations()
        if latest.empty:
            # Refresh returned nothing (or failed): fall back to the snapshot
            # rather than dropping rows we already know about.
            latest = existing_annotations
        if not latest.empty:
            result_df = pd.concat([latest, result_df]).drop_duplicates(subset=["username", "id"], keep="last")
        # Local file name matches the name used in the Hub repository
        result_df.to_csv(CSV_FILENAME, index=False)
        push_to_hf_hub(CSV_FILENAME)
        st.stop()

# Sample the user is currently annotating
sample = dataset.iloc[st.session_state.index]

# Centered page title followed by the active annotator's name
title_html = "<h1 style='text-align: center; font-size: 50px;'>MAIA Sample</h1>"
user_html = f"<h3 style='text-align: center;'>User: {st.session_state.username}</h3>"
st.markdown(title_html, unsafe_allow_html=True)
st.markdown(user_html, unsafe_allow_html=True)
st.write("\n\n")

# Annotation guidelines shown to the user, then a horizontal rule
instructions_md = """
### Istruzioni:
- Osserva attentamente il frame del video (senza premere play) e cerca di capire il contesto della scena
- Valuta le opzioni (A e B) e seleziona quella che ritieni più attinente al video. Per selezionare l'alternativa usa l'immagine, la tua conoscenza e/o qualsiasi ragionamento utile.
- Se il frame non contiene sufficienti informazioni per decidere l’alternativa appena selezionata, seleziona il checkbox sottostante. 
- Clicca 'Continua' per procedere.
"""
st.markdown(instructions_md)
st.write("---")


def convert_youtube_shorts_url(url):
    """Convert a YouTube Shorts URL to a standard YouTube watch URL.

    The video id is the path segment right after ``youtube.com/shorts/``;
    a trailing slash, query string or fragment after it is ignored.

    Args:
        url: The video URL.

    Returns:
        ``https://www.youtube.com/watch?v=<id>`` for a Shorts URL with an id;
        otherwise ``url`` unchanged.
    """
    marker = "youtube.com/shorts/"
    if marker not in url:
        return url
    # Take what follows the marker instead of the last "/"-separated piece,
    # which is empty for a trailing slash and wrong when the query has a "/".
    video_id = url.split(marker, 1)[1]
    for separator in "/?#":
        video_id = video_id.split(separator, 1)[0]
    if not video_id:
        # No id to convert; leave the URL as it was
        return url
    return f"https://www.youtube.com/watch?v={video_id}"

# Embed the sample's video, with Shorts links converted to regular watch links
fixed_url = convert_youtube_shorts_url(sample["video_url"])
st.video(fixed_url)


def _answer_label(option):
    """Return the radio label: answer A for option 0, answer B otherwise."""
    if option == 0:
        return f"A: {sample['answer1']}"
    return f"B: {sample['answer2']}"


# Annotation form; pressing the submit button runs save_choice
with st.form("annotation_form"):
    # Mutually exclusive choice between A and B, nothing preselected
    selected_answer = st.radio(
        "Seleziona la descrizione corretta:",
        options=[0, 1],
        index=None,
        format_func=_answer_label,
        key="selected_answer",
    )

    # Separate flag the user can tick regardless of the chosen answer
    not_enough_info = st.checkbox(
        "Il frame non contiene sufficienti informazioni per scegliere",
        key="not_enough_info",
    )

    submit_button = st.form_submit_button("Continua", on_click=save_choice)