Spaces:
Sleeping
Sleeping
File size: 6,363 Bytes
d9cbdf1 138d0d5 d9cbdf1 8823c0c 138d0d5 57bf5d5 8823c0c 138d0d5 00eae37 138d0d5 ff62d04 8823c0c 138d0d5 d88bb9d 138d0d5 ccaeded 138d0d5 8823c0c 138d0d5 8823c0c 138d0d5 8823c0c 138d0d5 fadec32 138d0d5 4a58e4d 138d0d5 8823c0c 138d0d5 8823c0c 4a58e4d 138d0d5 8823c0c 138d0d5 d88bb9d 138d0d5 8823c0c 31b5d12 138d0d5 31b5d12 8823c0c ccaeded 4a58e4d 138d0d5 8823c0c 138d0d5 8823c0c 60f83a0 19ec3d7 ccaeded 19ec3d7 8823c0c 60f83a0 4a58e4d e107527 4a58e4d 60f83a0 ccaeded 19ec3d7 a0aa451 8823c0c a0aa451 138d0d5 8823c0c 00eae37 8823c0c 4a58e4d 8823c0c e107527 31b5d12 4a58e4d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
import streamlit as st
import pandas as pd
from huggingface_hub import HfApi, hf_hub_download
# Constants
HF_REPO = "giobin/MAIA_human_assessment_annotations"
CSV_FILENAME = "user_selections.csv"
# Function to assign samples to users
def assign_samples(csv_path, start=100, count=50):
    """Split the sample pool into per-annotator batches.

    Reads the CSV and, for each ``pool_pos`` value (1, 2, 3), keeps the rows
    whose ``question_category`` does not end with ``"_B"`` and takes ``count``
    consecutive rows beginning at position ``start``. The defaults reproduce
    the original ``iloc[100:150]`` slice (50 samples from the 100th onward).

    Args:
        csv_path: Path or file-like object accepted by ``pandas.read_csv``.
        start: Zero-based position of the first row taken from each pool.
        count: Number of rows taken from each pool.

    Returns:
        A dict mapping annotator name to the DataFrame assigned to them.
        Annotators of the same group share one DataFrame object.
    """
    df = pd.read_csv(csv_path)

    # na=False: a missing category is treated as "not a _B question".
    # Without it, .str.endswith yields NaN for such rows and the boolean
    # negation below cannot be applied.
    is_b_variant = df["question_category"].str.endswith("_B", na=False)
    candidates = df[~is_b_variant]

    def batch(pool_pos):
        """Return the slice of eligible rows for one pool position."""
        pool = candidates[candidates["pool_pos"] == pool_pos]
        return pool.iloc[start:start + count]

    group_1, group_2, group_3 = (batch(pos) for pos in (1, 2, 3))

    return {
        "Bernardo": group_1,
        "Alessandro": group_1,
        "Alessio": group_1,
        "Lenci": group_2,
        "Lucia": group_2,
        "Davide": group_2,
        "Giovanni": group_3,
        "Raffaella": group_3,
    }
# Function to load existing annotations from Hugging Face Hub
def load_existing_annotations():
    """Fetch the annotations CSV stored in the Hugging Face dataset repo.

    Returns:
        The DataFrame read from ``CSV_FILENAME`` in ``HF_REPO``. If the file
        cannot be downloaded or parsed for any reason, an empty DataFrame
        with the columns ``username`` and ``id`` is returned instead.
    """
    try:
        file_path = hf_hub_download(
            HF_REPO,
            CSV_FILENAME,
            repo_type="dataset",
            token=st.secrets["HF_TOKEN"],
        )
        return pd.read_csv(file_path)
    except Exception as e:
        # Best effort: the app must still start when no annotation file
        # exists yet. The failure is logged rather than hidden.
        # NOTE(review): save_choice merges new rows into this frame before
        # uploading, so an empty fallback caused by a transient error (not a
        # missing file) would leave earlier rows out of the upload - confirm
        # whether that case needs stricter handling.
        print(f"Could not load existing annotations from HF: {e}")
        return pd.DataFrame(columns=["username", "id"])
# Build the per-annotator batches and fetch what has already been annotated.
csv_file = "static/mc.csv"
assignments = assign_samples(csv_file)
existing_annotations = load_existing_annotations()

# Only annotators that have an assigned batch can be selected.
valid_users = [*assignments]

# Seed the session state on the first run; later reruns keep their values.
for _key, _default in (("username", None), ("index", 0), ("results", [])):
    if _key not in st.session_state:
        st.session_state[_key] = _default
# User selection
def update_name():
    """Form callback: adopt the chosen annotator and restart at sample 0."""
    state = st.session_state
    state.index = 0  # progress always restarts for a newly chosen user
    state.username = state.selected_user
# Until an annotator has been chosen, render only the selection form.
if st.session_state.username is None:
    with st.form("user_form"):
        st.write("### Seleziona il tuo nome")
        selected_user = st.selectbox("Nome:", valid_users, key="selected_user")
        submit_button = st.form_submit_button("Inizia il task", on_click=update_name)
    st.stop()

# Take the annotator's batch and drop the samples they have already labeled.
current_user = st.session_state.username
full_dataset = assignments[current_user].reset_index(drop=True)
rows_by_user = existing_annotations["username"] == current_user
user_labeled_ids = existing_annotations.loc[rows_by_user, "id"].tolist()
still_pending = ~full_dataset["id"].isin(user_labeled_ids)
dataset = full_dataset[still_pending].reset_index(drop=True)

# Nothing left to annotate: show the completion message and halt this run.
if st.session_state.index >= len(dataset):
    st.write("### Ottimo. Hai completato il tuo task! 🎉")
    st.stop()
# Function to push updated annotations to Hugging Face Hub
def push_to_hf_hub(csv_path):
    """Upload the CSV at ``csv_path`` to the dataset repo as ``CSV_FILENAME``.

    The repo is created first when it does not exist. Any failure is caught
    and printed; nothing is raised to the caller.
    """
    client = HfApi()
    try:
        hf_token = st.secrets["HF_TOKEN"]
        client.create_repo(
            HF_REPO,
            repo_type="dataset",
            exist_ok=True,
            token=hf_token,
        )
        client.upload_file(
            path_or_fileobj=csv_path,
            path_in_repo=CSV_FILENAME,
            repo_id=HF_REPO,
            repo_type="dataset",
            token=hf_token,
        )
        print(f"Dataset updated: https://huggingface.co/datasets/{HF_REPO}")
    except Exception as err:
        print(f"Error pushing to HF: {err}")
# Function to save user choice
def save_choice():
    """Record the answer for the current sample and move to the next one.

    Used as the ``on_click`` callback of the "Continua" submit button. Reads
    the radio and checkbox values from ``st.session_state``, appends one
    result row, and advances ``st.session_state.index``. Once the last
    pending sample is answered, the session results are merged with the
    annotations loaded from the Hub, written to ``CSV_FILENAME`` and
    uploaded.

    If no answer was selected, nothing is recorded and the index does not
    advance, so the same sample is shown again rather than skipped.
    """
    selected_answer = st.session_state.get("selected_answer", None)
    if selected_answer is None:
        return

    sample = dataset.iloc[st.session_state.index]
    not_enough_info = st.session_state.get("not_enough_info", False)

    st.session_state.results.append({
        "username": st.session_state.username,
        "id": sample["id"],
        "video_id": sample["video_id"],
        "answer1": sample["answer1"],
        "answer2": sample["answer2"],
        "selected_answer": selected_answer,
        "target": sample["target"],
        "not_enough_info": not_enough_info,
    })
    st.session_state.index += 1

    # More samples remain: results stay in the session until the batch ends.
    if st.session_state.index < len(dataset):
        return

    # Batch finished: persist everything and push it to the Hub.
    st.write("### Ottimo. Hai completato il tuo task! 🎉")
    result_df = pd.DataFrame(st.session_state.results)
    if not existing_annotations.empty:
        # keep="last" lets the rows from this session win over older rows
        # with the same (username, id).
        result_df = pd.concat([existing_annotations, result_df]).drop_duplicates(
            subset=["username", "id"], keep="last"
        )
    # Same file name locally and in the repo; use the shared constant.
    result_df.to_csv(CSV_FILENAME, index=False)
    push_to_hf_hub(CSV_FILENAME)
    st.stop()
# Row of the pending dataset that is being annotated in this run.
sample = dataset.iloc[st.session_state.index]

# Centered page title followed by the active annotator's name.
title_html = "<h1 style='text-align: center; font-size: 50px;'>MAIA Sample</h1>"
user_html = f"<h3 style='text-align: center;'>User: {st.session_state.username}</h3>"
st.markdown(title_html, unsafe_allow_html=True)
st.markdown(user_html, unsafe_allow_html=True)
st.write("\n\n")

# Annotator instructions (shown in Italian), then a horizontal rule.
instructions_md = """
### Istruzioni:
- Osserva attentamente il frame del video (senza premere play) e cerca di capire il contesto della scena
- Valuta le opzioni (A e B) e seleziona quella che ritieni più attinente al video. Per selezionare l'alternativa usa l'immagine, la tua conoscenza e/o qualsiasi ragionamento utile.
- Se il frame non contiene sufficienti informazioni per decidere l’alternativa appena selezionata, seleziona il checkbox sottostante.
- Clicca 'Continua' per procedere.
"""
st.markdown(instructions_md)
st.write("---")
def convert_youtube_shorts_url(url):
    """Convert a YouTube Shorts URL to a standard YouTube watch URL.

    The video ID is the path segment that directly follows
    ``youtube.com/shorts/``; a trailing slash, query string or fragment
    after it is discarded.

    Args:
        url: The URL to convert.

    Returns:
        ``https://www.youtube.com/watch?v=<id>`` for a Shorts URL. The input
        is returned unchanged when it is not a string, is not a Shorts URL,
        or carries no video ID.
    """
    marker = "youtube.com/shorts/"
    if not isinstance(url, str) or marker not in url:
        return url

    # Keep only what follows the marker, then cut at the first delimiter so
    # "abc/", "abc?feature=share" and "abc#t=1" all yield "abc".
    video_id = url.split(marker, 1)[1]
    for delimiter in "/?#&":
        video_id = video_id.split(delimiter, 1)[0]

    if not video_id:
        return url
    return f"https://www.youtube.com/watch?v={video_id}"
# Show the sample's video, rewriting Shorts links to regular watch URLs.
fixed_url = convert_youtube_shorts_url(sample["video_url"])
st.video(fixed_url)


def _option_label(option):
    """Return the radio label: answer1 for option 0, answer2 otherwise."""
    if option == 0:
        return f"A: {sample['answer1']}"
    return f"B: {sample['answer2']}"


# Annotation form; the values are read back from session state by save_choice.
with st.form("annotation_form"):
    # Mutually exclusive choice between A and B, nothing preselected.
    selected_answer = st.radio(
        "Seleziona la descrizione corretta:",
        options=[0, 1],
        index=None,
        format_func=_option_label,
        key="selected_answer",
    )

    # Independent flag: the frame alone was not enough to decide.
    not_enough_info = st.checkbox(
        "Il frame non contiene sufficienti informazioni per scegliere",
        key="not_enough_info",
    )

    # Submitting runs save_choice before the script reruns.
    submit_button = st.form_submit_button("Continua", on_click=save_choice)
|