# Hugging Face Spaces page header captured with this script ("Spaces: Sleeping");
# it is not part of the program.
import importlib
import os
import subprocess
import sys
from pathlib import Path

import pandas as pd
import streamlit as st


def _pip_install(*packages):
    """Install ``packages`` with pip into the interpreter running this script.

    Each package name is passed to pip as its own argument; a single
    space-separated string would be treated as one (invalid) requirement.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    subprocess.check_call([sys.executable, "-m", "pip", "install", *packages])
    # Make sure the import system notices the freshly installed packages.
    importlib.invalidate_caches()


# Install necessary libraries if not already installed
try:
    from lifelines.utils import concordance_index
except ImportError:
    _pip_install("lifelines")
    from lifelines.utils import concordance_index

try:
    from datasets import load_dataset, Dataset
    from huggingface_hub import login, upload_file, hf_hub_download
except ImportError:
    _pip_install("datasets", "huggingface_hub")
    from datasets import load_dataset, Dataset
    from huggingface_hub import login, upload_file, hf_hub_download
# Hugging Face authentication.
# The token is read from the 'hf_token' environment variable.
hf_token = os.getenv('hf_token')
if hf_token:
    login(hf_token)  # Log in with the token
else:
    # Calling login() without a token falls back to an interactive prompt,
    # which is unusable in a script that reruns on every UI interaction.
    # Skip the explicit login and rely on any credentials already cached.
    print("Environment variable 'hf_token' is not set; "
          "relying on cached Hugging Face credentials, if any.")

# Load the ground truth dataset used to score submissions.
dataset = load_dataset("csv", data_files="hf://datasets/HLMCC/tcga-paad-ground-truth/train.csv")
ground_truth = pd.DataFrame(dataset['train'])
# Set up a directory for storing submissions
submission_dir = Path("submissions")
submission_dir.mkdir(exist_ok=True)

# Download leaderboard from Hugging Face if it exists.
# The file is placed in the current directory, i.e. at leaderboard_file_path.
leaderboard_file_path = Path("leaderboard.csv")
try:
    hf_hub_download(
        repo_id="HLMCC/tcga-paad-ground-truth",
        filename="leaderboard.csv",
        local_dir=".",
        repo_type="dataset",
        token=hf_token,  # `token` supersedes the deprecated `use_auth_token`
    )
    leaderboard_df = pd.read_csv(leaderboard_file_path)
    print("Existing leaderboard loaded successfully.")
except Exception as exc:
    # Deliberately best-effort: any failure starts from an empty leaderboard,
    # but the cause is logged instead of being swallowed silently.
    # NOTE(review): a transient failure (e.g. network) also lands here, and the
    # empty leaderboard would replace the remote file on the next upload;
    # consider treating only "file not found" as "no leaderboard yet".
    print("No existing leaderboard found. A new leaderboard will be created.")
    print(f"Reason: {exc!r}")
    leaderboard_df = pd.DataFrame(columns=["Username", "C-Index", "Submission Date"])
# Page heading
st.title("PAAD Survival Prediction Submission Portal")

# Format requirements, shown as a warning banner above the form.
format_notice_lines = [
    "Your CSV file must only contain the following columns:",
    "",
    "- `patient_id`: Unique identifier for each patient matching the patient_id in the test set",
    "- `predicted_scores`: Predicted survival scores from your model",
    "",
    "Please ensure your file follows this format before uploading.",
]
st.warning("\n".join(format_notice_lines))

# Submission form: username, prediction CSV, and the submit button.
submission_form = st.form("submission_form")
with submission_form:
    username = st.text_input("Username (required)", max_chars=20)
    uploaded_file = st.file_uploader("Upload your prediction CSV file", type="csv")
    submit_button = st.form_submit_button("Submit")
def _safe_filename_part(text):
    """Return ``text`` with every character outside [alnum, '-', '_', '.'] replaced by '_'.

    Used on user-supplied values before they become part of a file name, so
    path separators cannot redirect the write outside the submissions folder.
    """
    return "".join(ch if ch.isalnum() or ch in "-_." else "_" for ch in text)


# Process submission.
# Validation runs in stages; `problem` holds the first user-facing error found
# (None means the submission is valid so far). Nothing is saved or uploaded
# unless every stage passes.
if submit_button:
    clean_username = username.strip()
    problem = None
    predictions = None
    c_index = None

    if not clean_username:
        problem = "Username is required. Please enter your username."
    elif not uploaded_file:
        problem = "Please upload your prediction CSV file."
    else:
        try:
            predictions = pd.read_csv(uploaded_file)
        except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
            problem = f"Could not read the uploaded CSV file: {exc}"

    if problem is None:
        # Check if file format is correct
        required_columns = {"patient_id", "predicted_scores"}
        if not required_columns.issubset(predictions.columns):
            problem = "Incorrect file format. Ensure columns include 'patient_id' and 'predicted_scores'."
        else:
            try:
                # Merge with ground truth to calculate C-Index. Only the two
                # required columns are merged so extra columns in the upload
                # cannot collide with ground-truth column names.
                merged = pd.merge(
                    ground_truth,
                    predictions[["patient_id", "predicted_scores"]],
                    on="patient_id",
                    how="inner",
                )
                if merged.empty:
                    problem = "No matching patient IDs found between the ground truth and your submission. Please check your patient IDs."
                else:
                    c_index = concordance_index(
                        event_times=merged["survival_time"],
                        predicted_scores=merged["predicted_scores"],
                        event_observed=merged["vital_status"],
                    )
            except (TypeError, ValueError, ZeroDivisionError) as exc:
                # Raised for mismatched ID dtypes, non-numeric/NaN scores, or
                # data with no comparable pairs.
                problem = f"Could not compute the C-Index from your submission: {exc}"

    if problem is not None:
        st.error(problem)
    else:
        # Save submission to file (name built from sanitized user input).
        safe_name = (
            f"{_safe_filename_part(clean_username)}_"
            f"{_safe_filename_part(Path(uploaded_file.name).name)}"
        )
        submission_file = submission_dir / safe_name
        predictions.to_csv(submission_file, index=False)

        # Update leaderboard
        new_entry = pd.DataFrame({
            "Username": [clean_username],
            "C-Index": [c_index],
            "Submission Date": [pd.Timestamp.now()],
        })
        leaderboard_df = pd.concat([leaderboard_df, new_entry], ignore_index=True)

        # Save updated leaderboard locally
        leaderboard_df.to_csv(leaderboard_file_path, index=False)

        # Upload updated leaderboard to Hugging Face
        upload_file(
            path_or_fileobj=leaderboard_file_path,
            path_in_repo="leaderboard.csv",
            repo_type="dataset",
            repo_id="HLMCC/tcga-paad-ground-truth",
            token=hf_token,
        )

        # Display the calculated C-Index to the user
        st.success(f"Submission received! Your C-Index score: {c_index:.4f}")
# Render the leaderboard, best C-Index first, with a fresh 0-based index.
st.subheader("Leaderboard")
ranked_leaderboard = leaderboard_df.sort_values(by="C-Index", ascending=False)
ranked_leaderboard = ranked_leaderboard.reset_index(drop=True)
st.dataframe(ranked_leaderboard, height=400)