Spaces:
Sleeping
Sleeping
File size: 4,979 Bytes
7fd9522 d05dc56 7fd9522 13dd8c6 ef94797 13dd8c6 d05dc56 220ec8b 13dd8c6 9f54d7d d05dc56 220ec8b d05dc56 f736933 ef94797 6dc5ea2 13dd8c6 d05dc56 13dd8c6 9f54d7d 13dd8c6 d05dc56 ef94797 d05dc56 ef94797 220ec8b d05dc56 220ec8b 4d71980 d05dc56 9edbb4c 220ec8b 9edbb4c ef94797 a9821d1 ef94797 24b2c65 ef94797 24b2c65 ef94797 24b2c65 ef94797 96ea551 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 |
import subprocess
import sys
# Install necessary libraries if not already installed.
# Each import is attempted first; on ImportError the package is installed with
# pip for the running interpreter (sys.executable) and the import is retried.
try:
    from lifelines.utils import concordance_index
except ImportError:
    subprocess.check_call([sys.executable, "-m", "pip", "install", "lifelines"])
    from lifelines.utils import concordance_index

try:
    from datasets import load_dataset, Dataset
    from huggingface_hub import login, upload_file, hf_hub_download
except ImportError:
    # Every package must be its own argv entry: a single
    # "datasets huggingface_hub" string reaches pip as one requirement
    # specifier containing a space, which pip rejects.
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "datasets", "huggingface_hub"]
    )
    from datasets import load_dataset, Dataset
    from huggingface_hub import login, upload_file, hf_hub_download
from pathlib import Path
import pandas as pd
import streamlit as st
import os
# Hugging Face authentication: read the access token from the `hf_token`
# environment variable and log in with it.
hf_token = os.environ.get("hf_token")
login(hf_token)

# Load the ground truth CSV from the Hub and keep its "train" split as a
# pandas DataFrame.
dataset = load_dataset(
    "csv",
    data_files="hf://datasets/HLMCC/tcga-paad-ground-truth/train.csv",
)
ground_truth = pd.DataFrame(dataset["train"])

# Local directory that receives a copy of each uploaded submission; created
# on first run and reused afterwards.
submission_dir = Path("submissions")
submission_dir.mkdir(exist_ok=True)
# Download leaderboard from Hugging Face if it exists.
# The file is fetched into the current directory and read into
# `leaderboard_df`; if that fails for any reason an empty leaderboard with the
# expected columns is created instead.
leaderboard_file_path = Path("leaderboard.csv")
try:
    hf_hub_download(
        repo_id="HLMCC/tcga-paad-ground-truth",
        filename="leaderboard.csv",
        local_dir=".",
        repo_type="dataset",
        # `token` is the current keyword (the deprecated `use_auth_token`
        # was used here before) and matches the upload_file call below.
        token=hf_token,
    )
    leaderboard_df = pd.read_csv(leaderboard_file_path)
    print("Existing leaderboard loaded successfully.")
except Exception as exc:
    # `except Exception` (not a bare `except`) lets KeyboardInterrupt and
    # SystemExit propagate. The cause is printed because starting from an
    # empty leaderboard after, say, a network or auth failure means the next
    # submission will upload a leaderboard without the earlier entries.
    print("No existing leaderboard found. A new leaderboard will be created.")
    print(f"Leaderboard could not be loaded: {exc!r}")
    leaderboard_df = pd.DataFrame(columns=["Username", "C-Index", "Submission Date"])
# Page heading
st.title("PAAD Survival Prediction Submission Portal")

# Notice shown at the top of the app describing the required CSV layout.
# The adjacent literals are concatenated into a single markdown message.
format_notice = (
    "Your CSV file must only contain the following columns:\n\n"
    "- `patient_id`: Unique identifier for each patient matching the patient_id in the test set\n"
    "- `predicted_scores`: Predicted survival scores from your model\n\n"
    "Please ensure your file follows this format before uploading."
)
st.warning(format_notice)
# Submission form: a username field, a CSV uploader and the submit button.
# `username`, `uploaded_file` and `submit_button` are read by the
# submission-processing code that follows.
upload_form = st.form("submission_form")
with upload_form:
    username = st.text_input("Username (required)", max_chars=20)
    uploaded_file = st.file_uploader("Upload your prediction CSV file", type="csv")
    submit_button = st.form_submit_button("Submit")
# Process submission.
# On submit: validate the form inputs, read the uploaded CSV, score it against
# the ground truth with the concordance index, store a copy of the submission,
# append the result to the leaderboard and push the leaderboard to the Hub.
# Every validation failure is reported with st.error and nothing is recorded.
if submit_button:
    # Only `submit_button` gates this branch so that a missing file reaches
    # its own error message below instead of being silently ignored.
    if not username.strip():
        st.error("Username is required. Please enter your username.")
    elif not uploaded_file:
        st.error("Please upload your prediction CSV file.")
    else:
        predictions = pd.read_csv(uploaded_file)
        required_columns = {"patient_id", "predicted_scores"}

        # Check if file format is correct
        if not required_columns.issubset(predictions.columns):
            st.error("Incorrect file format. Ensure columns include 'patient_id' and 'predicted_scores'.")
        else:
            # Merge with ground truth to calculate C-Index
            merged = pd.merge(ground_truth, predictions, on="patient_id", how="inner")
            if merged.empty:
                # Stop here: with no matched patients there is nothing to
                # score, and no leaderboard entry must be written.
                st.error("No matching patient IDs found between the ground truth and your submission. Please check your patient IDs.")
            else:
                c_index = concordance_index(
                    event_times=merged["survival_time"],
                    predicted_scores=merged["predicted_scores"],
                    event_observed=merged["vital_status"],
                )

                # Save submission to file. Both the username and the upload
                # name are user-controlled, so anything other than letters,
                # digits, '-', '_' and '.' (notably path separators) is
                # replaced to keep the file inside `submission_dir`.
                raw_name = f"{username}_{uploaded_file.name}"
                safe_name = "".join(
                    ch if ch.isalnum() or ch in "-_." else "_" for ch in raw_name
                )
                submission_file = submission_dir / safe_name
                predictions.to_csv(submission_file, index=False)

                # Update leaderboard
                new_entry = pd.DataFrame({
                    "Username": [username],
                    "C-Index": [c_index],
                    "Submission Date": [pd.Timestamp.now()],
                })
                leaderboard_df = pd.concat([leaderboard_df, new_entry], ignore_index=True)

                # Save updated leaderboard locally
                leaderboard_df.to_csv(leaderboard_file_path, index=False)

                # Upload updated leaderboard to Hugging Face
                upload_file(
                    path_or_fileobj=leaderboard_file_path,
                    path_in_repo="leaderboard.csv",
                    repo_type="dataset",
                    repo_id="HLMCC/tcga-paad-ground-truth",
                    token=hf_token,
                )

                # Display the calculated C-Index to the user
                st.success(f"Submission received! Your C-Index score: {c_index:.4f}")
# Display the leaderboard, highest C-Index first, with a fresh 0-based index
st.subheader("Leaderboard")
ranked_leaderboard = leaderboard_df.sort_values(by="C-Index", ascending=False)
ranked_leaderboard = ranked_leaderboard.reset_index(drop=True)
st.dataframe(ranked_leaderboard, height=400)
|