import subprocess
import sys

# Install necessary libraries if not already installed.
# Each third-party import is attempted first; on ImportError the missing
# packages are installed with pip for the running interpreter and the import
# is retried once.
import importlib


def _pip_install(*packages):
    """Install ``packages`` with pip using the current Python interpreter.

    Every package is passed to pip as its own command-line argument.
    Raises ``subprocess.CalledProcessError`` if pip exits with a non-zero
    status.
    """
    subprocess.check_call([sys.executable, "-m", "pip", "install", *packages])
    # Let the import system notice packages that appeared after startup.
    importlib.invalidate_caches()


try:
    from lifelines.utils import concordance_index
except ImportError:
    _pip_install("lifelines")
    from lifelines.utils import concordance_index

try:
    from datasets import load_dataset, Dataset
    from huggingface_hub import login, upload_file, hf_hub_download
except ImportError:
    # Two separate arguments: a single "datasets huggingface_hub" string
    # would reach pip as one (invalid) requirement.
    _pip_install("datasets", "huggingface_hub")
    from datasets import load_dataset, Dataset
    from huggingface_hub import login, upload_file, hf_hub_download

from pathlib import Path
import pandas as pd
import streamlit as st
import os

# Hugging Face authentication.
# The access token is read from the `hf_token` environment variable and is
# reused further down for Hub downloads and uploads.
hf_token = os.getenv("hf_token")
if hf_token:
    login(hf_token)  # Log in with the token
else:
    # NOTE(review): login() without a token appears to fall back to an
    # interactive prompt, which a hosted app cannot answer - confirm against
    # the huggingface_hub documentation. Skipping the call keeps the script
    # running; later Hub calls simply receive token=None.
    print("Environment variable 'hf_token' is not set; skipping Hugging Face login.")

# Read the ground-truth CSV from the Hub through the `datasets` CSV loader,
# then turn its "train" split into a pandas DataFrame for merging later on.
dataset = load_dataset(
    path="csv",
    data_files="hf://datasets/HLMCC/tcga-paad-ground-truth/train.csv",
)
ground_truth = pd.DataFrame(dataset["train"])

# Local folder (relative to the working directory) where uploaded submission
# files are kept; it is created on first use and reused afterwards.
submission_dir = Path("submissions")
submission_dir.mkdir(parents=False, exist_ok=True)

# Download leaderboard from Hugging Face if it exists.
# On success `leaderboard_df` holds the stored leaderboard; on any failure it
# becomes an empty frame with the expected columns and the reason is printed.
leaderboard_file_path = Path("leaderboard.csv")
try:
    hf_hub_download(
        repo_id="HLMCC/tcga-paad-ground-truth",
        filename="leaderboard.csv",
        local_dir=".",
        repo_type="dataset",
        # Same keyword as the upload_file() call later in this script.
        token=hf_token,
    )
    leaderboard_df = pd.read_csv(leaderboard_file_path)
    print("Existing leaderboard loaded successfully.")
except Exception as exc:
    # Still best-effort, but no longer silent: a bare `except:` also swallowed
    # KeyboardInterrupt/SystemExit and hid why the download or parse failed.
    # NOTE(review): starting from an empty leaderboard after a transient
    # failure means the next upload replaces the remote file - consider
    # aborting instead for errors other than "file not found".
    print("No existing leaderboard found. A new leaderboard will be created.")
    print(f"Reason: {exc!r}")
    leaderboard_df = pd.DataFrame(columns=["Username", "C-Index", "Submission Date"])

# Page heading
st.title("PAAD Survival Prediction Submission Portal")

# Format reminder shown above the form; the lines are joined with newlines so
# the two column descriptions render as a bullet list.
format_notice = "\n".join(
    [
        "Your CSV file must only contain the following columns:",
        "",
        "- `patient_id`: Unique identifier for each patient matching the patient_id in the test set",
        "- `predicted_scores`: Predicted survival scores from your model",
        "",
        "Please ensure your file follows this format before uploading.",
    ]
)
st.warning(format_notice)

# Submission form: a username, the prediction CSV, and the submit button.
# The three values are read by the processing step that follows.
with st.form(key="submission_form"):
    username = st.text_input(label="Username (required)", max_chars=20)
    uploaded_file = st.file_uploader(
        label="Upload your prediction CSV file",
        type="csv",
    )
    submit_button = st.form_submit_button(label="Submit")

# Process submission
def _safe_filename_part(text):
    """Return ``text`` with unsafe characters replaced by ``_``.

    Only alphanumeric characters and ``.``, ``_``, ``-`` are kept, so
    user-supplied text cannot add a path separator to the name of the stored
    submission file.
    """
    return "".join(ch if ch.isalnum() or ch in "._-" else "_" for ch in str(text))


def _process_submission(raw_username, upload, board):
    """Validate, score and record one submission.

    Args:
        raw_username: Text typed into the username field.
        upload: The uploaded file object, or ``None`` when nothing was chosen.
        board: Current leaderboard DataFrame.

    Returns:
        The leaderboard with the new entry appended, or ``None`` when the
        submission was rejected (an ``st.error`` message explains why).

    Exceptions raised while saving or uploading the leaderboard propagate.
    """
    name = raw_username.strip()
    if not name:
        st.error("Username is required. Please enter your username.")
        return None
    if upload is None:
        st.error("Please upload your prediction CSV file.")
        return None

    try:
        predictions = pd.read_csv(upload)
    except (pd.errors.ParserError, pd.errors.EmptyDataError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded file as CSV: {exc}")
        return None

    # Check if file format is correct
    if not {"patient_id", "predicted_scores"}.issubset(predictions.columns):
        st.error("Incorrect file format. Ensure columns include 'patient_id' and 'predicted_scores'.")
        return None

    # Merge only the two required columns so that extra columns in the upload
    # cannot collide with (and rename) ground-truth columns.
    scores = predictions[["patient_id", "predicted_scores"]]
    if scores["patient_id"].duplicated().any():
        st.error("Duplicate patient IDs found in your submission. Each patient_id must appear only once.")
        return None

    # Merge with ground truth to calculate C-Index
    try:
        merged = pd.merge(ground_truth, scores, on="patient_id", how="inner")
    except ValueError as exc:
        st.error(f"Could not match your patient IDs against the ground truth: {exc}")
        return None
    if merged.empty:
        st.error("No matching patient IDs found between the ground truth and your submission. Please check your patient IDs.")
        # Stop here: there is nothing to score.
        return None

    try:
        c_index = concordance_index(
            event_times=merged["survival_time"],
            predicted_scores=merged["predicted_scores"],
            event_observed=merged["vital_status"],
        )
    except (ValueError, TypeError, ZeroDivisionError) as exc:
        st.error(f"Could not compute the C-Index for your submission: {exc}")
        return None

    # Save submission to file (name built from sanitised user input)
    submission_file = submission_dir / (
        f"{_safe_filename_part(name)}_{_safe_filename_part(upload.name)}"
    )
    predictions.to_csv(submission_file, index=False)

    # Update leaderboard
    new_entry = pd.DataFrame({
        "Username": [name],
        "C-Index": [c_index],
        "Submission Date": [pd.Timestamp.now()],
    })
    updated_board = pd.concat([board, new_entry], ignore_index=True)

    # Save updated leaderboard locally
    updated_board.to_csv(leaderboard_file_path, index=False)

    # Upload updated leaderboard to Hugging Face
    upload_file(
        path_or_fileobj=leaderboard_file_path,
        path_in_repo="leaderboard.csv",
        repo_type="dataset",
        repo_id="HLMCC/tcga-paad-ground-truth",
        token=hf_token,
    )

    # Display the calculated C-Index to the user
    st.success(f"Submission received! Your C-Index score: {c_index:.4f}")
    return updated_board


# Checking only the button lets the "missing file" message actually be shown.
if submit_button:
    updated_leaderboard = _process_submission(username, uploaded_file, leaderboard_df)
    if updated_leaderboard is not None:
        leaderboard_df = updated_leaderboard

# Show the leaderboard with the highest C-Index first and a fresh 0..n-1 index.
st.subheader("Leaderboard")
ranked_leaderboard = leaderboard_df.sort_values(by="C-Index", ascending=False)
ranked_leaderboard = ranked_leaderboard.reset_index(drop=True)
st.dataframe(ranked_leaderboard, height=400)