Spaces:
Sleeping
Sleeping
File size: 3,537 Bytes
93e1b64 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
import json
import os

import numpy as np
import pandas as pd
import streamlit as st
from sqlalchemy import create_engine, text
from streamlit_agraph import agraph, Node, Edge, Config

from utils import get_all_diseases_name, get_most_similar_diseases_from_uri, get_uri_from_name
# Connection settings for the IRIS database (SQLAlchemy `iris://` dialect).
# Each value can be overridden through an environment variable, mirroring how
# the hostname was already configured; the defaults reproduce the previously
# hard-coded local demo setup, so behaviour is unchanged when nothing is set.
username = os.getenv('IRIS_USERNAME', 'demo')
password = os.getenv('IRIS_PASSWORD', 'demo')
hostname = os.getenv('IRIS_HOSTNAME', 'localhost')
port = os.getenv('IRIS_PORT', '1972')
namespace = os.getenv('IRIS_NAMESPACE', 'USER')

# URL layout: iris://<user>:<password>@<host>:<port>/<namespace>.
# NOTE: values are interpolated verbatim, so credentials containing URL
# metacharacters (e.g. '@', ':', '/') must be percent-encoded by whoever
# sets the environment variables.
CONNECTION_STRING = f"iris://{username}:{password}@{hostname}:{port}/{namespace}"

# Module-level engine built from CONNECTION_STRING.
engine = create_engine(CONNECTION_STRING)
def handle_click_on_analyze_button():
    """Placeholder for the "analyze" action; currently does nothing.

    Planned pipeline (not implemented yet):

    1. Embed the textual description entered by the user using the model
       (model name still to be filled in).
    2. Get the 5 diseases with the highest cosine similarity from the DB.
    3. Get the similarities between the embeddings of those diseases
       (cosine similarity of the embeddings of the diseases' nodes).
    4. Potentially filter out the diseases that are not similar enough
       (e.g. similarity < 0.8).
    5. Augment the set of diseases: add new diseases that are similar to
       the ones already in the set, until there are 10-15 diseases.
    6. Query the embeddings of the diseases related to each clinical trial
       (also in the DB) to get the clinical trials most similar to the set
       of diseases.
    7. Use an LLM to get a plain-text summary of the clinical trials.
    8. Use an LLM to extract numerical data from the clinical trials
       (e.g. number of patients, number of deaths) and derive summary
       statistics from it.
    9. Show the results to the user: graph of the chosen diseases, summary
       of the clinical trials, their summary statistics, and the list of
       details of the clinical trials considered.
    """
    return None
# --- Page title and free-text input ---------------------------------------
st.write("# Klìnic")
description_input = st.text_input(label="Enter the disease description 👇")

# --- Disease graph (placeholder) ------------------------------------------
st.write(":red[Here should be the graph]")  # TODO remove
# Random 20x3 stand-in data so the page shows a chart until the real disease
# graph is wired in. Uses NumPy's Generator API rather than the legacy
# np.random.randn; requires `import numpy as np` at the top of the file.
chart_data = pd.DataFrame(
    np.random.default_rng().standard_normal((20, 3)),
    columns=["a", "b", "c"],
)  # TODO remove
st.scatter_chart(chart_data)  # TODO remove

# --- Disease overview (placeholder) ---------------------------------------
st.write("## Disease Overview")
disease_overview = ":red[lorem ipsum]"  # TODO
st.write(disease_overview)
# --- Clinical trial details -------------------------------------------------
st.write("## Clinical Trials Details")

# TODO replace mock data
# JSON text is UTF-8; pass the encoding explicitly so decoding does not depend
# on the platform's locale default.
with open("mock_trial.json", encoding="utf-8") as f:
    d = json.load(f)

# Five references to the same mock record stand in for real query results.
trials = [d] * 5

for trial in trials:
    # Resolve the nested sections once instead of re-walking the dict for
    # every field below.
    protocol = trial["protocolSection"]
    identification = protocol["identificationModule"]
    status = protocol["statusModule"]
    design = protocol["designModule"]

    # One collapsible panel per trial, labelled with its NCT identifier.
    with st.expander(f"{identification['nctId']}"):
        official_title = identification["officialTitle"]
        st.write(f"##### {official_title}")

        brief_summary = protocol["descriptionModule"]["briefSummary"]
        st.write(brief_summary)

        status_module = {
            "Status": status["overallStatus"],
            "Status Date": status["statusVerifiedDate"],
        }
        st.write("###### Status")
        st.table(status_module)

        design_module = {
            "Study Type": design["studyType"],
            # "Phases" (design["phases"]) is left out on purpose: it is an
            # array and breaks the table formatting.
            # NOTE(review): real trial records may omit designInfo /
            # enrollmentInfo (presumably e.g. non-interventional studies);
            # fall back to "N/A" rather than failing the whole page.
            "Allocation": design.get("designInfo", {}).get("allocation", "N/A"),
            "Participants": design.get("enrollmentInfo", {}).get("count", "N/A"),
        }
        st.write("###### Design")
        st.table(design_module)
        # TODO more modules?
|