NursNurs committed on
Commit
d9d1579
•
1 Parent(s): 6005918
Files changed (3) hide show
  1. app.py +118 -0
  2. apps.py +0 -36
  3. icd_embedded.csv +0 -0
app.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import from 3rd party libraries
2
+ import streamlit as st
3
+ import streamlit.components.v1 as components
4
+ # import streamlit_analytics
5
+ import pandas as pd
6
+ import numpy as np
7
+ import re
8
+ from sklearn.metrics.pairwise import cosine_similarity
9
+ import string
10
+ import nltk
11
+ from nltk.corpus import stopwords
12
+ from nltk.stem import WordNetLemmatizer
13
+ nltk.download("stopwords")
14
+ nltk.download('wordnet')
15
+ from sentence_transformers import SentenceTransformer
16
+ import plotly.express as px
17
+ import pandas as pd
18
+ from sklearn.decomposition import PCA
19
+
20
# Browser-tab title and icon; the icon is the robot-face emoji (restored from
# a mis-decoded UTF-8 byte sequence).
st.set_page_config(page_title="Mental disorder by description", page_icon="🤖")
21
+
22
def convert_string_to_numpy_array(s):
    '''Convert a stringified vector of floats to a float64 NumPy array.

    Every floating-point literal found in ``s`` is extracted, in order of
    appearance; brackets, commas and whitespace are ignored.

    Recognised forms:
      * plain decimals:          ``0.25``, ``-1.5``, ``.5``
      * trailing-dot floats:     ``3.``  (how NumPy prints 3.0 inside arrays)
      * scientific notation:     ``1.5e-05``, ``-2.E+03``, ``1e-07``

    Bare integers without an exponent (e.g. the ``32`` in ``float32``) are
    deliberately not matched, so stray digits in the text are not mistaken
    for vector components.

    Returns a 1-D ``np.float64`` array; it is empty when nothing matches.
    '''
    # The exponent must be consumed together with the mantissa: otherwise
    # "1.5e-05" would be read as 1.5 and silently corrupt the vector.
    number_pattern = (
        r'-?(?:\d+\.\d*|\.\d+)(?:[eE][-+]?\d+)?'  # decimal, optional exponent
        r'|-?\d+[eE][-+]?\d+'                     # integer mantissa + exponent
    )
    numbers_list = re.findall(number_pattern, s)
    return np.array(numbers_list, dtype=np.float64)
26
+
27
+ #load the model
28
@st.cache_resource
def get_models():
    '''Build the sentence-embedding model and a WordNet lemmatizer.

    Decorated with ``st.cache_resource``. Status messages are written to the
    page before and after the model is constructed.

    Returns a ``(model, lemmatizer)`` tuple.
    '''
    st.write('Loading the model...')
    sentence_model = SentenceTransformer("stsb-bert-large")
    st.write("The app is loaded and ready to use!")
    return sentence_model, WordNetLemmatizer()
36
+
37
+ model, lemmatizer = get_models()
38
+ stop_words = set(stopwords.words('english'))
39
+
40
+ #load the dataframe with disorder embeddings
41
@st.cache_data  # cache the parsed table and embeddings
def load_data():
    '''Read the ICD table and decode its stored embeddings.

    Loads ``icd_embedded.csv``, parses each entry of the ``Embeddings``
    column with ``convert_string_to_numpy_array`` and stores the result in a
    new ``numpy_array`` column.

    Returns a ``(dataframe, embeddings)`` tuple, where ``embeddings`` is a
    NumPy array built from the per-row vectors.
    '''
    icd_table = pd.read_csv('icd_embedded.csv')
    parsed_vectors = icd_table['Embeddings'].apply(convert_string_to_numpy_array)
    icd_table['numpy_array'] = parsed_vectors
    embedding_matrix = np.array(icd_table["numpy_array"].tolist())
    return icd_table, embedding_matrix
47
+
48
+ df_icd, icd_embeddings = load_data()
49
+
50
+ #create a list of disease names
51
@st.cache_data  # cache the list of names
def create_disease_list():
    '''Return the values of the ``Disease`` column of ``df_icd`` as a list.

    Reads the module-level ``df_icd`` dataframe; row order is preserved.
    '''
    return [name for name in df_icd["Disease"]]
57
+
58
+ disease_names = create_disease_list()
59
+
60
+ if 'descriptions' not in st.session_state:
61
+ st.session_state.descriptions = []
62
+
63
def similarity_top(descr_emb, disorder_embs):
    '''Return the five disorders most similar to a description embedding.

    ``descr_emb`` is the embedding of the user's description and
    ``disorder_embs`` holds one embedding per disorder. Names are taken from
    the module-level ``disease_names`` list and paired with the scores by
    position.

    Returns a list of at most five ``(disease_name, similarity_score)``
    tuples, ordered from the highest cosine similarity to the lowest.
    '''
    # cosine_similarity expects 2-D inputs, so present the query as one row.
    query_row = descr_emb.reshape(1, -1)
    score_rows = cosine_similarity(disorder_embs, query_row)

    # Each score row holds a single value; sort the (name, row) pairs on it,
    # best match first. The sort is stable, so ties keep their input order.
    ranked = sorted(
        zip(disease_names, score_rows),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # Unwrap the single score from each row for the five best matches.
    return [(name, row[0]) for name, row in ranked[:5]]
84
+
85
+
86
+ # with text_spinner_placeholder:
87
+ # with st.spinner("Please wait while your Tweet is being generated..."):
88
+ # mood_prompt = f"{mood} " if mood else ""
89
+ # if style:
90
+ # twitter = twe.Tweets(account=style)
91
+ # tweets = twitter.fetch_tweets()
92
+ # tweets_prompt = "\n\n".join(tweets)
93
+ # prompt = (
94
+ # f"Write a {mood_prompt}Tweet about {topic} in less than 120 characters "
95
+ # f"and in the style of the following Tweets:\n\n{tweets_prompt}\n\n"
96
+
97
+ # Configure Streamlit page and state
98
# Page header and short explanation of what the app does.
st.title("Detect the disorder")
st.markdown(
    "This mini-app predicts a mental disorder based on your description."
)

# Free-text description to compare against the disorder embeddings.
# Named `user_description` so the builtin `input` is not shadowed.
user_description = st.text_input(
    label="Your description",
    placeholder="Insert a description of a character",
)
# Skip empty or whitespace-only input: it carries no content to embed.
if user_description and user_description.strip():
    input_embed = model.encode(user_description)
    # Top five (disease_name, similarity_score) pairs, best match first.
    sim_score = similarity_top(input_embed, icd_embeddings)
    st.write(sim_score)
108
+
109
+ # mood = st.text_input(
110
+ # label="Mood (e.g. inspirational, funny, serious) (optional)",
111
+ # placeholder="inspirational",
112
+ # )
113
+ # style = st.text_input(
114
+ # label="Twitter account handle to style-copy recent Tweets (optional, limited by Twitter's API)",
115
+ # placeholder="elonmusk",
116
+ # )
117
+
118
+ text_spinner_placeholder = st.empty()
apps.py DELETED
@@ -1,36 +0,0 @@
1
- # Import from 3rd party libraries
2
- import streamlit as st
3
- import streamlit.components.v1 as components
4
- # import streamlit_analytics
5
-
6
-
7
- # with text_spinner_placeholder:
8
- # with st.spinner("Please wait while your Tweet is being generated..."):
9
- # mood_prompt = f"{mood} " if mood else ""
10
- # if style:
11
- # twitter = twe.Tweets(account=style)
12
- # tweets = twitter.fetch_tweets()
13
- # tweets_prompt = "\n\n".join(tweets)
14
- # prompt = (
15
- # f"Write a {mood_prompt}Tweet about {topic} in less than 120 characters "
16
- # f"and in the style of the following Tweets:\n\n{tweets_prompt}\n\n"
17
-
18
- # Configure Streamlit page and state
19
- st.set_page_config(page_title="Tweet", page_icon="πŸ€–")
20
-
21
- st.title("Generate Tweets")
22
- st.markdown(
23
- "This mini-app predicts a mental disorder based on your description."
24
- )
25
-
26
- topic = st.text_input(label="Topic (or hashtag)", placeholder="AI")
27
- mood = st.text_input(
28
- label="Mood (e.g. inspirational, funny, serious) (optional)",
29
- placeholder="inspirational",
30
- )
31
- style = st.text_input(
32
- label="Twitter account handle to style-copy recent Tweets (optional, limited by Twitter's API)",
33
- placeholder="elonmusk",
34
- )
35
-
36
- text_spinner_placeholder = st.empty()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
icd_embedded.csv ADDED
The diff for this file is too large to render. See raw diff