In [1]:
# Load the disease / symptom / patient-profile dataset from the artifacts folder.
import pandas as pd

data = pd.read_csv("../artifacts/Disease_symptom_and_patient_profile_dataset.csv")
# Keep an untouched copy of the raw frame; `data` itself is modified by later cells.
diseases = data.copy()
data.head()

Unnamed: 0,Disease,Fever,Cough,Fatigue,Difficulty Breathing,Age,Gender,Blood Pressure,Cholesterol Level,Outcome Variable
0,Influenza,Yes,No,Yes,Yes,19,Female,Low,Normal,Positive
1,Common Cold,No,Yes,Yes,No,25,Female,Normal,Normal,Negative
2,Eczema,No,Yes,Yes,No,25,Female,Normal,Normal,Negative
3,Asthma,Yes,Yes,No,Yes,25,Male,Normal,Normal,Positive
4,Asthma,Yes,Yes,No,Yes,25,Male,Normal,Normal,Positive


In [2]:
# Dataset dimensions as (rows, columns)
data.shape

(349, 10)

In [3]:
# Column names, non-null counts and dtypes of the dataset
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 349 entries, 0 to 348
Data columns (total 10 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   Disease               349 non-null    object
 1   Fever                 349 non-null    object
 2   Cough                 349 non-null    object
 3   Fatigue               349 non-null    object
 4   Difficulty Breathing  349 non-null    object
 5   Age                   349 non-null    int64 
 6   Gender                349 non-null    object
 7   Blood Pressure        349 non-null    object
 8   Cholesterol Level     349 non-null    object
 9   Outcome Variable      349 non-null    object
dtypes: int64(1), object(9)
memory usage: 27.4+ KB


In [4]:
# Exploratory data analysis
# Count the missing values in each column
data.isnull().sum()

Disease                 0
Fever                   0
Cough                   0
Fatigue                 0
Difficulty Breathing    0
Age                     0
Gender                  0
Blood Pressure          0
Cholesterol Level       0
Outcome Variable        0
dtype: int64

In [5]:
# Count the rows that are flagged as duplicates of another row
data.duplicated().sum()

49

In [6]:
# Drop the duplicated rows (the surviving rows keep their original index labels),
# then confirm that no duplicates remain.
data = data.drop_duplicates()
data.duplicated().sum()

0

In [7]:
# Number of records per disease
data['Disease'].value_counts()

Disease
Asthma                            16
Osteoporosis                      12
Stroke                            11
Hypertension                      10
Migraine                          10
                                  ..
Fibromyalgia                       1
Eating Disorders (Anorexia,...     1
Chickenpox                         1
Rabies                             1
Williams Syndrome                  1
Name: count, Length: 116, dtype: int64

In [8]:
# Summary statistics of the Age column (count, mean, spread, min/max, quartiles)
data['Age'].describe()

count    300.000000
mean      45.756667
std       12.596548
min       19.000000
25%       35.000000
50%       45.000000
75%       55.000000
max       90.000000
Name: Age, dtype: float64

In [9]:
# Bucket the numeric Age column into three coarse categories:
#   below 40 -> 'young', 40 to 59 -> 'Middle', 60 and above -> 'Old'
def age_category(age):
    """Return the age-bucket label for a numeric ``age``.

    Returns 'young' for ages below 40, 'Middle' for ages from 40 up to
    (but not including) 60, and 'Old' for ages of 60 and above.
    A value that fails every comparison (for example NaN) yields ``None``.
    """
    if age >= 60:
        return 'Old'
    if age >= 40:
        return 'Middle'
    if age < 40:
        return 'young'
    # Nothing matched (e.g. NaN compares False everywhere).
    return None
    
# Replace each numeric age with its bucket label. Re-running this cell would feed the
# string labels back into age_category, so run it once per freshly loaded frame.
data['Age'] = data['Age'].apply(age_category)

In [10]:
# Preview the frame now that Age holds category labels instead of numbers
data.head()

Unnamed: 0,Disease,Fever,Cough,Fatigue,Difficulty Breathing,Age,Gender,Blood Pressure,Cholesterol Level,Outcome Variable
0,Influenza,Yes,No,Yes,Yes,young,Female,Low,Normal,Positive
1,Common Cold,No,Yes,Yes,No,young,Female,Normal,Normal,Negative
2,Eczema,No,Yes,Yes,No,young,Female,Normal,Normal,Negative
3,Asthma,Yes,Yes,No,Yes,young,Male,Normal,Normal,Positive
5,Eczema,Yes,No,No,No,young,Female,Normal,Normal,Positive


In [11]:
# Categorical columns encoding
from sklearn.preprocessing import LabelEncoder

# Columns to label-encode, listed in the order they were originally processed.
categorical_columns = [
    'Fever',
    'Cough',
    'Fatigue',
    'Difficulty Breathing',
    'Age',
    'Gender',
    'Blood Pressure',
    'Cholesterol Level',
    'Outcome Variable',
]

# Keep one fitted encoder per column. Refitting a single shared encoder discards
# every mapping except the last one, which makes it impossible to encode new
# inputs (or decode predictions) consistently later on. With this dict,
# `encoders[col].classes_` lists the original labels in code order and
# `encoders[col].transform(...)` / `.inverse_transform(...)` can be reused.
encoders = {}
for column in categorical_columns:
    encoders[column] = LabelEncoder()
    data[column] = encoders[column].fit_transform(data[column])

# Backward compatibility: `le` used to end up fitted on the last encoded column.
le = encoders['Outcome Variable']

In [12]:
# Preview the label-encoded frame
data.head()

Unnamed: 0,Disease,Fever,Cough,Fatigue,Difficulty Breathing,Age,Gender,Blood Pressure,Cholesterol Level,Outcome Variable
0,Influenza,1,0,1,1,2,0,1,2,1
1,Common Cold,0,1,1,0,2,0,2,2,0
2,Eczema,0,1,1,0,2,0,2,2,0
3,Asthma,1,1,0,1,2,1,2,2,1
5,Eczema,1,0,0,0,2,0,2,2,1


In [13]:
# Feature selection
# Target variable: the encoded outcome label
y = data['Outcome Variable']

# Predictor variables: everything except the disease name and the target
X = data.drop(columns=['Disease', 'Outcome Variable'])

In [14]:
# Split the dataset: 20% is held out for testing, with a fixed seed so the split is repeatable.
# NOTE(review): no `stratify` argument is passed, so the class balance of the two
# splits is left to chance — consider `stratify=y` and re-checking the scores.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [15]:
# Verify the (rows, columns) shapes of the train and test feature sets
X_train.shape, X_test.shape

((240, 8), (60, 8))

In [16]:
# Model training: logistic regression with the library's default settings
from sklearn.linear_model import LogisticRegression

logit_model = LogisticRegression().fit(X_train, y_train)

In [17]:
# Model evaluation: score the fitted logistic regression on the held-out test split
logit_model.score(X_test,y_test)

0.75

In [18]:
# CatBoost classifier trained on the same split, with training logs silenced
from catboost import CatBoostClassifier

cat_model = CatBoostClassifier(verbose=False).fit(X_train, y_train)

In [19]:
# Model evaluation: score the fitted CatBoost model on the same held-out test split
cat_model.score(X_test,y_test)

0.7333333333333333

In [21]:
# The logistic regression model scored higher on the test split, so persist it.
# NOTE(review): the recorded scores (0.75 vs 0.733 on 60 test rows) differ by a
# single prediction — weak evidence for preferring one model; consider cross-validation.
import pickle

# Written to the notebook's working directory as a binary pickle file.
with open("Logit_model.pkl", 'wb') as file:
    pickle.dump(logit_model, file)

In [1]:
# Streamlit text-to-speech demo: speak a prompt, then let the user pick a language,
# type some text and have it read aloud.
# NOTE(review): the recorded output of this cell is a FileNotFoundError and the
# failing call is not visible in the saved output. This is Streamlit app code, so it
# presumably belongs in a .py script launched with `streamlit run` — confirm.
import streamlit as st
from streamlit_TTS import auto_play, text_to_speech, text_to_audio

from gtts.lang import tts_langs

# Options for the language dropdown: the keys of the table returned by tts_langs()
# (presumably language codes such as 'en' — confirm against gTTS).
langs=tts_langs().keys()

# Build the audio for the instruction prompt first...
audio=text_to_audio("Choose a language, type some text, and click 'Speak it out!'.", language='en')
# ...then play it
auto_play(audio)

# Input widgets: language choice, text to speak, and the trigger button
lang=st.selectbox("Choose a language",options=langs)
text=st.text_input("Choose a text to speak out:")
speak=st.button("Speak it out!")

# Only speak when a language is selected, text was entered and the button was pressed
if lang and text and speak:
    # Generates and plays the audio in one step
    text_to_speech(text=text, language=lang)



FileNotFoundError: [WinError 2] The system cannot find the file specified

In [1]:
# Install the Bokeh-events bridge into the running kernel's environment.
# `%pip` is used explicitly (no reliance on automagic), the version is pinned to the
# one originally resolved, and `-q` keeps the download progress out of the notebook.
%pip install -q streamlit-bokeh-events==0.1.2

Collecting streamlit-bokeh-events
  Downloading streamlit_bokeh_events-0.1.2-py3-none-any.whl.metadata (407 bytes)
Downloading streamlit_bokeh_events-0.1.2-py3-none-any.whl (2.0 MB)
   ---------------------------------------- 0.0/2.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/2.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/2.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/2.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/2.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/2.0 MB 93.1 kB/s eta 0:00:21
    --------------------------------------- 0.0/2.0 MB 108.9 kB/s eta 0:00:18
    --------------------------------------- 0.0/2.0 MB 108.9 kB/s eta 0:00:18
    --------------------------------------- 0.0/2.0 MB 108.9 kB/s eta 0:00:18
    --------------------------------------- 0.0/2.0 MB 108.9 kB/s eta 0:00:18
    --------------------------------------- 0.0/2.0 MB 85.3 kB/s eta 0:00:23
    ---

In [1]:
import streamlit as st
from bokeh.models.widgets import Button
from bokeh.models import CustomJS
from streamlit_bokeh_events import streamlit_bokeh_events

# Button rendered through Bokeh; clicking it runs the JavaScript handler below.
stt_button = Button(label="Speak", width=100)

# Browser-side handler: starts continuous speech recognition and, whenever final
# transcripts arrive, forwards their concatenated text in a "GET_TEXT" custom event.
speech_recognition_js = CustomJS(code="""
    var recognition = new webkitSpeechRecognition();
    recognition.continuous = true;
    recognition.interimResults = true;
 
    recognition.onresult = function (e) {
        var value = "";
        for (var i = e.resultIndex; i < e.results.length; ++i) {
            if (e.results[i].isFinal) {
                value += e.results[i][0].transcript;
            }
        }
        if ( value != "") {
            document.dispatchEvent(new CustomEvent("GET_TEXT", {detail: value}));
        }
    }
    recognition.start();
    """)
stt_button.js_on_event("button_click", speech_recognition_js)

# Render the button and listen for the "GET_TEXT" event coming back from the browser.
result = streamlit_bokeh_events(
    stt_button,
    events="GET_TEXT",
    key="listen",
    refresh_on_update=False,
    override_height=75,
    debounce_time=0,
)

# Show the recognised text once an event carrying it has been received.
if result and "GET_TEXT" in result:
    st.write(result.get("GET_TEXT"))

2024-10-30 10:52:39.371 
  command:

    streamlit run c:\Users\umesh\anaconda3\envs\DeepLearning\lib\site-packages\ipykernel_launcher.py [ARGUMENTS]


In [1]:
# Install pyzmq into the running kernel's environment; `%pip` is spelled out so the
# cell does not depend on IPython's automagic. Restart the kernel afterwards if prompted.
%pip install pyzmq

Note: you may need to restart the kernel to use updated packages.
