Map-Data / src /app.py
akhil-vaidya's picture
fix: data_sourcing
dd0a70c
raw
history blame
1.61 kB
######################################## IMPORTING REQUIRED LIBRARIES ####################################
import os
import sys
import pandas as pd
import streamlit as st
# Resolve the directory two levels above this file once; it is both added to
# the import search path (so `utilities` can be imported) and used as the
# parent of the `data` output folder.
_project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(_project_root)
data_folder = os.path.join(_project_root, 'data')
from utilities import get_data, input_filter, clean_data, autogenerate_labels
def data_sourcing(left_lat, left_lon, dist, loc_name):
    """Fetch data for a location and persist the raw result as a CSV file.

    The coordinates are first passed through ``input_filter``; the filtered
    values and ``dist`` are then handed to ``get_data``. The result is written
    to ``<data_folder>/LOCATION_<loc_name>_DATA.csv`` without the index column.

    Args:
        left_lat: Latitude forwarded to ``input_filter`` as ``lat``.
        left_lon: Longitude forwarded to ``input_filter`` as ``lon``.
        dist: Distance forwarded unchanged to ``get_data``.
        loc_name: Label embedded in the output file name.

    Returns:
        The object returned by ``get_data`` (presumably a pandas DataFrame,
        since ``to_csv`` is called on it -- confirm against ``utilities``).
    """
    lat, lon = input_filter(lat=left_lat, lon=left_lon)
    df = get_data(lat, lon, dist)

    # to_csv does not create missing parent directories; make sure the output
    # folder exists so a fresh checkout without `data/` does not fail here.
    os.makedirs(data_folder, exist_ok=True)
    output_path = os.path.join(data_folder, f'LOCATION_{loc_name}_DATA.csv')
    df.to_csv(output_path, index=False)
    return df
def data_clean_for_training(df):
    """Clean a data frame and persist the cleaned result as a CSV file.

    The frame is passed through ``clean_data`` and the result is written to
    ``<data_folder>/MMR_DATA_CLEAN.csv`` without the index column.

    Args:
        df: Data to clean; forwarded unchanged to ``clean_data``.

    Returns:
        The object returned by ``clean_data``.
    """
    df = clean_data(df)

    # to_csv does not create missing parent directories; make sure the output
    # folder exists before writing.
    os.makedirs(data_folder, exist_ok=True)
    df.to_csv(os.path.join(data_folder, 'MMR_DATA_CLEAN.csv'), index=False)
    return df
# ---------------------------------------------------------------------------
# Streamlit UI: collect the query parameters and, on button press, run the
# sourcing -> cleaning -> labelling steps and write the results to CSV files.
# ---------------------------------------------------------------------------
st.title("Map Data Analysis - ETL Pipeline")

left_lat = st.number_input("Enter the left latitude", value=18.889833)
left_lon = st.number_input("Enter the left longitude", value=72.779844)
# Goes to the server console (not the page); kept as a lightweight trace.
print(left_lat, left_lon)
loc_name = st.text_input("Enter the location name", value="Mumbai")
dist = st.number_input("Enter the distance", value=35)

if st.button("Run ETL Pipeline"):
    df = data_sourcing(left_lat, left_lon, dist, loc_name)

    # A DataFrame has no unambiguous truth value (`if df:` raises ValueError),
    # so check explicitly for "nothing returned" / "no rows".
    if df is None or df.empty:
        st.error("No data was returned for the given inputs.")
    else:
        st.write("Data loaded successfully !!")

        df = clean_data(df)
        labelled_df, embeddings_df = autogenerate_labels(df)

        labelled_df.to_csv(f'{data_folder}/DATA_{loc_name}_CLEAN_LABELLED.csv', index=False)
        embeddings_df.to_csv(f'{data_folder}/DATA_{loc_name}_CLEAN_EMBEDDINGS.csv', index=False)

        st.write("ETL Pipeline executed successfully !!")