# Shad0ws's picture
# Update app.py
# 5e8046c
import streamlit as st
import pandas as pd
import asyncio
import random
# Create a fresh asyncio event loop and install it as the current loop for
# this thread. This runs before `sketch` is imported on the next line.
# NOTE(review): presumably sketch needs a current event loop at import/use
# time and the Streamlit script thread does not provide one — confirm.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
import sketch
import streamlit.components.v1 as components
from IPython.display import HTML, display
import uuid
import base64
import json
# --- Main page header --------------------------------------------------------
st.title("Ask Questions to Data")
st.markdown("##### Demo Application powered by sketch package")

# --- Sidebar: logo, package description, how it works, use cases -------------
sidebar = st.sidebar
sidebar.image(
    "https://avatars.githubusercontent.com/u/106505054?s=200&v=4",
    width=100,
)

sidebar.title("About the Package used")
sidebar.markdown("##### Sketch is an AI code-writing assistant for pandas users that understands the context of the data, greatly improving the relevance of suggestions. Sketch is usable in seconds and doesn't require adding a plugin to IDE.")

sidebar.title("How it works:")
sidebar.markdown("##### Sketch uses efficient approximation algorithms (data sketches) to quickly summarize the data, and feed that information into language models. Right now, it does this by summarizing the columns and writing these summary statistics as additional context to be used by the code-writing prompt. In the future, the dev team hopes to feed these sketches directly into custom made data + language foundation models to get more accurate results.")

sidebar.title("Usecases:")
# Each use case is rendered as its own markdown element, in this order.
for usecase in (
    "##### Data Catalogging: General tagging (eg. PII identification), Metadata generation (names and descriptions)",
    "##### Data Engineering: Data cleaning and masking (compliance), Derived feature creation and extraction",
    "##### Data Analysis: Data questions, Data Visualizations",
):
    sidebar.markdown(usecase)

sidebar.caption("Github Repository: https://github.com/approximatelabs/sketch")
def upload_data_file():
    """Show the file-upload widget and pass a chosen file on to ``load_data``.

    The ``file`` and ``df`` entries of Streamlit's session state are reset to
    ``None`` first. If the user has not picked a file, nothing else happens.
    """
    # Start from a clean slate before (re)rendering the uploader.
    for key in ("file", "df"):
        st.session_state[key] = None

    uploaded = st.file_uploader(
        type=["csv", "xlsx", "xls"],
        label='Upload Data File',
    )
    if uploaded is None:
        return
    load_data(uploaded)
def load_data(file):
    """Parse an uploaded file into a DataFrame and cache both in session state.

    The upload widget accepts CSV and Excel files, so the parser is chosen
    from the file name's extension: ``.xlsx`` / ``.xls`` go through
    ``pd.read_excel``; everything else is read as CSV.

    Args:
        file: File-like object returned by ``st.file_uploader``. Its ``name``
            attribute, when present, is used to detect the format.

    Side effects:
        Sets ``st.session_state.file`` to ``file`` and
        ``st.session_state.df`` to the parsed DataFrame.
    """
    st.session_state.file = file

    # Fall back to CSV when the object carries no usable name.
    filename = (getattr(file, "name", "") or "").lower()
    if filename.endswith((".xlsx", ".xls")):
        df = pd.read_excel(file)
    else:
        df = pd.read_csv(file)

    st.session_state.df = df
# Make sure the session-state slots exist before anything reads them.
for _key in ("file", "df"):
    if _key not in st.session_state:
        st.session_state[_key] = None

# No file stored yet: render the uploader so the user can provide one.
if st.session_state.file is None:
    upload_data_file()
def to_b64(data):
    """Return *data* serialized as JSON and then Base64-encoded, as text.

    Args:
        data: Any object accepted by ``json.dumps``.

    Returns:
        The Base64 encoding (as a ``str``) of the UTF-8 bytes of the JSON
        document.
    """
    json_bytes = json.dumps(data).encode("utf-8")
    encoded = base64.b64encode(json_bytes)
    return encoded.decode("utf-8")
# Main panel: once a file is loaded, show its contents and let the user ask
# sketch a question about it or request code for a new analysis.
if st.session_state.file is not None:
    # Reuse the DataFrame that load_data() cached in session state instead of
    # re-parsing the upload as CSV on every rerun (which is wasteful and is
    # wrong for the Excel formats the uploader accepts).
    df = st.session_state.df
    if df is None:
        # Defensive fallback: a file is stored but no parsed frame is cached,
        # so rewind the upload and parse it again through load_data().
        st.session_state.file.seek(0)
        load_data(st.session_state.file)
        df = st.session_state.df

    st.header("Uploaded Data:")
    st.dataframe(df)

    with st.form("my_form"):
        request_type = st.radio(
            label="Selection Panel",
            options=['Ask question about the data', 'Generate codes for new analysis'],
            index=0
        )
        request = st.text_area(
            label="Input your request",
            value="",
            # Recent Streamlit releases reject text_area heights below 68 px.
            height=68,
            max_chars=500
        )
        submitted = st.form_submit_button("Submit")

        # NOTE(review): the original nesting was lost with the indentation;
        # results are rendered inside the form container here — confirm.
        if submitted:
            if not request.strip():
                # Tell the user why nothing happened instead of staying silent.
                st.warning("Please enter a request before submitting.")
            elif request_type == 'Ask question about the data':
                answer = df.sketch.ask(request, call_display=False)
                st.code(answer)
            else:
                answer = df.sketch.howto(request, call_display=False)
                st.code(answer)
else:
    st.write('Please upload data file in order to ask questions to it.')