Spaces:

Shad0ws
/

Ask-Questions-to-Data

Runtime error

App Files Files Community

Ask-Questions-to-Data / app.py

Shad0ws

Update app.py

5e8046c over 1 year ago

raw

history blame contribute delete

3.45 kB

	import streamlit as st
	import pandas as pd
	import asyncio
	import random

	# Create a fresh asyncio event loop and install it as the current loop
	# before `sketch` is imported further down.
	# NOTE(review): presumably needed because the Streamlit script thread has no
	# default event loop and sketch expects one at import/use time — confirm.
	loop = asyncio.new_event_loop()
	asyncio.set_event_loop(loop)

	import sketch
	import streamlit.components.v1 as components
	from IPython.display import HTML, display
	import uuid
	import base64
	import json

	# Main page heading and tagline.
	st.title("Ask Questions to Data")
	st.markdown("##### Demo Application powered by sketch package")

	# Everything below is static descriptive text rendered in the sidebar.
	with st.sidebar:
	    st.image("https://avatars.githubusercontent.com/u/106505054?s=200&v=4", width=100)

	    st.title("About the Package used")
	    st.markdown("##### Sketch is an AI code-writing assistant for pandas users that understands the context of the data, greatly improving the relevance of suggestions. Sketch is usable in seconds and doesn't require adding a plugin to IDE.")

	    st.title("How it works:")
	    st.markdown("##### Sketch uses efficient approximation algorithms (data sketches) to quickly summarize the data, and feed that information into language models. Right now, it does this by summarizing the columns and writing these summary statistics as additional context to be used by the code-writing prompt. In the future, the dev team hopes to feed these sketches directly into custom made data + language foundation models to get more accurate results.")

	    st.title("Usecases:")
	    st.markdown("##### Data Catalogging: General tagging (eg. PII identification), Metadata generation (names and descriptions)")
	    st.markdown("##### Data Engineering: Data cleaning and masking (compliance), Derived feature creation and extraction")
	    st.markdown("##### Data Analysis: Data questions, Data Visualizations")

	    st.caption("Github Repository: https://github.com/approximatelabs/sketch")





	def upload_data_file():
	    """Clear any stored upload and render the file-uploader widget.

	    Resets the ``file`` and ``df`` entries of ``st.session_state`` to
	    ``None``, shows an uploader restricted to csv/xlsx/xls files, and hands
	    a selected file to ``load_data``.
	    """
	    state = st.session_state
	    state.file = None
	    state.df = None

	    uploaded = st.file_uploader(
	        label='Upload Data File',
	        type=["csv","xlsx","xls"],
	    )
	    # Nothing selected yet: leave the session state cleared.
	    if uploaded is None:
	        return
	    load_data(uploaded)


	def load_data(file):
	    """Parse an uploaded file into a DataFrame and store it in session state.

	    Args:
	        file: File-like upload object. When it exposes a ``name`` ending in
	            ``.xlsx`` or ``.xls`` it is read with ``pd.read_excel``;
	            anything else is read with ``pd.read_csv``.

	    Raises:
	        Whatever the pandas reader raises for unreadable input. In that case
	        the session state is left untouched.

	    Side effects:
	        Sets ``st.session_state.file`` and ``st.session_state.df``.
	    """
	    # The uploader accepts Excel files as well as CSV, so choose the reader
	    # from the file extension instead of always assuming CSV.
	    filename = (getattr(file, "name", "") or "").lower()
	    if filename.endswith((".xlsx", ".xls")):
	        df = pd.read_excel(file)
	    else:
	        df = pd.read_csv(file)

	    # Store only after a successful parse: a file recorded before a failed
	    # parse would hide the uploader on every later rerun.
	    st.session_state.file = file
	    st.session_state.df = df


	# Configure session state: make sure both keys exist before they are read.
	for _key in ('file', 'df'):
	    if _key not in st.session_state:
	        st.session_state[_key] = None


	# No stored upload yet, so render the uploader widget.
	if st.session_state.file is None:
	    upload_data_file()


	def to_b64(data):
	    """Return ``data`` serialized as JSON and base64-encoded, as a str."""
	    json_bytes = json.dumps(data).encode("utf-8")
	    encoded = base64.b64encode(json_bytes)
	    return encoded.decode("utf-8")

	if st.session_state.file is not None:
	    # Reuse the frame stored in session state rather than re-parsing the
	    # upload as CSV on every rerun; re-parsing is wasted work and fails for
	    # the Excel types the uploader also accepts.
	    df = st.session_state.df
	    if df is None:
	        # Fallback when only the file was stored: rewind and parse it.
	        st.session_state.file.seek(0)
	        df = pd.read_csv(st.session_state.file)
	        st.session_state.df = df

	    st.header("Uploaded Data:")
	    st.dataframe(df)

	    with st.form("my_form"):
	        request_type = st.radio(
	            label="Selection Panel",
	            options=['Ask question about the data', 'Generate codes for new analysis'],
	            index=0,
	        )

	        request = st.text_area(
	            label="Input your request",
	            value="",
	            # NOTE(review): raised from 50; newer Streamlit releases reject
	            # text_area heights below 68 px — confirm against pinned version.
	            height=68,
	            max_chars=500,
	        )

	        submitted = st.form_submit_button("Submit")

	    if submitted:
	        prompt = request.strip()
	        if not prompt:
	            # Tell the user why nothing happened instead of staying silent.
	            st.warning("Please enter a request before submitting.")
	        elif request_type == 'Ask question about the data':
	            # Natural-language answer about the data.
	            answer = df.sketch.ask(prompt, call_display=False)
	            st.code(answer)
	        else:
	            # Generated code for the requested analysis.
	            answer = df.sketch.howto(prompt, call_display=False)
	            st.code(answer)

	else:
	    st.write('Please upload data file in order to ask questions to it.')