File size: 2,229 Bytes
b6fadc7
 
 
3108590
b6fadc7
 
 
df63074
b6fadc7
 
 
 
df63074
 
 
 
 
 
 
b6fadc7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b1222b7
 
 
 
 
 
b6fadc7
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import os

import streamlit as st
from st_app import launch_bot

import nest_asyncio
import asyncio
import uuid

import sqlite3
from datasets import load_dataset

# Setup for HTTP API Calls to Amplitude Analytics
# Seed per-session defaults only when they are missing, so values already
# stored in the session state are left untouched.
if "feedback_key" not in st.session_state:
    # Counter starts at zero.
    st.session_state["feedback_key"] = 0

if "device_id" not in st.session_state:
    # A random UUID4 string serves as the device identifier.
    st.session_state["device_id"] = str(uuid.uuid4())

def setup_db():
    """Create and populate the local SQLite database used by the app.

    Opens ``cfpb_database.db`` and, unless every expected table already
    exists and holds at least one row, runs ``create_table.sql`` and loads
    the three CSV files from the ``vectara/cfpb-complaints`` Hugging Face
    dataset into tables of the same name. The ``HF_TOKEN`` environment
    variable, when set, is passed to ``load_dataset`` as the access token.

    A Streamlit spinner is shown while the check and the load run. The
    connection is always closed, including when an error is raised.

    Returns:
        None.
    """
    db_path = 'cfpb_database.db'
    # Each table is loaded from "<table>.csv" in the dataset repository.
    tables = (
        'cfpb_complaints',
        'cfpb_county_populations',
        'cfpb_zip_to_county',
    )

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        with st.spinner("Loading data... Please wait..."):
            def table_populated(table: str) -> bool:
                """Return True if *table* exists and has at least one row."""
                cursor.execute(
                    "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
                    (table,),
                )
                if cursor.fetchone() is None:
                    return False
                # Identifiers cannot be bound as parameters; the name comes
                # from the fixed tuple above, never from user input.
                cursor.execute(f'SELECT 1 FROM "{table}" LIMIT 1')
                return cursor.fetchone() is not None

            # Checking every table (and for rows, not mere existence) lets a
            # previously interrupted setup be detected and repaired instead
            # of being skipped forever.
            if all(table_populated(table) for table in tables):
                print("Database table already populated, skipping setup")
                return

            print("Populating database table")

            # Remove leftovers from an interrupted run so the schema script
            # starts from a clean slate even if it is not idempotent.
            for table in tables:
                cursor.execute(f'DROP TABLE IF EXISTS "{table}"')

            # Execute the SQL commands to create the database table
            with open('create_table.sql', 'r') as sql_file:
                cursor.executescript(sql_file.read())

            hf_token = os.getenv('HF_TOKEN')

            # Load each CSV from the dataset into its same-named table.
            for table in tables:
                df = load_dataset(
                    "vectara/cfpb-complaints",
                    data_files=f"{table}.csv",
                    token=hf_token,
                )['train'].to_pandas()
                df.to_sql(table, conn, if_exists='replace', index=False)

            conn.commit()
    finally:
        # Close on every path: early return, success, or exception.
        conn.close()

if __name__ == "__main__":
    # Script entry point: configure the page, make sure the local database
    # is ready, then run the bot coroutine to completion.
    st.set_page_config(
        layout="wide",
        page_title="CFPB Complaints Assistant",
    )
    setup_db()

    # NOTE(review): nest_asyncio is applied before asyncio.run(), presumably
    # because an event loop may already be running in this process - confirm.
    nest_asyncio.apply()
    asyncio.run(launch_bot())