estates / load_population.py
David Hrachovy
Update
5608ac6
import sqlite3
import pandas as pd
# Load the CSV file
file_path = "vyvoj_CZ051_CR.csv"
df = pd.read_csv(file_path)
# Clean column names (remove extra characters and spaces)
df.columns = [col.strip().replace("\n", " ").replace(")", "").replace("1", "").replace("2", "") for col in df.columns]
# Convert population columns to numeric (remove commas and convert to int)
for col in df.columns[3:]:
df[col] = df[col].astype(str).apply(lambda x: x.replace(",", "").strip() if isinstance(x, str) else x)
df[col] = pd.to_numeric(df[col], errors='coerce')
# Handle missing values
df.dropna(subset=['Název obce', 'Okres'], inplace=True)
db_path = "municipality_data.db"
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
# Create table
cursor.execute('''
CREATE TABLE IF NOT EXISTS population (
kod_obce TEXT PRIMARY KEY,
nazev_obce TEXT,
okres TEXT,
"1994" INTEGER,
"1995" INTEGER,
"1996" INTEGER,
"1997" INTEGER,
"1998" INTEGER,
"1999" INTEGER,
"2000" INTEGER,
"2001" INTEGER,
"2002" INTEGER,
"2003" INTEGER,
"2004" INTEGER,
"2005" INTEGER,
"2006" INTEGER,
"2007" INTEGER,
"2008" INTEGER,
"2009" INTEGER,
"2010" INTEGER,
"2011" INTEGER,
"2012" INTEGER,
"2013" INTEGER,
"2014" INTEGER,
"2015" INTEGER,
"2016" INTEGER,
"2017" INTEGER,
"2018" INTEGER,
"2019" INTEGER,
"2020" INTEGER,
"2021" INTEGER,
"2022" INTEGER,
"2023" INTEGER
)
''')
# Insert data into the table
df.to_sql('population', conn, if_exists='replace', index=False)
# Commit and close
conn.commit()
conn.close()
print("Data successfully loaded into SQLite database.")