import sqlite3 import pandas as pd # Load the CSV file file_path = "vyvoj_CZ051_CR.csv" df = pd.read_csv(file_path) # Clean column names (remove extra characters and spaces) df.columns = [col.strip().replace("\n", " ").replace(")", "").replace("1", "").replace("2", "") for col in df.columns] # Convert population columns to numeric (remove commas and convert to int) for col in df.columns[3:]: df[col] = df[col].astype(str).apply(lambda x: x.replace(",", "").strip() if isinstance(x, str) else x) df[col] = pd.to_numeric(df[col], errors='coerce') # Handle missing values df.dropna(subset=['Název obce', 'Okres'], inplace=True) db_path = "municipality_data.db" conn = sqlite3.connect(db_path) cursor = conn.cursor() # Create table cursor.execute(''' CREATE TABLE IF NOT EXISTS population ( kod_obce TEXT PRIMARY KEY, nazev_obce TEXT, okres TEXT, "1994" INTEGER, "1995" INTEGER, "1996" INTEGER, "1997" INTEGER, "1998" INTEGER, "1999" INTEGER, "2000" INTEGER, "2001" INTEGER, "2002" INTEGER, "2003" INTEGER, "2004" INTEGER, "2005" INTEGER, "2006" INTEGER, "2007" INTEGER, "2008" INTEGER, "2009" INTEGER, "2010" INTEGER, "2011" INTEGER, "2012" INTEGER, "2013" INTEGER, "2014" INTEGER, "2015" INTEGER, "2016" INTEGER, "2017" INTEGER, "2018" INTEGER, "2019" INTEGER, "2020" INTEGER, "2021" INTEGER, "2022" INTEGER, "2023" INTEGER ) ''') # Insert data into the table df.to_sql('population', conn, if_exists='replace', index=False) # Commit and close conn.commit() conn.close() print("Data successfully loaded into SQLite database.")