import sqlite3
import pandas as pd
import streamlit as st
import plotly.express as px
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from collections import Counter
import numpy as np

# Function to open a connection to the SQLite database (no rows are read here)
def load_data(db_file):
    """Open a SQLite connection to ``db_file`` and hand it back.

    Despite the name, nothing is queried here: the caller receives the open
    ``sqlite3.Connection`` and is responsible for closing it.
    """
    return sqlite3.connect(db_file)

# Function to fetch data from database based on query
def fetch_data(conn, query, params=None):
    """Run ``query`` on ``conn`` and return the result set as a DataFrame.

    Args:
        conn: Open database connection to execute the query on.
        query: SQL text to execute.
        params: Optional values bound to the placeholders in ``query``
            (for example a tuple for ``?`` placeholders). Passing values
            here instead of formatting them into the SQL text avoids
            quoting mistakes and SQL injection. Defaults to ``None``,
            which runs the query exactly as written.

    Returns:
        A ``pandas.DataFrame`` with one column per selected field.
    """
    return pd.read_sql_query(query, conn, params=params)

# Function to fetch summary info from database
def fetch_summary_info(conn):
    """Compute the three headline figures shown at the top of the dashboard.

    Args:
        conn: Open connection to a database that contains the
            ``title_basics`` and ``title_ratings`` tables.

    Returns:
        A tuple ``(total_movies, total_years, avg_rating)`` where

        * ``total_movies`` is the number of ``title_basics`` rows whose
          ``titleType`` is ``'movie'``;
        * ``total_years`` is the number of distinct ``startYear`` values
          among those rows, ignoring NULL and the ``\\N`` placeholder;
        * ``avg_rating`` is the mean ``averageRating`` of rated movies.
          It is ``None`` when no movie has a rating row.
    """
    def single_value(query, column):
        # Every summary query returns exactly one row with one named column.
        return fetch_data(conn, query).iloc[0][column]

    # Fetch total count of movies
    query_total_movies = '''
        SELECT COUNT(*) as total_movies
        FROM title_basics
        WHERE titleType = 'movie'
    '''
    total_movies = single_value(query_total_movies, 'total_movies')

    # Fetch total count of years
    query_total_years = '''
        SELECT COUNT(DISTINCT startYear) as total_years
        FROM title_basics
        WHERE titleType = 'movie' AND startYear IS NOT NULL AND startYear != '\\N'
    '''
    total_years = single_value(query_total_years, 'total_years')

    # Fetch average rating of movies. The join restricts the average to
    # titles of type 'movie' so it matches the other two figures; without
    # it every rated title in title_ratings would be averaged.
    query_avg_rating = '''
        SELECT AVG(tr.averageRating) as avg_rating
        FROM title_ratings tr
        JOIN title_basics tb ON tb.tconst = tr.tconst
        WHERE tb.titleType = 'movie'
    '''
    avg_rating = single_value(query_avg_rating, 'avg_rating')

    return total_movies, total_years, avg_rating

# Function to plot global map of total films per region
def plot_global_map(df):
    """Build a choropleth of film counts per country.

    Args:
        df: DataFrame with a ``region`` column holding two-letter country
            codes. Each row is counted as one film for its region. The
            frame is left unmodified.

    Returns:
        A Plotly figure in which each country is coloured by
        ``log10(total_films + 1)``. Rows whose code is not in the mapping
        below are left out of the counts.
    """
    # Country code to name mapping
    country_mapping = {
        'AF': 'Afghanistan', 'AX': 'Åland Islands', 'AL': 'Albania', 'DZ': 'Algeria', 'AS': 'American Samoa',
        'AD': 'Andorra', 'AO': 'Angola', 'AI': 'Anguilla', 'AQ': 'Antarctica', 'AG': 'Antigua and Barbuda',
        'AR': 'Argentina', 'AM': 'Armenia', 'AW': 'Aruba', 'AU': 'Australia', 'AT': 'Austria',
        'AZ': 'Azerbaijan', 'BS': 'Bahamas', 'BH': 'Bahrain', 'BD': 'Bangladesh', 'BB': 'Barbados',
        'BY': 'Belarus', 'BE': 'Belgium', 'BZ': 'Belize', 'BJ': 'Benin', 'BM': 'Bermuda',
        'BT': 'Bhutan', 'BO': 'Bolivia', 'BA': 'Bosnia and Herzegovina', 'BW': 'Botswana', 'BR': 'Brazil',
        'BN': 'Brunei Darussalam', 'BG': 'Bulgaria', 'BF': 'Burkina Faso', 'BI': 'Burundi', 'KH': 'Cambodia',
        'CM': 'Cameroon', 'CA': 'Canada', 'CV': 'Cape Verde', 'KY': 'Cayman Islands', 'CF': 'Central African Republic',
        'TD': 'Chad', 'CL': 'Chile', 'CN': 'China', 'CO': 'Colombia', 'KM': 'Comoros',
        'CG': 'Congo', 'CD': 'Congo, Democratic Republic of the', 'CR': 'Costa Rica', 'HR': 'Croatia', 'CU': 'Cuba',
        'CY': 'Cyprus', 'CZ': 'Czech Republic', 'DK': 'Denmark', 'DJ': 'Djibouti', 'DM': 'Dominica',
        'DO': 'Dominican Republic', 'EC': 'Ecuador', 'EG': 'Egypt', 'SV': 'El Salvador', 'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea', 'EE': 'Estonia', 'ET': 'Ethiopia', 'FJ': 'Fiji', 'FI': 'Finland',
        'FR': 'France', 'GA': 'Gabon', 'GM': 'Gambia', 'GE': 'Georgia', 'DE': 'Germany',
        'GH': 'Ghana', 'GR': 'Greece', 'GD': 'Grenada', 'GT': 'Guatemala', 'GN': 'Guinea',
        'GW': 'Guinea-Bissau', 'GY': 'Guyana', 'HT': 'Haiti', 'HN': 'Honduras', 'HK': 'Hong Kong',
        'HU': 'Hungary', 'IS': 'Iceland', 'IN': 'India', 'ID': 'Indonesia', 'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq', 'IE': 'Ireland', 'IL': 'Israel', 'IT': 'Italy', 'JM': 'Jamaica',
        'JP': 'Japan', 'JO': 'Jordan', 'KZ': 'Kazakhstan', 'KE': 'Kenya', 'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of', 'KW': 'Kuwait', 'KG': 'Kyrgyzstan', 'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia', 'LB': 'Lebanon', 'LS': 'Lesotho', 'LR': 'Liberia', 'LY': 'Libya',
        'LT': 'Lithuania', 'LU': 'Luxembourg', 'MO': 'Macao', 'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar', 'MW': 'Malawi', 'MY': 'Malaysia', 'MV': 'Maldives', 'ML': 'Mali',
        'MT': 'Malta', 'MR': 'Mauritania', 'MU': 'Mauritius', 'MX': 'Mexico', 'MD': 'Moldova, Republic of',
        'MN': 'Mongolia', 'ME': 'Montenegro', 'MA': 'Morocco', 'MZ': 'Mozambique', 'MM': 'Myanmar',
        'NA': 'Namibia', 'NP': 'Nepal', 'NL': 'Netherlands', 'NZ': 'New Zealand', 'NI': 'Nicaragua',
        'NE': 'Niger', 'NG': 'Nigeria', 'NO': 'Norway', 'OM': 'Oman', 'PK': 'Pakistan',
        'PW': 'Palau', 'PA': 'Panama', 'PG': 'Papua New Guinea', 'PY': 'Paraguay', 'PE': 'Peru',
        'PH': 'Philippines', 'PL': 'Poland', 'PT': 'Portugal', 'QA': 'Qatar', 'RO': 'Romania',
        'RU': 'Russian Federation', 'RW': 'Rwanda', 'WS': 'Samoa', 'SA': 'Saudi Arabia', 'SN': 'Senegal',
        'RS': 'Serbia', 'SL': 'Sierra Leone', 'SG': 'Singapore', 'SK': 'Slovakia', 'SI': 'Slovenia',
        'SB': 'Solomon Islands', 'ZA': 'South Africa', 'ES': 'Spain', 'LK': 'Sri Lanka', 'SD': 'Sudan',
        'SR': 'Suriname', 'SZ': 'Swaziland', 'SE': 'Sweden', 'CH': 'Switzerland', 'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China', 'TJ': 'Tajikistan', 'TZ': 'Tanzania, United Republic of', 'TH': 'Thailand',
        'TL': 'Timor-Leste', 'TG': 'Togo', 'TO': 'Tonga', 'TT': 'Trinidad and Tobago', 'TN': 'Tunisia',
        'TR': 'Turkey', 'TM': 'Turkmenistan', 'UG': 'Uganda', 'UA': 'Ukraine', 'AE': 'United Arab Emirates',
        'GB': 'United Kingdom', 'US': 'United States', 'UY': 'Uruguay', 'UZ': 'Uzbekistan', 'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of', 'VN': 'Viet Nam', 'ZM': 'Zambia', 'ZW': 'Zimbabwe'
    }

    # Map country codes to country names in a new frame. Assigning back into
    # df would overwrite the caller's codes, and a second call on the same
    # frame would then map every (already translated) name to NaN.
    named_regions = pd.DataFrame({'region': df['region'].map(country_mapping)})

    # Group by country and count the number of films. groupby skips NaN keys,
    # so rows with an unmapped code do not contribute to any country.
    df_grouped = named_regions.groupby('region').size().reset_index(name='total_films')

    # Apply log transformation to handle outliers; +1 keeps log10 defined at 0
    df_grouped['log_total_films'] = np.log10(df_grouped['total_films'] + 1)

    # Plotting the global map
    fig = px.choropleth(df_grouped, locations='region', locationmode='country names',
                        color='log_total_films',
                        hover_name='region',
                        color_continuous_scale=px.colors.sequential.Plasma,
                        title='Total Films by Country (Log Scale)')
    fig.update_layout(geo=dict(showframe=False, showcoastlines=False, projection_type='equirectangular'))

    return fig

# Function to create word cloud of genres
def create_genre_wordcloud(conn):
    """Render a word cloud of movie genres into the current Streamlit container.

    Genres are read from ``title_basics`` for rows whose ``titleType`` is
    ``'movie'``. Each comma-separated genre of a movie counts once towards
    that genre's frequency, and the frequency drives the word size.

    Args:
        conn: Open connection to a database containing ``title_basics``.

    Returns:
        None. The figure is drawn with ``st.pyplot``; when no genre is
        found an informational message is shown instead.
    """
    query = '''
        SELECT genres
        FROM title_basics
        WHERE titleType = 'movie' AND genres IS NOT NULL AND genres != '\\N'
    '''
    df = fetch_data(conn, query)

    # One entry per (movie, genre). explode() avoids building the wide
    # intermediate frame that split(expand=True).stack() would create.
    genres = df['genres'].str.split(',').explode()
    genres = genres[genres.notna() & (genres != '\\N')]
    genre_counts = Counter(genres)

    # generate_from_frequencies raises on an empty mapping, so stop early
    # with a message instead of crashing the whole page.
    if not genre_counts:
        st.info('No genre data available.')
        return

    # Generate the word cloud
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate_from_frequencies(genre_counts)

    # Draw on an explicit figure rather than pyplot's global current figure,
    # and close it afterwards: otherwise every call leaves one more open
    # figure behind in memory.
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        ax.imshow(wordcloud, interpolation='bilinear')
        ax.axis('off')
        ax.set_title('Top Genres in IMDb Dataset')
        st.pyplot(fig)
    finally:
        plt.close(fig)

# Function to find best movie of each genre by numVotes * averageRating
def find_best_movies_by_genre(conn):
    """Return the top-scoring movie of each genre.

    A movie is filed under the first genre listed in its ``genres`` field
    only, and its score is ``numVotes * averageRating``.

    Args:
        conn: Open connection to a database containing ``title_basics``
            and ``title_ratings`` (joined on ``tconst``).

    Returns:
        A DataFrame with columns ``genre``, ``primaryTitle``, ``startYear``,
        ``averageRating``, ``numVotes`` and ``score``: one row per genre,
        sorted by ``score`` descending with a fresh 0-based index. The frame
        is empty (same columns) when the query returns no rows.
    """
    query = '''
        SELECT tb.tconst, tb.primaryTitle, tb.startYear, tb.genres, tr.averageRating, tr.numVotes
        FROM title_basics tb
        JOIN title_ratings tr ON tb.tconst = tr.tconst
        WHERE tb.titleType = 'movie' AND tb.genres IS NOT NULL AND tb.genres != '\\N'
    '''
    df = fetch_data(conn, query)

    output_columns = ['genre', 'primaryTitle', 'startYear', 'averageRating', 'numVotes', 'score']
    if df.empty:
        # Nothing to rank; return the usual shape so callers can still
        # call .head() or display the table.
        return pd.DataFrame(columns=output_columns)

    # Keep only the first listed genre. n=1 stops splitting after the first
    # comma, and .str[0] picks that piece without materializing a column
    # for every additional genre.
    df['genre'] = df['genres'].str.split(',', n=1).str[0]

    # Calculate score based on numVotes * averageRating
    df['score'] = df['numVotes'] * df['averageRating']

    # Rows without a usable score cannot win their genre; dropping them
    # keeps idxmax from yielding a missing label for an all-NaN genre.
    scored = df.dropna(subset=['genre', 'score'])

    # Get the best movie (highest score) for each genre
    idx = scored.groupby('genre')['score'].idxmax()
    best_movies_by_genre = (
        df.loc[idx, output_columns]
        .sort_values(by='score', ascending=False)
        .reset_index(drop=True)
    )

    return best_movies_by_genre

# Main function to orchestrate the dashboard
def main():
    """Render the IMDb dashboard: summary figures, then three visualizations.

    Opens ``imdb_data.db`` and reads ``movie_region.csv`` for the map. The
    database connection is closed on every exit path, including when one of
    the rendering steps raises.
    """
    # Load data from SQLite database
    db_file = 'imdb_data.db'  # Adjust path as needed
    conn = load_data(db_file)

    try:
        # Fetch summary info
        total_movies, total_years, avg_rating = fetch_summary_info(conn)

        # Display summary information in three columns with big bold numbers
        st.write("# IMDb Dashboard")
        st.write("## Summary Information")

        # Layout the summary information in three columns with big bold numbers
        col1, col2, col3 = st.columns(3)
        with col1:
            st.subheader("Total Movies")
            st.markdown(f"**{total_movies}**")
        with col2:
            st.subheader("Total Years")
            st.markdown(f"**{total_years}**")
        with col3:
            st.subheader("Average Rating")
            # The average is None/NaN when there are no ratings; formatting
            # that with :.2f would raise, so show a placeholder instead.
            avg_text = "N/A" if pd.isna(avg_rating) else f"{avg_rating:.2f}"
            st.markdown(f"**{avg_text}**")

        # Create a single row layout for visualizations
        st.write("## Visualizations")

        # Use st.columns() to create a single row with three columns
        col1, col2, col3 = st.columns(3)

        # Column 1: Global map of total films by country
        with col1:
            st.subheader("Global Map of Total Films by Country")
            df_movie_region = pd.read_csv('movie_region.csv')  # Replace with your actual CSV loading
            fig = plot_global_map(df_movie_region)
            st.plotly_chart(fig, use_container_width=True)

        # Column 2: Word cloud of top genres
        with col2:
            st.subheader("Word Cloud of Top Genres")
            create_genre_wordcloud(conn)

        # Column 3: Best movie of each genre
        with col3:
            st.subheader("Best Movie of Each Genre")
            best_movies_by_genre = find_best_movies_by_genre(conn)
            st.table(best_movies_by_genre.head(10))  # Displaying top 10 best movies
    finally:
        # Close database connection even if a step above failed
        conn.close()

# Build the dashboard only when this file is executed as a script, not when it is imported
if __name__ == '__main__':
    main()