import gradio as gr
import pandas as pd
import plotly.express as px

datadir = 'data/emissions/complete'

# Load the experiment results (the parameter and performance tables are loaded for
# reference; the plots below only use the emissions and modalities data).
model_param_df = pd.read_csv('data/model_parameters.csv', header=0)
model_performance_df = pd.read_csv('data/performance.csv', header=0)
emissions_df = pd.read_csv('data/co2_data.csv', header=0)
modalities_df = pd.read_csv('data/modalities_data.csv', header=0)

# Keep only the fine-tuned (non zero-shot) runs and make task names readable.
finetuned_df = emissions_df[~emissions_df['task'].str.contains('zero')].copy()
finetuned_df['task'] = finetuned_df['task'].str.replace('_', ' ')

# Carbon emitted per model, as a function of model size (log-log scale).
fig0 = px.scatter(emissions_df, x="num_params", y="query emissions (g)", color="model",
                  log_x=True, log_y=True)
fig0.update_layout(xaxis={'categoryorder': 'mean ascending'})
fig0.update_layout(yaxis_title='Total carbon emitted (g)')
fig0.update_layout(xaxis_title='Number of Parameters')

# Energy used per task (log scale on the y axis).
fig1 = px.box(finetuned_df, x="task", y="query_energy (kWh)", color="task", log_y=True)
fig1.update_layout(xaxis={'categoryorder': 'mean ascending'})
fig1.update_layout(yaxis_title='Total energy used (kWh)')
fig1.update_layout(xaxis_title='Task')

# Carbon emitted per modality, as a function of model size (log-log scale).
fig2 = px.scatter(modalities_df, x="num_params", y="query emissions (g)", color="modality",
                  log_x=True, log_y=True, custom_data=['model', 'task'])
fig2.update_traces(
    hovertemplate="<br>".join([
        "Model: %{customdata[0]}",
        "Task: %{customdata[1]}",
    ])
)
fig2.update_layout(xaxis_title='Model size (number of parameters)')
fig2.update_layout(yaxis_title='Model emissions (g of CO2)')

demo = gr.Blocks()

with demo:
    gr.Markdown("# CO2 Inference Demo")
    gr.Markdown("### TL;DR - We ran a series of experiments to measure the energy efficiency and carbon emissions of different \
models from the HuggingFace Hub, and to see how different tasks and models compare.")
    gr.Markdown("### We found that multi-purpose, generative models are orders of magnitude more energy-intensive than task-specific systems \
for a variety of tasks, even for models with a similar number of parameters.")
    gr.Markdown("### Explore the plots below to get more insights about the different models and tasks from our study.")
    with gr.Accordion("More details about our methodology:", open=False):
        gr.Markdown("We chose ten ML tasks: text classification, token classification, question answering, \
masked language modeling, text generation, summarization, image classification, object detection, \
image captioning and image generation. For each of the tasks, we chose three of the most downloaded datasets and eight of the most \
downloaded models from the Hugging Face Hub. We ran each of the models ten times over a 1,000-sample subset of each dataset \
and measured the energy consumed and carbon emitted.")
    with gr.Row():
        with gr.Column():
            gr.Markdown("## All models from our study (carbon)")
            gr.Markdown("### Double click on a model name in the list on the right to isolate its datapoints:")
            gr.Markdown("The axes of the plot are on a logarithmic scale: the most carbon-intensive model emits over 9,000 times \
more carbon than the least carbon-intensive one!")
            gr.Plot(fig0)
    with gr.Row():
        with gr.Column():
            gr.Markdown("## Task-by-task comparison (energy)")
            gr.Markdown("### Grouping the models by task, we can see different patterns emerge:")
            gr.Markdown("Image generation is by far the most energy- and carbon-intensive task of the ones studied, and text classification \
is the least.")
            gr.Plot(fig1)
        with gr.Column():
            gr.Markdown("## Modality comparison (carbon)")
            gr.Markdown("### Grouping the models by their modality shows the different characteristics of each one:")
            gr.Markdown("We can see that tasks involving images (image-to-text, image-to-category) require more energy and emit more carbon \
than ones involving text.")
            gr.Plot(fig2)

demo.launch()