File size: 3,604 Bytes
ed6cac9 4914557 ed6cac9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
import streamlit as st
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
import matplotlib.pyplot as plt
# Load and cache dataset
@st.cache
def load_data():
return pd.read_excel('./gcp_usage_data_2024.xlsx')
df = load_data()
# Aggregate costs by service description
service_costs = df.groupby('Service Description')['Cost ($)'].sum()
# Calculate average cost
average_cost = service_costs.mean()
# Filter services with costs greater than the average cost
services_above_average = service_costs[service_costs > average_cost].sort_values(ascending=False)
# Forecast future costs for a specific service using ARIMA
def forecast_costs(service_name, steps=3):
service_data = df[df['Service Description'] == service_name].copy()
service_data['Date'] = pd.to_datetime(service_data['Date'])
service_data.set_index('Date', inplace=True)
monthly_costs = service_data['Cost ($)'].resample('M').sum()
model = ARIMA(monthly_costs, order=(1, 1, 1))
model_fit = model.fit()
forecast = model_fit.forecast(steps=steps)
return monthly_costs, forecast
# Streamlit UI
st.title('GCP Cost Analysis and Optimization')
# Display the dataset
if st.checkbox('Show Raw Data'):
st.write(df)
# Display aggregate costs by service
st.write("### Aggregated Costs by Service")
st.dataframe(service_costs.sort_values(ascending=False))
# Show services with costs greater than the average
st.write(f"### Average Cost: ${average_cost:.2f}")
st.write("### Services with Costs Greater Than Average:")
st.dataframe(services_above_average)
# Forecast costs
st.write("### Cost Forecasting")
service_name = st.selectbox('Select a Service for Forecasting', df['Service Description'].unique())
if st.button('Forecast Costs'):
monthly_costs, forecast = forecast_costs(service_name)
st.write("### Forecasted Costs")
st.write(forecast)
# Plot the results
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(monthly_costs, label='Observed Costs')
ax.plot(pd.date_range(start=monthly_costs.index[-1], periods=len(forecast) + 1, freq='M')[1:], forecast, label='Forecast', color='red')
ax.set_title('Monthly Cost Forecast')
ax.set_xlabel('Date')
ax.set_ylabel('Cost ($)')
ax.legend()
st.pyplot(fig)
# Cost Optimization
st.write("### Cost Optimization Analysis")
optimization_factor = st.slider('Optimization Factor (%)', min_value=0, max_value=100, value=25)
df['Optimized Cost ($)'] = df['Cost ($)'] * (1 - optimization_factor / 100)
total_cost_before = df['Cost ($)'].sum()
total_cost_after = df['Optimized Cost ($)'].sum()
cost_change_percentage = ((total_cost_before - total_cost_after) / total_cost_before) * 100
dollar_saving = total_cost_before - total_cost_after
st.write(f"Total Cost Before Optimization: ${total_cost_before:.2f}")
st.write(f"Total Cost After Optimization: ${total_cost_after:.2f}")
st.write(f"Percentage Change in Cost: {cost_change_percentage:.2f}%")
st.write(f"Dollar Saving: ${dollar_saving:.2f}")
# Optionally, show a chart of cost before and after optimization
fig, ax = plt.subplots(figsize=(10, 6))
services = df['Service Description'].unique()
costs_before = df.groupby('Service Description')['Cost ($)'].sum()
costs_after = df.groupby('Service Description')['Optimized Cost ($)'].sum()
ax.barh(services, costs_before, label='Before Optimization', alpha=0.7)
ax.barh(services, costs_after, label='After Optimization', alpha=0.7)
ax.set_title('Cost Before and After Optimization')
ax.set_xlabel('Cost ($)')
ax.legend()
st.pyplot(fig)
|