# emredeveloper's picture
# Upload 7 files
# 650b0c9
# Importing necessary Python libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objs as go
import plotly.express as px
import plotly.io as pio
pio.templates.default = "plotly_white"
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
# Load the subscription data from the CSV file and preview the raw rows.
data = pd.read_csv('Netflix-Subscriptions.csv')
print(data.head())

# The 'Time Period' column is stored as day/month/year text; convert it to
# real datetimes so it can be used as a time axis, then preview again.
parsed_periods = pd.to_datetime(data['Time Period'], format='%d/%m/%Y')
data['Time Period'] = parsed_periods
print(data.head())
# Line chart of the subscriber count against the reporting date.
subscribers_trace = go.Scatter(
    x=data['Time Period'],
    y=data['Subscribers'],
    mode='lines',
    name='Subscribers',
)
fig = go.Figure(data=[subscribers_trace])
fig.update_layout(
    title='Netflix Quarterly Subscriptions Growth',
    xaxis_title='Date',
    yaxis_title='Netflix Subscriptions',
)
fig.show()
# Percentage change of the subscriber count relative to the previous row.
# The first row has no predecessor, so its growth rate is NaN.
data['Quarterly Growth Rate'] = data['Subscribers'].pct_change() * 100
# Preview; as a bare expression this only renders in an interactive session.
data.head()

# Colour each bar by the sign of the growth: green for positive, red
# otherwise. NaN compares False against 0, so the first row is marked 'red'.
# (The colour name previously carried a stray trailing space: 'green '.)
data["Bar Color"] = np.where(data["Quarterly Growth Rate"] > 0, 'green', 'red')

# Bar chart of the quarter-over-quarter growth rate.
fig = go.Figure()
fig.add_trace(go.Bar(
    x=data['Time Period'],
    y=data['Quarterly Growth Rate'],
    marker_color=data['Bar Color'],
    name='Quarterly Growth Rate'
))
fig.update_layout(title='Netflix Quarterly Subscriptions Growth Rate',
                  xaxis_title='Time Period',
                  yaxis_title='Quarterly Growth Rate (%)')
fig.show()
# Calendar year of each observation, used to aggregate the rows per year.
data["Year"] = data["Time Period"].dt.year

# Year-over-year growth. The earlier version called pct_change() *inside*
# each year group, which yields quarter-over-quarter changes (reset to 0 at
# the start of every year) and several bars per year on the chart. Instead,
# reduce each year to a single figure -- the latest reported subscriber count
# of that year -- and compare consecutive years.
# NOTE(review): a year with fewer observations than the others (e.g. the
# first or last year in the file) is represented by its latest available
# row, so its value is a partial-year figure -- confirm this is acceptable.
yearly_subscribers = (
    data.sort_values('Time Period')
        .groupby('Year')['Subscribers']
        .last()
)
# The first year has no predecessor; report its growth as 0 rather than NaN.
yearly_growth = yearly_subscribers.pct_change().fillna(0) * 100

# One colour per year (green for positive growth, red otherwise). Kept in a
# separate list so the per-row data['Bar Color'] column is not overwritten
# with values of a different length.
yearly_bar_colors = ['green' if growth > 0 else 'red' for growth in yearly_growth]

# Plot the yearly subscriber growth rate using bar graphs
fig = go.Figure()
fig.add_trace(go.Bar(
    x=yearly_growth.index,
    y=yearly_growth,
    marker_color=yearly_bar_colors,
    name='Yearly Growth Rate'
))
fig.update_layout(title='Netflix Yearly Subscriber Growth Rate',
                  xaxis_title='Year',
                  yaxis_title='Yearly Growth Rate (%)')
fig.show()
# Subscriber counts as a Series indexed by reporting date.
indexed_data = data.set_index('Time Period')
time_series = indexed_data['Subscribers']

# First difference of the series; the leading NaN produced by diff() is dropped.
differenced_series = time_series.diff().dropna()

# Draw the ACF (left panel) and PACF (right panel) of the differenced series.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 4))
for correlation_plot, panel in zip((plot_acf, plot_pacf), axes):
    correlation_plot(differenced_series, ax=panel)
plt.show()
# ARIMA order (p, d, q).
# NOTE(review): presumably read off the ACF/PACF plots of the differenced
# series -- confirm the choice of (1, 1, 2).
p, d, q = 1, 1, 2
model = ARIMA(time_series, order=(p, d, q))
results = model.fit()
print(results.summary())

# Number of out-of-sample periods to forecast.
future_steps = 20

# predict() treats `end` as inclusive, so forecasting exactly `future_steps`
# periods starting right after the last observation needs
# end = start + future_steps - 1. (The previous "- 2" returned one forecast
# fewer than requested.)
forecast_start = len(time_series)
forecast_end = forecast_start + future_steps - 1
predictions = results.predict(forecast_start, forecast_end)

# Cast the forecasts to integers; astype(int) truncates the fractional part.
predictions = predictions.astype(int)
predictions
# Combine the observed series and the model predictions into one table,
# aligned on their indexes.
forecast = pd.DataFrame({'Original': time_series, 'Predictions': predictions})

# Draw both columns as lines on a single chart: predictions first, then the
# observed data.
fig = go.Figure()
for column, label in (('Predictions', 'Predictions'),
                      ('Original', 'Original Data')):
    fig.add_trace(go.Scatter(
        x=forecast.index,
        y=forecast[column],
        mode='lines',
        name=label,
    ))

fig.update_layout(
    title='Netflix Quarterly Subscription Predictions',
    xaxis_title='Time Period',
    yaxis_title='Subscribers',
    legend={'x': 0.1, 'y': 0.9},
    showlegend=True,
)
fig.show()
forecast