{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "intro-title",
   "metadata": {},
   "source": [
    "# Advertising: decision-tree baseline for `sales`\n",
    "\n",
    "Fit a `DecisionTreeRegressor` that predicts `sales` from the `TV`, `radio` and `newspaper` columns of `Advertising.csv`, then compare the in-sample mean absolute error with the error on a held-out validation split.\n",
    "\n",
    "The notebook is meant to be run top to bottom (Restart Kernel -> Run All)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6452ab53",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sklearn.metrics import mean_absolute_error\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.tree import DecisionTreeRegressor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "config-constants",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Everything a reader might want to tune lives here.\n",
    "DATA_PATH = \"Advertising.csv\"\n",
    "SEED = 0  # shared by the train/validation split and both trees so reruns give identical numbers"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "load-header",
   "metadata": {},
   "source": [
    "## Load and inspect the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "load-data",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(DATA_PATH)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5533bf4b",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5f2f38b5",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a3c652b1",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "features-header",
   "metadata": {},
   "source": [
    "## Select features and target\n",
    "\n",
    "`sales` is the value being predicted and `Unnamed: 0` is only a 1-based row number, so neither belongs in the feature matrix. Using every column of `df` as a feature would hand the model the answer and make every error metric below meaningless."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "64f6ec0f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Predictors only: the target (`sales`) and the row counter (`Unnamed: 0`) are excluded on purpose.\n",
    "df_features = [\"TV\", \"radio\", \"newspaper\"]\n",
    "y = df[\"sales\"]\n",
    "X = df[df_features]\n",
    "\n",
    "# Guard against re-introducing target leakage.\n",
    "assert \"sales\" not in X.columns, \"target leaked into the feature matrix\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f863cb72",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6cbf836d",
   "metadata": {},
   "outputs": [],
   "source": [
    "y.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "889e08ad",
   "metadata": {},
   "outputs": [],
   "source": [
    "X.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "insample-header",
   "metadata": {},
   "source": [
    "## Fit on all rows (in-sample error)\n",
    "\n",
    "The tree below has no depth limit, so it can memorise the rows it was trained on. The in-sample MAE is therefore expected to be close to zero and says nothing about how well the model generalises; the validation split in the next section is the number to look at."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e486a41a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# model created; random_state fixed so tie-breaking between equally good splits is repeatable\n",
    "df_model = DecisionTreeRegressor(random_state=SEED)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c7070799",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b796e14e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# fit model\n",
    "df_model.fit(X, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b8292980",
   "metadata": {},
   "outputs": [],
   "source": [
    "# predictions on the same rows the model was trained on\n",
    "predicted_sales = df_model.predict(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "776ea1bb",
   "metadata": {},
   "outputs": [],
   "source": [
    "predicted_sales[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "785db183",
   "metadata": {},
   "outputs": [],
   "source": [
    "# calculating the in-sample MAE\n",
    "mean_absolute_error(y, predicted_sales)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7c56805f",
   "metadata": {},
   "source": [
    "## Splitting the data\n",
    "\n",
    "Hold out part of the rows for validation so the error is measured on data the tree has not seen."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "592afda8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# split the data so predictions on sales can be checked against unseen rows\n",
    "train_X, value_X, train_y, value_y = train_test_split(X, y, random_state=SEED)\n",
    "\n",
    "# every row ends up in exactly one of the two partitions\n",
    "assert len(train_X) + len(value_X) == len(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9b16c96b",
   "metadata": {},
   "outputs": [],
   "source": [
    "train_X.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ccb49d15",
   "metadata": {},
   "outputs": [],
   "source": [
    "value_X.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1cf49a81",
   "metadata": {},
   "outputs": [],
   "source": [
    "train_y.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2a43ddbe",
   "metadata": {},
   "outputs": [],
   "source": [
    "value_y.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1d693cd4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# model definition for the split data\n",
    "model_df2 = DecisionTreeRegressor(random_state=SEED)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1c9fa0e3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# fit on the training partition only\n",
    "model_df2.fit(train_X, train_y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "281c1e58",
   "metadata": {},
   "outputs": [],
   "source": [
    "# predict the held-out rows\n",
    "df2_predict = model_df2.predict(value_X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "91b05122",
   "metadata": {},
   "outputs": [],
   "source": [
    "df2_predict[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "22a076e6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# validation MAE: the honest estimate of prediction error\n",
    "mae = mean_absolute_error(value_y, df2_predict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4584d26d",
   "metadata": {},
   "outputs": [],
   "source": [
    "mae"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}