{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "\n",
    "\n",
    "def load_first_column(csv_path):\n",
    "    \"\"\"Read a CSV file and return its first column as a list of floats.\n",
    "\n",
    "    Rows that the csv reader yields as empty lists (blank lines) are skipped.\n",
    "    A first field that float() cannot parse raises ValueError.\n",
    "    \"\"\"\n",
    "    # newline=\"\" is the mode the csv module documentation asks for, so the\n",
    "    # reader performs its own newline handling.\n",
    "    with open(csv_path, \"r\", newline=\"\") as arquivo:\n",
    "        return [float(linha[0]) for linha in csv.reader(arquivo) if linha]\n",
    "\n",
    "\n",
    "list_output_ALOY_MbR = load_first_column(\"metricas_ALOY_MbR.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "list_output_ALOY_NEOSP_SVR = load_first_column(\"metricas_ALOY_NEOSP_SVR.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.5313348545588383"
      ]
     },
     "execution_count": 87,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "np.mean(list_output_ALOY_NEOSP_SVR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd\n",
    "import textstat\n",
    "from nltk.corpus import stopwords\n",
    "from sklearn import svm\n",
    "from sklearn.dummy import DummyRegressor\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "from sklearn.feature_selection import SelectKBest, f_classif, f_regression\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.model_selection import RepeatedKFold, cross_val_score\n",
    "from sklearn.pipeline import Pipeline, make_pipeline\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from textblob import TextBlob\n",
    "\n",
    "from util import (\n",
    "    escape_hex_character_codes,\n",
    "    escape_links,\n",
    "    escape_odd_spaces,\n",
    "    escape_punctuation_boundaries,\n",
    "    escape_strings,\n",
    "    escape_tags,\n",
    "    escape_tags_and_content,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "project_name = \"7764\"\n",
    "\n",
    "# Joined from parts so the path resolves on any OS; the previous hard-coded\n",
    "# backslash separators only worked on Windows.\n",
    "json_path = Path(\"database\") / \"neo\" / \"json\" / \"{}.json\".format(project_name)\n",
    "df = pd.read_json(json_path)\n",
    "\n",
    "\n",
    "def extract_leg_features():\n",
    "    \"\"\"Placeholder for a feature-extraction step; for now it only returns a label string.\"\"\"\n",
    "    return \"Extract Leg, Sent, Subj Features\"\n",
    "\n",
    "\n",
    "# Disabled experiment 1.\n",
    "# NOTE(review): `Tfi` is not defined or imported in the visible cells --\n",
    "# presumably TfidfVectorizer; confirm before re-enabling.\n",
    "# pipeline = Pipeline(\n",
    "#     [\n",
    "#         (\"vect\", Tfi()),\n",
    "#         #(\"red\", SelectKBest(f_regression, k=50)),\n",
    "#         (\"scaler\", StandardScaler()),\n",
    "#         (\"clf\", svm.SVR()),\n",
    "#     ]\n",
    "# )\n",
    "# pipeline\n",
    "\n",
    "# Disabled experiment 2.\n",
    "# NOTE(review): extract_leg_features() currently returns a plain string, not a\n",
    "# transformer object, so this step needs a real implementation before the\n",
    "# pipeline is re-enabled.\n",
    "# pipeline = Pipeline(\n",
    "#     [\n",
    "#         (\"vect\", extract_leg_features()),\n",
    "#         (\"red\", SelectKBest(f_regression, k=50)),\n",
    "#         (\"scaler\", StandardScaler()),\n",
    "#         (\"clf\", svm.SVR()),\n",
    "#     ]\n",
    "# )\n",
    "# pipeline\n",
    "\n",
    "# Active pipeline: a single DummyRegressor step using the \"mean\" strategy.\n",
    "pipeline = Pipeline(\n",
    "    [\n",
    "        (\"clf\", DummyRegressor(strategy=\"mean\")),\n",
    "    ]\n",
    ")\n",
    "pipeline"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.11"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}