Upload Job_Recommendation_System.ipynb
Job_Recommendation_System.ipynb  ADDED  (+1230 −0)
RECOMMENDATION MODEL

In [20]:
```python
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
```

In [21]:
```python
# Mock data creation
def create_mock_data():
    users_data = "rematch_train_candidate_field.csv"
    applicants = pd.read_csv(users_data)

    jobs_data = "jobs_data.csv"
    companies = pd.read_csv(jobs_data)

    train_applicants = applicants
    test_data = "1st_test.csv"
    # "/content/sample_data/test_train.csv"
    test_applicants = pd.read_csv(test_data)

    return train_applicants, test_applicants, companies
```

In [22]:
```python
train_user, test_user, jobs = create_mock_data()
```

In [23]:
```python
print(type(train_user))
```
Output:
```
<class 'pandas.core.frame.DataFrame'>
```

In [24]:
```python
print("Training data size:", train_user.shape[0])
print("Test data size:", test_user.shape[0])
```
Output:
```
Training data size: 23724
Test data size: 4745
```

In [25]:
```python
list_hard_skill = [test_user["hard_skill"].iloc[i].replace("[", "").replace("]", "").replace("'", "") for i in range(len(test_user))]
list_soft_skill = [test_user["soft_skill"].iloc[i].replace("[", "").replace("]", "").replace("'", "") for i in range(len(test_user))]
```
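The chained `str.replace` calls above strip the brackets and quotes from the stringified skill lists. A minimal alternative sketch (an assumption, not what the notebook does): parse the list literals with `ast.literal_eval` and join them back into the comma-separated form the TF-IDF step below expects.

```python
# Sketch (assumption): parse stringified lists such as "['act', 'algorithms']"
# with ast.literal_eval instead of chained str.replace, then re-join them.
import ast

def parse_skill_column(series):
    # Each entry is expected to be a Python-list literal; literal_eval returns a list.
    return [", ".join(ast.literal_eval(s)) for s in series]

# Example: parse_skill_column(test_user["hard_skill"]) yields strings like
# "act, advertising sales, algorithms, ..."
```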
|
In [26]:
```python
print(type(list_hard_skill))
```
Output:
```
<class 'list'>
```

In [27]:
```python
test_user["final_hard_skill"] = pd.DataFrame(list_hard_skill)
test_user["final_soft_skill"] = pd.DataFrame(list_soft_skill)
test_user.head(3)
```
Output: first three rows of test_user with columns User ID, candidate_field, label, hard_skill, soft_skill, final_hard_skill, final_soft_skill (user 14649 / it jobs / label 1, user 801 / marketing / label 0, user 4393 / accounting / label 0).
|
In [28]:
```python
list_hard_skill = [train_user["hard_skill"].iloc[i].replace("[", "").replace("]", "").replace("'", "") for i in range(len(train_user))]
list_soft_skill = [train_user["soft_skill"].iloc[i].replace("[", "").replace("]", "").replace("'", "") for i in range(len(train_user))]
```

In [29]:
```python
train_user["final_hard_skill"] = pd.DataFrame(list_hard_skill)
train_user["final_soft_skill"] = pd.DataFrame(list_soft_skill)
train_user.head(3)
```
Output: first three rows of train_user with the same columns (users 1–3: retail & consumer products, sales, healthcare & medical; all label 0).

In [30]:
```python
list_hard_skill = [jobs["Hard Skills"].iloc[i].replace("[", "").replace("]", "").replace("'", "") for i in range(len(jobs))]
list_soft_skill = [jobs["Soft Skills"].iloc[i].replace("[", "").replace("]", "").replace("'", "") for i in range(len(jobs))]
```

In [31]:
```python
jobs["final_hard_skill"] = pd.DataFrame(list_hard_skill)
jobs["final_soft_skill"] = pd.DataFrame(list_soft_skill)
jobs.head(3)
```
Output: first three rows of jobs with columns Job ID, Major, Hard Skills, Soft Skills, final_hard_skill, final_soft_skill (majors: accounting; administration & office support; advertising, arts & media).

In [32]:
```python
# Feature Engineering
def feature_engineering(applicants, companies):
    # Vectorize skills and majors
    tfidf_vectorizer_skills = TfidfVectorizer()
    tfidf_vectorizer_majors = TfidfVectorizer()

    all_skills = pd.concat([applicants['final_hard_skill'], applicants['final_soft_skill'],
                            companies['final_hard_skill'], companies['final_soft_skill']])
    all_majors = pd.concat([applicants['candidate_field'], companies['Major']])

    all_skills_vectorized = tfidf_vectorizer_skills.fit_transform(all_skills)
    all_majors_vectorized = tfidf_vectorizer_majors.fit_transform(all_majors)

    num_applicants = len(applicants)
    num_companies = len(companies)

    # Split the TF-IDF vectors back into applicants and companies
    applicants_skills_vectorized = all_skills_vectorized[:num_applicants*2]  # because each applicant has 2 skill entries
    companies_skills_vectorized = all_skills_vectorized[num_applicants*2:]

    applicants_majors_vectorized = all_majors_vectorized[:num_applicants]
    companies_majors_vectorized = all_majors_vectorized[num_applicants:]

    return (applicants_skills_vectorized, applicants_majors_vectorized,
            companies_skills_vectorized, companies_majors_vectorized, tfidf_vectorizer_skills, tfidf_vectorizer_majors)
```
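One detail worth knowing about the vectorization step: TfidfVectorizer's default tokenizer splits on word boundaries, so multi-word skills such as "customer service" or "business requirements" become separate tokens before weighting. If each comma-separated skill should stay a single feature, a comma-splitting tokenizer is one option; this is a sketch of an alternative, not what the notebook uses.

```python
# Sketch (assumption): keep each comma-separated skill as one TF-IDF feature
# by tokenizing on commas instead of the default word-boundary pattern.
skill_vectorizer = TfidfVectorizer(
    tokenizer=lambda text: [s.strip() for s in text.split(",") if s.strip()],
    token_pattern=None,  # tokenizer overrides token_pattern; None silences the warning
    lowercase=True,
)
```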
|
In [33]:
```python
def compute_similarity(applicants_skills_vectorized, applicants_majors_vectorized,
                       companies_skills_vectorized, companies_majors_vectorized):
    # Calculate similarity based on skills (averaging hard and soft skills similarities)
    applicants_skills = (applicants_skills_vectorized[0::2] + applicants_skills_vectorized[1::2]) / 2
    companies_skills = (companies_skills_vectorized[0::2] + companies_skills_vectorized[1::2]) / 2

    skills_similarity = cosine_similarity(applicants_skills, companies_skills)

    # Calculate similarity based on majors
    majors_similarity = cosine_similarity(applicants_majors_vectorized, companies_majors_vectorized)

    # Ensure the number of companies in both similarities is aligned
    if skills_similarity.shape[1] != majors_similarity.shape[1]:
        min_dim = min(skills_similarity.shape[1], majors_similarity.shape[1])
        skills_similarity = skills_similarity[:, :min_dim]
        majors_similarity = majors_similarity[:, :min_dim]

    # Combine these similarities (simple average for this example)
    combined_similarity = (skills_similarity + majors_similarity) / 2
    return combined_similarity
```
|
563 |
+
"cell_type": "code",
|
564 |
+
"execution_count": 34,
|
565 |
+
"metadata": {
|
566 |
+
"id": "ter3YAzxoelD"
|
567 |
+
},
|
568 |
+
"outputs": [],
|
569 |
+
"source": [
|
570 |
+
"# Recommendation Function\n",
|
571 |
+
"def recommend_jobs(applicants, companies, similarity_scores):\n",
|
572 |
+
" recommendations = {}\n",
|
573 |
+
" for i, applicant in enumerate(applicants['User ID']):\n",
|
574 |
+
" if i < len(similarity_scores):\n",
|
575 |
+
" sorted_company_indices = np.argsort(-similarity_scores[i]) # Descending sort of scores\n",
|
576 |
+
" recommended_companies = companies.iloc[sorted_company_indices]['Major'].values[:3] # Top 3 recommendations\n",
|
577 |
+
" recommendations[applicant] = recommended_companies\n",
|
578 |
+
" return recommendations\n",
|
579 |
+
"\n",
|
580 |
+
"# Testing and Evaluation Function\n",
|
581 |
+
"def print_recommendations(applicants, companies, recommendations):\n",
|
582 |
+
" # This is a mock function since we don't have ground truth to compare to.\n",
|
583 |
+
" # In a real scenario, we would compare against actual matches or use some form of feedback.\n",
|
584 |
+
" print(\"Recommendations for each applicant:\")\n",
|
585 |
+
" for applicant in recommendations:\n",
|
586 |
+
" print(f\"{applicant}: {recommendations[applicant]}\")"
|
587 |
+
]
|
588 |
+
},
|
589 |
+
{
|
590 |
+
"cell_type": "code",
|
591 |
+
"execution_count": null,
|
592 |
+
"metadata": {
|
593 |
+
"colab": {
|
594 |
+
"base_uri": "https://localhost:8080/"
|
595 |
+
},
|
596 |
+
"collapsed": true,
|
597 |
+
"id": "Ajxp0xelIrl2",
|
598 |
+
"outputId": "08bafc5b-73cc-4695-924a-931840047dd5"
|
599 |
+
},
|
600 |
+
"outputs": [],
|
601 |
+
"source": [
|
602 |
+
"# Let's create and process the data, and compute recommendations\n",
|
603 |
+
"# train_applicants, test_applicants, companies = create_mock_data()\n",
|
604 |
+
"applicants_skills_vec, applicants_majors_vec, companies_skills_vec, companies_majors_vec, tfidf_vectorizer_skills, tfidf_vectorizer_majors = feature_engineering(train_user, jobs)\n",
|
605 |
+
"\n",
|
606 |
+
"similarity_scores = compute_similarity(applicants_skills_vec, applicants_majors_vec, companies_skills_vec, companies_majors_vec)\n",
|
607 |
+
"recommendations = recommend_jobs(test_user, jobs, similarity_scores)\n",
|
608 |
+
"\n",
|
609 |
+
"# Output the recommendations to observe the results\n",
|
610 |
+
"print_recommendations(test_user, jobs, recommendations)"
|
611 |
+
]
|
612 |
+
},
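Worth flagging: `similarity_scores` has one row per `train_user` row, while `recommend_jobs` iterates `test_user['User ID']`, so row i of the matrix is keyed by the i-th test user ID. A minimal sketch, assuming the intent is to score the test applicants themselves with the vectorizers fitted above:

```python
# Sketch (assumption): build similarity rows for the test applicants, reusing the
# fitted TF-IDF vectorizers and combining each user's hard and soft skills.
test_skill_text = test_user['final_hard_skill'].fillna("") + ", " + test_user['final_soft_skill'].fillna("")
test_skills_vec = tfidf_vectorizer_skills.transform(test_skill_text)
test_majors_vec = tfidf_vectorizer_majors.transform(test_user['candidate_field'].fillna(""))

job_skill_text = jobs['final_hard_skill'].fillna("") + ", " + jobs['final_soft_skill'].fillna("")
job_skills_vec = tfidf_vectorizer_skills.transform(job_skill_text)
job_majors_vec = tfidf_vectorizer_majors.transform(jobs['Major'])

test_similarity = (cosine_similarity(test_skills_vec, job_skills_vec)
                   + cosine_similarity(test_majors_vec, job_majors_vec)) / 2
test_recommendations = recommend_jobs(test_user, jobs, test_similarity)
```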
|
In [36]:
```python
# Process input skills and recommend jobs
def recommend_jobs_for_input_skills(input_hard_skills, input_soft_skills, input_major, jobs, tfidf_vectorizer_skills, tfidf_vectorizer_majors, companies_skills_vec, companies_majors_vec):
    input_hard_skills_vec = tfidf_vectorizer_skills.transform([input_hard_skills])
    input_soft_skills_vec = tfidf_vectorizer_skills.transform([input_soft_skills])
    input_major_vec = tfidf_vectorizer_majors.transform([input_major])

    # Average the vectorized hard and soft skills
    input_skills_vec = (input_hard_skills_vec + input_soft_skills_vec) / 2

    # Compute similarities
    skills_similarity = cosine_similarity(input_skills_vec, companies_skills_vec)
    major_similarity = cosine_similarity(input_major_vec, companies_majors_vec)

    # Ensure the number of companies in both similarities is aligned
    if skills_similarity.shape[1] != major_similarity.shape[1]:
        min_dim = min(skills_similarity.shape[1], major_similarity.shape[1])
        skills_similarity = skills_similarity[:, :min_dim]
        major_similarity = major_similarity[:, :min_dim]

    # Combine similarities
    combined_similarity = (skills_similarity + major_similarity) / 2

    # Get top 3 job recommendations
    sorted_company_indices = np.argsort(-combined_similarity[0])
    recommended_companies = jobs.iloc[sorted_company_indices]['Major'].values[:3]

    return recommended_companies
```

TEST RECOMMENDED SYSTEM

In [37]:
```python
input_hard_skills = "Java, Excel, Python"
input_soft_skills = "Communication, Teamwork"
input_major = "Economy"

recommended_jobs = recommend_jobs_for_input_skills(input_hard_skills, input_soft_skills, input_major, jobs, tfidf_vectorizer_skills, tfidf_vectorizer_majors, companies_skills_vec, companies_majors_vec)
print("Recommended Jobs based on input skills and major:")
print(recommended_jobs)
```
Output:
```
Recommended Jobs based on input skills and major:
['it jobs' 'sales' 'administration & office support']
```

Evaluating (PENDING)

In [38]:
```python
def create_ground_truth(csv_file_path):
    data = pd.read_csv(csv_file_path)

    # Create the `ground_truth` dictionary
    ground_truth = {}
    for index, row in data.iterrows():
        user_id = row['User ID']
        actual_major = row['candidate_field']

        # Add to the dictionary, assuming each candidate picks only one job
        ground_truth[user_id] = [actual_major]

    return ground_truth

# Use the function above to build `ground_truth`
csv_file_path = '1st_test.csv'
ground_truth = create_ground_truth(csv_file_path)
```
|
In [ ]:
```python
display(ground_truth)
```

In [40]:
```python
def precision_at_k(recommendations, ground_truth, k=3):
    """
    Calculate the precision at k for the recommendation system.

    Parameters:
    - recommendations (dict): Dictionary where keys are user IDs and values are lists of recommended majors.
    - ground_truth (dict): Dictionary where keys are user IDs and values are lists of truly suitable majors.
    - k (int): The number of top recommendations to consider for calculating precision.

    Returns:
    - float: The average precision at k for all users.
    """
    precision_scores = []

    for applicant, recommended_major in recommendations.items():
        if applicant in ground_truth:
            # Get top k recommendations
            top_k_recs = recommended_major[:k]
            # Calculate the number of relevant recommendations
            relevant_recs = sum(1 for major in top_k_recs if major in ground_truth[applicant])
            # Precision at k for this user
            precision = relevant_recs / k
            precision_scores.append(precision)

    # Average precision at k over all users
    average_precision = np.mean(precision_scores) if precision_scores else 0
    return average_precision

avg_precision = precision_at_k(recommendations, ground_truth)
print("Average Precision@3 with 18979 trains and 4745 tests:", avg_precision)
```
Output:
```
Average Precision@3 with 18979 trains and 4745 tests: 0.12764313312258516
```
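To make the metric concrete: with a single ground-truth field per user, each user's Precision@3 is either 1/3 (the true field appears somewhere in the top 3) or 0, so an average of about 0.128 implies roughly 38% of test users have their field among the three recommendations. A small illustrative check (toy data, not from the notebook):

```python
# Illustrative only: one user whose true field appears in the top-3 list.
toy_recs = {101: ["it jobs", "sales", "marketing"]}
toy_truth = {101: ["sales"]}
print(precision_at_k(toy_recs, toy_truth, k=3))  # 1 relevant hit / 3 recommendations = 0.333...
```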
|
In [41]:
```python
def recall_at_k(recommendations, ground_truth, k=3):
    recall_scores = []

    for user_id, recommended_majors in recommendations.items():
        if user_id in ground_truth:
            # Get top k recommendations
            top_k_recs = recommended_majors[:k]
            # Calculate the number of relevant recommendations
            relevant_recs = sum(1 for major in top_k_recs if major in ground_truth[user_id])
            # Calculate the total number of relevant items
            total_relevant = len(ground_truth[user_id])
            # Recall at k for this user
            recall = relevant_recs / total_relevant if total_relevant else 0
            recall_scores.append(recall)

    # Average recall at k over all users
    average_recall = sum(recall_scores) / len(recall_scores) if recall_scores else 0
    return average_recall

# Example usage:
avg_recall = recall_at_k(recommendations, ground_truth)
print("Average Recall@3 with 18979 trains and 4745 tests:", avg_recall)
```
Output:
```
Average Recall@3 with 18979 trains and 4745 tests: 0.38292939936775555
```
|
In [42]:
```python
def f1_score_at_k(recommendations, ground_truth, k=3):
    precision = precision_at_k(recommendations, ground_truth, k)
    recall = recall_at_k(recommendations, ground_truth, k)

    if precision + recall == 0:
        return 0

    f1_score = 2 * (precision * recall) / (precision + recall)
    return f1_score

avg_f1_score = f1_score_at_k(recommendations, ground_truth)

print("Average F1 Score@3:", avg_f1_score)
```
Output:
```
Average F1 Score@3: 0.19146469968387775
```
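A quick consistency check on the three printed numbers: with exactly one ground-truth field per user, Recall@3 is 3 × Precision@3 (3 × 0.12764 ≈ 0.38293), and F1 = 2PR / (P + R) = 2 × 0.12764 × 0.38293 / (0.12764 + 0.38293) ≈ 0.19146, which matches the reported score.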
|
In [43]:
```python
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin
```

In [44]:
```python
class FeatureEngineeringTransformer(BaseEstimator, TransformerMixin):
    def __init__(self):
        self.tfidf_vectorizer_skills = TfidfVectorizer()
        self.tfidf_vectorizer_majors = TfidfVectorizer()

    def fit(self, X, y=None):
        all_skills = pd.concat([X['final_hard_skill'], X['final_soft_skill']])
        all_majors = X['candidate_field']

        self.tfidf_vectorizer_skills.fit(all_skills)
        self.tfidf_vectorizer_majors.fit(all_majors)
        return self

    def transform(self, X):
        all_skills = pd.concat([X['final_hard_skill'], X['final_soft_skill']])
        all_majors = X['candidate_field']

        applicants_skills_vec = self.tfidf_vectorizer_skills.transform(all_skills)
        applicants_majors_vec = self.tfidf_vectorizer_majors.transform(all_majors)

        return applicants_skills_vec, applicants_majors_vec
```

In [45]:
```python
class JobRecommender(BaseEstimator, TransformerMixin):
    def __init__(self, jobs, tfidf_vectorizer_skills, tfidf_vectorizer_majors, companies_skills_vec, companies_majors_vec):
        self.jobs = jobs
        self.tfidf_vectorizer_skills = tfidf_vectorizer_skills
        self.tfidf_vectorizer_majors = tfidf_vectorizer_majors
        self.companies_skills_vec = companies_skills_vec
        self.companies_majors_vec = companies_majors_vec

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        input_hard_skills_vec = self.tfidf_vectorizer_skills.transform(X['final_hard_skill'])
        input_soft_skills_vec = self.tfidf_vectorizer_skills.transform(X['final_soft_skill'])
        input_major_vec = self.tfidf_vectorizer_majors.transform(X['candidate_field'])

        input_skills_vec = (input_hard_skills_vec + input_soft_skills_vec) / 2

        skills_similarity = cosine_similarity(input_skills_vec, self.companies_skills_vec)
        major_similarity = cosine_similarity(input_major_vec, self.companies_majors_vec)

        if skills_similarity.shape[1] != major_similarity.shape[1]:
            min_dim = min(skills_similarity.shape[1], major_similarity.shape[1])
            skills_similarity = skills_similarity[:, :min_dim]
            major_similarity = major_similarity[:, :min_dim]

        combined_similarity = (skills_similarity + major_similarity) / 2

        recommendations = []
        for i in range(combined_similarity.shape[0]):
            sorted_company_indices = np.argsort(-combined_similarity[i])
            recommended_companies = self.jobs.iloc[sorted_company_indices]['Major'].values[:3]
            recommendations.append(recommended_companies)

        return recommendations
```
|
In [46]:
```python
def create_recommendation_pipeline():
    # Instantiate the feature engineering transformer
    feature_engineering = FeatureEngineeringTransformer()

    # Define the recommendation function as a callable estimator
    def recommend_jobs_function(X, y=None):
        applicants_skills_vec, applicants_majors_vec = feature_engineering.fit_transform(X)
        companies_skills_vec, companies_majors_vec = feature_engineering.tfidf_vectorizer_skills.transform(jobs['final_hard_skill']), feature_engineering.tfidf_vectorizer_majors.transform(jobs['Major'])

        return recommend_jobs_for_input_skills(X['final_hard_skill'], X['final_soft_skill'], X['candidate_field'], jobs, feature_engineering.tfidf_vectorizer_skills, feature_engineering.tfidf_vectorizer_majors, companies_skills_vec, companies_majors_vec)

    pipeline = Pipeline([
        ('feature_engineering', feature_engineering),
        ('recommendation', recommend_jobs_function)
    ])

    return pipeline
recommendation_pipeline = create_recommendation_pipeline()
```

In [47]:
```python
import pickle
def create_recommendation_pipeline(jobs):
    feature_engineering = FeatureEngineeringTransformer()

    # Fit feature engineering transformer to get the vectorizers and company vectors
    applicants_skills_vec, applicants_majors_vec = feature_engineering.fit_transform(train_user)
    companies_skills_vec = feature_engineering.tfidf_vectorizer_skills.transform(jobs['final_hard_skill'])
    companies_majors_vec = feature_engineering.tfidf_vectorizer_majors.transform(jobs['Major'])

    recommender = JobRecommender(jobs, feature_engineering.tfidf_vectorizer_skills, feature_engineering.tfidf_vectorizer_majors, companies_skills_vec, companies_majors_vec)

    pipeline = Pipeline([
        ('feature_engineering', feature_engineering),
        ('recommendation', recommender)
    ])

    return pipeline

# Create the pipeline
recommendation_pipeline = create_recommendation_pipeline(jobs)

# Save the pipeline using pickle
model_path = "recommendation_pipeline.pkl"
with open(model_path, mode="bw") as f:
    pickle.dump(recommendation_pipeline, f)
print("Model components saved successfully!")
```
Output:
```
Model components saved successfully!
```
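One caveat on composing the two steps: `Pipeline.transform` would hand `JobRecommender` the `(skills, majors)` tuple produced by `FeatureEngineeringTransformer`, while `JobRecommender.transform` indexes its input by column names. A small sketch of calling the fitted recommender step directly on a DataFrame (the column values below are illustrative assumptions):

```python
# Sketch (assumption): query the recommender step with a one-row DataFrame
# carrying the columns JobRecommender.transform actually reads.
sample = pd.DataFrame({
    "final_hard_skill": ["python, java, finance, excel"],
    "final_soft_skill": ["communication, teamwork"],
    "candidate_field": ["it jobs"],
})
top3 = recommendation_pipeline.named_steps["recommendation"].transform(sample)
print(top3)  # list with one array of the three highest-scoring majors
```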
|
In [48]:
```python
from huggingface_hub import notebook_login
notebook_login()
```
Output: (Hugging Face login widget)

In [50]:
```python
import shutil
import os
from skops import card, hub_utils
from pathlib import Path

model_path = "recommendation_pipeline.pkl"
local_repo = "job-recommendation-model"
# Clear the existing directory if it exists
if os.path.exists(local_repo):
    shutil.rmtree(local_repo)

sample_data = pd.DataFrame({
    'final_hard_skill': ["Python, Java, Finance, Excel"],
    'final_soft_skill': ["Communication, Teamwork"],
    'candidate_field': [""]
})

# Initialize the local repository
hub_utils.init(
    model=model_path,
    requirements=["scikit-learn", "pandas", "numpy"],
    dst=local_repo,
    task="tabular-classification",
    data=sample_data,
)

# # Create model card metadata manually
# metadata = {
#     "model_type": "Custom Recommendation Model",
#     "model_description": "This is a recommendation model for job applicants based on their skills and majors.",
#     "author": "trangannh",
#     "license": "mit",
#     "citation": """
#     @misc{example2024recommendation,
#         author = {trangannh},
#         title = {Job Recommendation Model},
#         year = {2024},
#         howpublished = {\\url{https://huggingface.co/job-recommendation-model}},
#     }
#     """,
#     "limitations": "This model is not ready to be used in production.",
# }

# # Create and save the model card
# model_card = card.Card(model=model_path, metadata=metadata)

# # Add the get started code
# get_started_code = """
# import pickle
# import pandas as pd

# with open('recommendation_model.pkl', 'rb') as file:
#     tfidf_vectorizer_skills, tfidf_vectorizer_majors, companies_skills_vec, companies_majors_vec = pickle.load(file)

# input_hard_skills = "Python, Java, Finance, Excel"
# input_soft_skills = "Communication, Teamwork"
# input_major = ""
# jobs_data = pd.read_csv("/content/sample_data/jobs_data.csv")

# recommended_jobs = recommend_jobs_for_input_skills(input_hard_skills, input_soft_skills, input_major, jobs_data, 'recommendation_model.pkl')
# print("Recommended Jobs based on input skills and major:")
# print(recommended_jobs)
# """

# model_card.add(
#     get_started_code=get_started_code,
#     model_card_authors="trangannh",
#     model_description="This is a recommendation model for job applicants based on their skills and majors.",
#     limitations="This model is not ready to be used in production."
# )

# # Save the model card
# model_card.save(Path(local_repo) / "README.md")

# Push the repository to Hugging Face Hub
repo_id = "trangannh/job-recommendation-model"
token = ""

hub_utils.push(
    repo_id=repo_id,
    source=local_repo,
    token=token,
    commit_message="Initial commit of the job recommendation model",
    create_remote=True,
)
```
Output:
```
c:\Program Files\Python311\Lib\site-packages\skops\hub_utils\_hf_hub.py:577: FutureWarning: Creating repos on hf.co is subject to strict rate limits now and therefore this feature is to be removed from this library in version 0.10. You can use tools directly available in the huggingface_hub library instead to create and push files.
  warnings.warn(
recommendation_pipeline.pkl: upload progress widget (0.00/163k B)
```
|
In [52]:
```python
import pickle
import pandas as pd

# Load the model (pipeline)
with open('recommendation_pipeline.pkl', 'rb') as file:
    recommendation_pipeline = pickle.load(file)

# Example input data
input_hard_skills = ["Python", "Java", "Finance", "Excel"]
input_soft_skills = ["Communication", "Teamwork"]
input_major = ["Data Science"]
recommended_jobs = recommend_jobs_for_input_skills(input_hard_skills, input_soft_skills, input_major, jobs, tfidf_vectorizer_skills, tfidf_vectorizer_majors, companies_skills_vec, companies_majors_vec)
print("Recommended Jobs based on input skills and major:")
print(recommended_jobs)
```
Output (error):
```
AttributeError                            Traceback (most recent call last)
Cell In[52], line 12
     10 input_soft_skills = ["Communication", "Teamwork"]
     11 input_major = ["Data Science"]
---> 12 recommended_jobs = recommend_jobs_for_input_skills(input_hard_skills, input_soft_skills, input_major, jobs, tfidf_vectorizer_skills, tfidf_vectorizer_majors, companies_skills_vec, companies_majors_vec)

Cell In[36], line 3, in recommend_jobs_for_input_skills(...)
----> 3 input_hard_skills_vec = tfidf_vectorizer_skills.transform([input_hard_skills])

sklearn\feature_extraction\text.py, in TfidfVectorizer.transform -> CountVectorizer._count_vocab -> _analyze -> _preprocess
---> 69 doc = doc.lower()

AttributeError: 'list' object has no attribute 'lower'
```
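The traceback is consistent with the earlier working call in In [37], which passes plain comma-separated strings: `TfidfVectorizer.transform` expects an iterable of strings, so wrapping a list of skills in another list hands the preprocessor a list where it expects one document. A minimal sketch of the fix, assuming the same inputs are intended:

```python
# Sketch (assumption): join the skill/major lists into single comma-separated
# strings before calling the recommender, matching the In [37] usage above.
recommended_jobs = recommend_jobs_for_input_skills(
    ", ".join(input_hard_skills),   # "Python, Java, Finance, Excel"
    ", ".join(input_soft_skills),   # "Communication, Teamwork"
    ", ".join(input_major),         # "Data Science"
    jobs, tfidf_vectorizer_skills, tfidf_vectorizer_majors,
    companies_skills_vec, companies_majors_vec,
)
print(recommended_jobs)
```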
|
(Notebook metadata: Colab provenance, Python 3 kernel, language_info python 3.11.2, nbformat 4.)