{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# ISCO-08 hierarchical accuracy measure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ISCO CSV file downloaded\n",
      "Weighted ISCO hierarchy dictionary created as isco_hierarchy\n",
      "\n",
      "The ISCO-08 Hierarchical Accuracy Measure is an implementation of the measure described in [Functional Annotation of Genes Using Hierarchical Text Categorization](https://www.researchgate.net/publication/44046343_Functional_Annotation_of_Genes_Using_Hierarchical_Text_Categorization) (Kiritchenko, Svetlana and Famili, Fazel. 2005) and adapted for the ISCO-08 classification scheme by the International Labour Organization.\n",
      "\n",
      "The measure rewards more precise classifications that correctly identify an occupation's placement down to the specific Unit group level and applies penalties for misclassifications based on the hierarchical distance between the correct and assigned categories.\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "import evaluate\n",
    "\n",
    "# Load the metric from the Hugging Face Hub by default so the notebook runs on\n",
    "# any machine. Set ISCO_HAM_PATH to a local checkout of the metric to test\n",
    "# unpublished changes.\n",
    "METRIC_PATH = os.getenv(\"ISCO_HAM_PATH\", \"danieldux/isco_hierarchical_accuracy\")\n",
    "\n",
    "ham = evaluate.load(METRIC_PATH)\n",
    "print(ham.description)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "References: ['1111', '1112', '1113', '1114', '1120']\n",
      "Predictions: ['1111', '1113', '1120', '1211', '2111']\n",
      "Accuracy: 0.2, Hierarchical Precision: 0.5, Hierarchical Recall: 0.7777777777777778, Hierarchical F-measure: 0.6086956521739131\n",
      "{'accuracy': 0.2, 'hierarchical_precision': 0.5, 'hierarchical_recall': 0.7777777777777778, 'hierarchical_fmeasure': 0.6086956521739131}\n"
     ]
    }
   ],
   "source": [
    "# Worked example: only the first pair matches exactly; the remaining\n",
    "# predictions first diverge from their reference at the 4th, 3rd, 2nd and\n",
    "# 1st digit respectively.\n",
    "references = [\"1111\", \"1112\", \"1113\", \"1114\", \"1120\"]\n",
    "predictions = [\"1111\", \"1113\", \"1120\", \"1211\", \"2111\"]\n",
    "\n",
    "print(f\"References: {references}\")\n",
    "print(f\"Predictions: {predictions}\")\n",
    "print(ham.compute(references=references, predictions=predictions))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "TEST CASE #1\n",
      "References: ['1111', '1111', '1111', '1111', '1111', '1111', '1111', '1111', '1111', '1111']\n",
      "Predictions: ['1111', '1112', '1120', '1211', '1311', '2111', '111', '11', '1', '9999']\n",
      "Accuracy: 0.1, Hierarchical Precision: 0.2222222222222222, Hierarchical Recall: 1.0, Hierarchical F-measure: 0.3636363636363636\n",
      "{'accuracy': 0.1, 'hierarchical_precision': 0.2222222222222222, 'hierarchical_recall': 1.0, 'hierarchical_fmeasure': 0.3636363636363636}\n",
      "\n",
      "TEST CASE #2\n",
      "References: ['1111']\n",
      "Predictions: ['1111']\n",
      "Accuracy: 1.0, Hierarchical Precision: 1.0, Hierarchical Recall: 1.0, Hierarchical F-measure: 1.0\n",
      "{'accuracy': 1.0, 'hierarchical_precision': 1.0, 'hierarchical_recall': 1.0, 'hierarchical_fmeasure': 1.0}\n",
      "\n",
      "TEST CASE #3\n",
      "References: ['1111']\n",
      "Predictions: ['1112']\n",
      "Accuracy: 0.0, Hierarchical Precision: 0.75, Hierarchical Recall: 0.75, Hierarchical F-measure: 0.75\n",
      "{'accuracy': 0.0, 'hierarchical_precision': 0.75, 'hierarchical_recall': 0.75, 'hierarchical_fmeasure': 0.75}\n",
      "\n",
      "TEST CASE #4\n",
      "References: ['1111']\n",
      "Predictions: ['1120']\n",
      "Accuracy: 0.0, Hierarchical Precision: 0.5, Hierarchical Recall: 0.5, Hierarchical F-measure: 0.5\n",
      "{'accuracy': 0.0, 'hierarchical_precision': 0.5, 'hierarchical_recall': 0.5, 'hierarchical_fmeasure': 0.5}\n",
      "\n",
      "TEST CASE #5\n",
      "References: ['1111']\n",
      "Predictions: ['1211']\n",
      "Accuracy: 0.0, Hierarchical Precision: 0.25, Hierarchical Recall: 0.25, Hierarchical F-measure: 0.25\n",
      "{'accuracy': 0.0, 'hierarchical_precision': 0.25, 'hierarchical_recall': 0.25, 'hierarchical_fmeasure': 0.25}\n",
      "\n",
      "TEST CASE #6\n",
      "References: ['1111']\n",
      "Predictions: ['1311']\n",
      "Accuracy: 0.0, Hierarchical Precision: 0.25, 
Hierarchical Recall: 0.25, Hierarchical F-measure: 0.25\n",
      "{'accuracy': 0.0, 'hierarchical_precision': 0.25, 'hierarchical_recall': 0.25, 'hierarchical_fmeasure': 0.25}\n",
      "\n",
      "TEST CASE #7\n",
      "References: ['1111']\n",
      "Predictions: ['2111']\n",
      "Accuracy: 0.0, Hierarchical Precision: 0.0, Hierarchical Recall: 0.0, Hierarchical F-measure: 0\n",
      "{'accuracy': 0.0, 'hierarchical_precision': 0.0, 'hierarchical_recall': 0.0, 'hierarchical_fmeasure': 0}\n",
      "\n",
      "TEST CASE #8\n",
      "References: ['1111']\n",
      "Predictions: ['111']\n",
      "Accuracy: 0.0, Hierarchical Precision: 1.0, Hierarchical Recall: 0.25, Hierarchical F-measure: 0.4\n",
      "{'accuracy': 0.0, 'hierarchical_precision': 1.0, 'hierarchical_recall': 0.25, 'hierarchical_fmeasure': 0.4}\n",
      "\n",
      "TEST CASE #9\n",
      "References: ['1111']\n",
      "Predictions: ['11']\n",
      "Accuracy: 0.0, Hierarchical Precision: 1.0, Hierarchical Recall: 0.25, Hierarchical F-measure: 0.4\n",
      "{'accuracy': 0.0, 'hierarchical_precision': 1.0, 'hierarchical_recall': 0.25, 'hierarchical_fmeasure': 0.4}\n",
      "\n",
      "TEST CASE #10\n",
      "References: ['1111']\n",
      "Predictions: ['1']\n",
      "Accuracy: 0.0, Hierarchical Precision: 1.0, Hierarchical Recall: 0.25, Hierarchical F-measure: 0.4\n",
      "{'accuracy': 0.0, 'hierarchical_precision': 1.0, 'hierarchical_recall': 0.25, 'hierarchical_fmeasure': 0.4}\n",
      "\n",
      "TEST CASE #11\n",
      "References: ['1111']\n",
      "Predictions: ['9999']\n",
      "Accuracy: 0.0, Hierarchical Precision: 0.0, Hierarchical Recall: 0.0, Hierarchical F-measure: 0\n",
      "{'accuracy': 0.0, 'hierarchical_precision': 0.0, 'hierarchical_recall': 0.0, 'hierarchical_fmeasure': 0}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Run every test case from the `tests` module through the metric and print\n",
    "# the inputs next to the scores.\n",
    "from tests import test_cases\n",
    "\n",
    "# enumerate(start=1) numbers the cases without a hand-maintained counter\n",
    "for test_number, test_case in enumerate(test_cases, start=1):\n",
    "    references = test_case[\"references\"]\n",
    "    predictions = test_case[\"predictions\"]\n",
    "    print(f\"TEST CASE #{test_number}\")\n",
    "    print(f\"References: {references}\")\n",
    "    print(f\"Predictions: {predictions}\")\n",
    "    print(ham.compute(references=references, predictions=predictions))\n",
    "    print()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Model evaluation on the test and validation splits of the dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "\n",
    "import evaluate\n",
    "from datasets import load_dataset\n",
    "from transformers import pipeline\n",
    "\n",
    "DATASET_NAME = \"ICILS/multilingual_parental_occupations\"\n",
    "MODEL_NAME = \"ICILS/XLM-R-ISCO\"\n",
    "\n",
    "# The dataset and pipeline loads below pass this token, so fail early with a\n",
    "# clear message when it is missing.\n",
    "hf_token = os.getenv(\"HF_TOKEN\")\n",
    "if hf_token is None:\n",
    "    raise ValueError(\"HF_TOKEN environment variable is not set.\")\n",
    "\n",
    "# Load the dataset splits\n",
    "test_data = load_dataset(DATASET_NAME, split=\"test\", token=hf_token)\n",
    "validation_data = load_dataset(DATASET_NAME, split=\"validation\", token=hf_token)\n",
    "\n",
    "# Seeded 100-example sample of the test split, derived from `test_data`\n",
    "# instead of loading the split a second time.\n",
    "test_data_subset = test_data.shuffle(seed=42).select(range(100))\n",
    "\n",
    "# Initialize the pipeline\n",
    "pipe = pipeline(\"text-classification\", model=MODEL_NAME, token=hf_token)\n",
    "\n",
    "\n",
    "def extract_isco_code(isco_code_title: str) -> str:\n",
    "    \"\"\"Return the ISCO code that starts an ISCO_CODE_TITLE string.\n",
    "\n",
    "    ISCO_CODE_TITLE is a string like \"7412 Electrical Mechanics and Fitters\",\n",
    "    so the code is the first whitespace-separated token.\n",
    "    \"\"\"\n",
    "    return isco_code_title.split()[0]\n",
    "\n",
    "\n",
    "# Initialize the hierarchical accuracy measure\n",
    "hierarchical_accuracy = evaluate.load(\"danieldux/isco_hierarchical_accuracy\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Test set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 0.8611914401388086, Hierarchical 
Precision: 0.989010989010989, Hierarchical Recall: 0.9836065573770492, Hierarchical F-measure: 0.9863013698630136\n",
      "Evaluation results saved to isco_test_results.json\n"
     ]
    }
   ],
   "source": [
    "def evaluate_split(dataset, results_path):\n",
    "    \"\"\"Classify every example in `dataset`, score the predictions and save the scores.\n",
    "\n",
    "    Args:\n",
    "        dataset: Iterable of examples with a \"JOB_DUTIES\" text field and an\n",
    "            \"ISCO\" reference code.\n",
    "        results_path: Path of the JSON file the metric results are written to.\n",
    "\n",
    "    Returns:\n",
    "        The value returned by `hierarchical_accuracy.compute`.\n",
    "    \"\"\"\n",
    "    predictions = []\n",
    "    references = []\n",
    "    for example in dataset:\n",
    "        # Use the key \"JOB_DUTIES\" for the text data; the label of the first\n",
    "        # result starts with the predicted ISCO code.\n",
    "        prediction = pipe(example[\"JOB_DUTIES\"])\n",
    "        predictions.append(extract_isco_code(prediction[0][\"label\"]))\n",
    "\n",
    "        # Use the key \"ISCO\" for the reference ISCO code\n",
    "        references.append(example[\"ISCO\"])\n",
    "\n",
    "    # Compute the hierarchical accuracy\n",
    "    results = hierarchical_accuracy.compute(predictions=predictions, references=references)\n",
    "\n",
    "    # Save the results to a JSON file\n",
    "    with open(results_path, \"w\") as f:\n",
    "        json.dump(results, f)\n",
    "    print(f\"Evaluation results saved to {results_path}\")\n",
    "\n",
    "    return results\n",
    "\n",
    "\n",
    "# Evaluate the model on the test split\n",
    "test_results = evaluate_split(test_data, \"isco_test_results.json\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Validation set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 0.8576800694243564, Hierarchical Precision: 0.9757462686567164, Hierarchical Recall: 0.9812382739212008, Hierarchical F-measure: 0.9784845650140319\n",
      "Evaluation results saved to isco_validation_results.json\n"
     ]
    }
   ],
   "source": [
    "# Evaluate the model on the validation split with the same procedure as the\n",
    "# test split (`evaluate_split` is defined in the test set cell).\n",
    "validation_results = evaluate_split(validation_data, \"isco_validation_results.json\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Inter-rater agreement"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# icils_isco_int_ml = \"/datasets/isco-data/processed/2018/icils_2018_isco_ml.parquet\"\n",
    "icils_isco_int_ml = \"gs://isco-data-asia-southeast1/processed/2018/icils_2018_isco_ml.parquet\"\n",
    "\n",
    "# Read only the columns used below\n",
    "icils_df = pd.read_parquet(\n",
    "    icils_isco_int_ml, columns=[\"JOB\", \"DUTIES\", \"ISCO\", \"ISCO_REL\", \"LANGUAGE\"]\n",
    ")\n",
    "\n",
    "# Keep only the rows that have both an ISCO and an ISCO_REL value\n",
    "isco_rel_df = icils_df.dropna(subset=[\"ISCO\", \"ISCO_REL\"])\n",
    "\n",
    "# Group the DataFrame by LANGUAGE column\n",
    "grouped_df = isco_rel_df.groupby(\"LANGUAGE\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Output column name -> key in the dict returned by the metric\n",
    "METRIC_COLUMNS = {\n",
    "    \"Accuracy\": \"accuracy\",\n",
    "    \"Hierarchical Precision\": \"hierarchical_precision\",\n",
    "    \"Hierarchical Recall\": \"hierarchical_recall\",\n",
    "    \"Hierarchical F1\": \"hierarchical_fmeasure\",\n",
    "}\n",
    "\n",
    "# Score each language separately, with ISCO as the reference and ISCO_REL as\n",
    "# the prediction. The rows are collected in a list and turned into a DataFrame\n",
    "# once, instead of concatenating onto an empty DataFrame inside the loop.\n",
    "rows = []\n",
    "for language, group in grouped_df:\n",
    "    references = group[\"ISCO\"].tolist()\n",
    "    predictions = group[\"ISCO_REL\"].tolist()\n",
    "\n",
    "    rel_result = hierarchical_accuracy.compute(references=references, predictions=predictions)\n",
    "\n",
    "    row = {\"Language\": language}\n",
    "    for column, key in METRIC_COLUMNS.items():\n",
    "        row[column] = rel_result[key]\n",
    "    rows.append(row)\n",
    "\n",
    "    # Print the result\n",
    "    print(f\"Language: {language}\")\n",
    "    print(f\"Result: {rel_result}\")\n",
    "    print()\n",
    "\n",
    "results_df = pd.DataFrame(rows, columns=[\"Language\", *METRIC_COLUMNS])\n",
    "\n",
    "# Unweighted mean over the languages, appended as a final \"Average\" row.\n",
    "# The means are computed before the row is added so it does not count itself.\n",
    "average_row = [\"Average\", *results_df[list(METRIC_COLUMNS)].mean()]\n",
    "results_df.loc[len(results_df)] = average_row\n",
    "\n",
    "results_df.to_csv(\"language_results.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Samples where ISCO and ISCO_REL are the same\n",
    "agreement_mask = isco_rel_df[\"ISCO\"] == isco_rel_df[\"ISCO_REL\"]\n",
    "isco_rel_df_same = isco_rel_df[agreement_mask]\n",
    "\n",
    "isco_rel_df_same"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Samples where ISCO and ISCO_REL are different\n",
    "disagreement_mask = isco_rel_df[\"ISCO\"] != isco_rel_df[\"ISCO_REL\"]\n",
    "isco_rel_df_diff = isco_rel_df[disagreement_mask]\n",
    "\n",
    "isco_rel_df_diff"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [],
   "source": [
    "# All values of the ISCO and ISCO_REL columns as plain lists\n",
    "# (.tolist() matches how the per-language cell extracts the same columns)\n",
    "coder1 = isco_rel_df[\"ISCO\"].tolist()\n",
    "coder2 = isco_rel_df[\"ISCO_REL\"].tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compute the hierarchical accuracy over all samples at once, with coder1\n",
    "# (ISCO) as the reference and coder2 (ISCO_REL) as the prediction\n",
    "reliability_results = hierarchical_accuracy.compute(predictions=coder2, references=coder1)\n",
    "\n",
    "# Save the results to a JSON file\n",
    "with open(\"isco_rel_results.json\", \"w\") as f:\n",
    "    json.dump(reliability_results, f)\n",
    "\n",
    "print(\"Evaluation results saved to isco_rel_results.json\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Giskard model testing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from scipy.special import softmax\n",
    "from datasets import load_dataset\n",
    "from transformers import AutoModelForSequenceClassification, AutoTokenizer\n",
    "\n",
    "from giskard import Dataset, Model, scan, testing, GiskardClient, Suite"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [ "
\n", " | IDSTUD | \n", "JOB_DUTIES | \n", "ISCO | \n", "ISCO_REL | \n", "ISCO_TITLE | \n", "ISCO_CODE_TITLE | \n", "COUNTRY | \n", "LANGUAGE | \n", "
---|---|---|---|---|---|---|---|---|
0 | \n", "10670109 | \n", "forældre 1: Han arbejder som med-chef sammen... | \n", "7412 | \n", "None | \n", "Electrical Mechanics and Fitters | \n", "7412 Electrical Mechanics and Fitters | \n", "DNK | \n", "da | \n", "
1 | \n", "10130106 | \n", "asistente de parbulo y basica. ayudaba en la e... | \n", "5312 | \n", "5312 | \n", "Teachers' Aides | \n", "5312 Teachers' Aides | \n", "CHL | \n", "es | \n", "
2 | \n", "10740120 | \n", "trabajaba en el campo como capatas. aveces cui... | \n", "6121 | \n", "None | \n", "Livestock and Dairy Producers | \n", "6121 Livestock and Dairy Producers | \n", "URY | \n", "es | \n", "
3 | \n", "10170109 | \n", "gas abastible. vende gas abastible | \n", "9621 | \n", "5243 | \n", "Messengers, Package Deliverers and Luggage Por... | \n", "9621 Messengers, Package Deliverers and Luggag... | \n", "CHL | \n", "es | \n", "
4 | \n", "11480109 | \n", "jordbruk. sår potatis tar upp potatis plogar h... | \n", "6111 | \n", "6111 | \n", "Field Crop and Vegetable Growers | \n", "6111 Field Crop and Vegetable Growers | \n", "FIN | \n", "sv | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
495 | \n", "11780107 | \n", "acountent mannager|she mannages calls for jobs... | \n", "1211 | \n", "9998 | \n", "Finance Managers | \n", "1211 Finance Managers | \n", "AUS | \n", "en | \n", "
496 | \n", "10850104 | \n", "geometra/muratore. proggetta case e le restaura | \n", "3112 | \n", "3112 | \n", "Civil Engineering Technicians | \n", "3112 Civil Engineering Technicians | \n", "ITA | \n", "it | \n", "
497 | \n", "11460111 | \n", "fa parte della misericordia. Trasporta i malat... | \n", "3258 | \n", "3258 | \n", "Ambulance Workers | \n", "3258 Ambulance Workers | \n", "ITA | \n", "it | \n", "
498 | \n", "10340111 | \n", "사회복지사. 회사에서 복지원 관리 | \n", "2635 | \n", "2635 | \n", "Social Work and Counselling Professionals | \n", "2635 Social Work and Counselling Professionals | \n", "KOR | \n", "ko | \n", "
499 | \n", "10370105 | \n", "자영업. 가게를 운영하신다. | \n", "5221 | \n", "None | \n", "Shopkeepers | \n", "5221 Shopkeepers | \n", "KOR | \n", "ko | \n", "
500 rows × 8 columns
\n", "