{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import os\n",
    "\n",
    "dataset_path= \"/home/investigacion/disco4TB/workspace_pablo/firvox_whisper_research/finetunnig/dataset/dataset_lang_identification\"\n",
    "\n",
    "# Sampling configuration: keep every SAMPLE_STRIDE-th clip, up to MAX_CLIPS_PER_LANG per language.\n",
    "MAX_CLIPS_PER_LANG = 100\n",
    "SAMPLE_STRIDE = 6\n",
    "\n",
    "audios_paths= []\n",
    "lang_id= []\n",
    "\n",
    "# os.listdir returns entries in arbitrary order; sorting makes the selected sample reproducible.\n",
    "for lang in sorted(os.listdir(dataset_path)):\n",
    "    clips_dir = os.path.join(dataset_path, lang, \"clips\")\n",
    "    if not os.path.isdir(clips_dir):\n",
    "        # Skip stray files or language folders that have no clips directory.\n",
    "        continue\n",
    "\n",
    "    # Walk the listing from the end, take every SAMPLE_STRIDE-th file, and stop at\n",
    "    # MAX_CLIPS_PER_LANG instead of scanning the rest of the folder.\n",
    "    audio_folder = sorted(os.listdir(clips_dir), reverse=True)\n",
    "    for audio_path in audio_folder[::SAMPLE_STRIDE][:MAX_CLIPS_PER_LANG]:\n",
    "        lang_id.append(lang)\n",
    "        audios_paths.append(os.path.join(clips_dir, audio_path))\n",
    "\n",
    "df= pd.DataFrame({\n",
    "    \"audio_path\": audios_paths,\n",
    "    \"language\": lang_id\n",
    "})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import functools\n",
    "\n",
    "import torch\n",
    "from transformers import AutoFeatureExtractor, WhisperForAudioClassification\n",
    "from datasets import load_dataset\n",
    "import librosa\n",
    "\n",
    "DEFAULT_MODEL_PATH = \"/home/investigacion/disco4TB/workspace_pablo/firvox_whisper_research/whisper-medium-fleurs-lang-id/lang_identification_models_noFirVox_audios\"\n",
    "\n",
    "# Fall back to CPU so the notebook still runs on machines without a GPU.\n",
    "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
    "\n",
    "\n",
    "# maxsize=1: consecutive calls with the same checkpoint reuse the loaded objects, while\n",
    "# switching to another checkpoint drops the previous one from the cache.\n",
    "@functools.lru_cache(maxsize=1)\n",
    "def _load_classifier(model_path):\n",
    "    \"\"\"Load the feature extractor and classification model stored at model_path.\"\"\"\n",
    "    feature_extractor = AutoFeatureExtractor.from_pretrained(model_path)\n",
    "    model = WhisperForAudioClassification.from_pretrained(model_path).to(DEVICE)\n",
    "    return feature_extractor, model\n",
    "\n",
    "\n",
    "def get_language(audio_path, model_path=DEFAULT_MODEL_PATH):\n",
    "    \"\"\"Predict the language label of one audio file with a Whisper audio classifier.\n",
    "\n",
    "    Args:\n",
    "        audio_path: Path to the audio file; it is loaded with librosa at 16 kHz.\n",
    "        model_path: Directory of the checkpoint to use. Defaults to DEFAULT_MODEL_PATH,\n",
    "            the checkpoint this function previously had hard-coded.\n",
    "\n",
    "    Returns:\n",
    "        The entry of model.config.id2label for the highest-scoring class.\n",
    "    \"\"\"\n",
    "    # Loading is cached: previously both objects were re-read from disk for every audio file.\n",
    "    feature_extractor, model = _load_classifier(model_path)\n",
    "\n",
    "    audio, sr= librosa.load(audio_path, sr=16000)\n",
    "    inputs = feature_extractor(audio, sampling_rate=sr, return_tensors=\"pt\")\n",
    "    input_features = inputs.input_features.to(DEVICE)\n",
    "\n",
    "    with torch.no_grad():\n",
    "        logits = model(input_features).logits\n",
    "\n",
    "    # Explicit class axis; a single audio is passed, so .item() yields one class id.\n",
    "    predicted_class_id = logits.argmax(dim=-1).item()\n",
    "    return model.config.id2label[predicted_class_id]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Checkpoints to evaluate; each one gets its own prediction column in df.\n",
    "models= [\n",
    "    \"/home/investigacion/disco4TB/workspace_pablo/firvox_whisper_research/whisper-medium-fleurs-lang-id/lang_identification_models_noFirVox_audios\"\n",
    "    ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for model_path in models:\n",
    "    # rstrip: a trailing slash would otherwise produce an empty model name.\n",
    "    model_name = model_path.rstrip(\"/\").split(\"/\")[-1]\n",
    "    # Pass the checkpoint explicitly so every entry of `models` is scored with its own model.\n",
    "    df[f\"lang_{model_name}\"] = df[\"audio_path\"].apply(lambda x: get_language(x, model_path=model_path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import confusion_matrix\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "COLUMN_PREFIX = \"lang_\"\n",
    "model_columns = [f\"{COLUMN_PREFIX}{x.rstrip('/').split('/')[-1]}\" for x in models]\n",
    "\n",
    "results = {}\n",
    "confusion_matrices = {}\n",
    "\n",
    "for model_column in model_columns:\n",
    "    # Per-row hit (100) or miss (0), so the group mean is an accuracy percentage.\n",
    "    # The column is overwritten on every iteration and ends up holding the last model.\n",
    "    df['correct_prediction'] = (df['language'] == df[model_column])*100\n",
    "    accuracy_by_language = df.groupby('language')['correct_prediction'].mean()\n",
    "\n",
    "    # Clean the model name: drop only the leading column prefix (a plain replace would\n",
    "    # also strip \"lang_\" from inside the model name) and shorten the known suffix.\n",
    "    clean_model = model_column[len(COLUMN_PREFIX):].replace(\"_small_whisper_firvox_5000_audios\", \"_\")\n",
    "\n",
    "    # Store per-language accuracy\n",
    "    results[clean_model] = accuracy_by_language\n",
    "\n",
    "    # Build the confusion matrix over every label seen in either column; restricting it\n",
    "    # to the ground-truth labels silently drops predictions outside that set.\n",
    "    labels = sorted(set(df['language']) | set(df[model_column]))\n",
    "    cm = confusion_matrix(df['language'], df[model_column], labels=labels)\n",
    "    confusion_matrices[clean_model] = cm\n",
    "\n",
    "    # Plot the confusion matrix with seaborn\n",
    "    fig, ax = plt.subplots(figsize=(10, 7))\n",
    "    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=labels, yticklabels=labels, ax=ax)\n",
    "    ax.set_title(f'Confusion Matrix for {clean_model}')\n",
    "    ax.set_xlabel('Predicted')\n",
    "    ax.set_ylabel('Actual')\n",
    "    plt.show()\n",
    "\n",
    "# Collect the per-language accuracies into a DataFrame (one column per model)\n",
    "accuracy_df = pd.DataFrame(results)\n",
    "accuracy_df\n"
   ]
  },
  {
   "cell_type":
"code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "lastwhisper", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.9" } }, "nbformat": 4, "nbformat_minor": 2 }