{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Latin BERT: fill-mask spot checks\n",
    "\n",
    "Loads the `latin_BERT_final` checkpoint into a Hugging Face `fill-mask` pipeline and inspects the top-5 predictions for a handful of Latin prompts.\n",
    "\n",
    "Observations from the recorded outputs:\n",
    "\n",
    "- The returned sequences are lower-cased even though the prompts are capitalised.\n",
    "- For mid-sentence masks the best candidate scores stay below 0.05.\n",
    "- A mask in final position is filled with punctuation first.\n",
    "- Appending a final period to a prompt changes the ranking of the candidates."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import pipeline\n",
    "\n",
    "# Checkpoint to load; presumably a directory next to this notebook (TODO confirm).\n",
    "MODEL_PATH = \"latin_BERT_final\"\n",
    "\n",
    "fill_mask = pipeline(\"fill-mask\", model=MODEL_PATH)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Mask in the middle of a sentence"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'score': 0.010115200653672218,\n",
       " 'token': 265,\n",
       " 'token_str': 'hoc',\n",
       " 'sequence': 'roma in hoc est.'},\n",
       " {'score': 0.004335678182542324,\n",
       " 'token': 1156,\n",
       " 'token_str': 'deo',\n",
       " 'sequence': 'roma in deo est.'},\n",
       " {'score': 0.003660168731585145,\n",
       " 'token': 146,\n",
       " 'token_str': 'non',\n",
       " 'sequence': 'roma in non est.'},\n",
       " {'score': 0.0034285704605281353,\n",
       " 'token': 745,\n",
       " 'token_str': 'nobis',\n",
       " 'sequence': 'roma in nobis est.'},\n",
       " {'score': 0.0032894855830818415,\n",
       " 'token': 971,\n",
       " 'token_str': 'rebus',\n",
       " 'sequence': 'roma in rebus est.'}]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fill_mask(\"Roma in [MASK] est.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'score': 0.04990068078041077,\n",
       " 'token': 145,\n",
       " 'token_str': 'est',\n",
       " 'sequence': 'ubi est est?.'},\n",
       " {'score': 0.01739734411239624,\n",
       " 'token': 215,\n",
       " 'token_str': 'quid',\n",
       " 'sequence': 'ubi est quid?.'},\n",
       " {'score': 0.008733403868973255,\n",
       " 'token': 391,\n",
       " 'token_str': 'mihi',\n",
       " 'sequence': 'ubi est mihi?.'},\n",
       " {'score': 0.007146364543586969,\n",
       " 'token': 368,\n",
       " 'token_str': 'sum',\n",
       " 'sequence': 'ubi est sum?.'},\n",
       " {'score': 0.006486538797616959,\n",
       " 'token': 425,\n",
       " 'token_str': 'tibi',\n",
       " 'sequence': 'ubi est tibi?.'}]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# NOTE(review): the prompt ends in \"?.\" (question mark followed by a period); confirm this is intended.\n",
    "fill_mask(\"Ubi est [MASK] ?.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Mask at the end of the input\n",
    "\n",
    "With nothing after the mask, the top prediction is `.` (score 0.76), followed by other punctuation marks."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'score': 0.7615262269973755,\n",
       " 'token': 12,\n",
       " 'token_str': '.',\n",
       " 'sequence': 'de honoratorum.'},\n",
       " {'score': 0.03385818004608154,\n",
       " 'token': 23,\n",
       " 'token_str': ':',\n",
       " 'sequence': 'de honoratorum :'},\n",
       " {'score': 0.02129465527832508,\n",
       " 'token': 10,\n",
       " 'token_str': ',',\n",
       " 'sequence': 'de honoratorum,'},\n",
       " {'score': 0.014383483678102493,\n",
       " 'token': 25,\n",
       " 'token_str': '?',\n",
       " 'sequence': 'de honoratorum?'},\n",
       " {'score': 0.008870471268892288,\n",
       " 'token': 109,\n",
       " 'token_str': 'et',\n",
       " 'sequence': 'de honoratorum et'}]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Reference phrase (presumably the expected completion): \"De honoratorum vehiculis\"\n",
    "fill_mask(\"De honoratorum [MASK]\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Effect of a final period\n",
    "\n",
    "The next two cells use the same prompt, without and with a trailing period. Four candidates (`urbe`, `corpore`, `medio`, `parte`) appear in both results but in a different order; `quo` is only returned without the period and `terra` only with it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'score': 0.00837777741253376,\n",
       " 'token': 1838,\n",
       " 'token_str': 'urbe',\n",
       " 'sequence': 'gallia est omnis divisa in urbe tres'},\n",
       " {'score': 0.007593569345772266,\n",
       " 'token': 1628,\n",
       " 'token_str': 'corpore',\n",
       " 'sequence': 'gallia est omnis divisa in corpore tres'},\n",
       " {'score': 0.007336211856454611,\n",
       " 'token': 2035,\n",
       " 'token_str': 'medio',\n",
       " 'sequence': 'gallia est omnis divisa in medio tres'},\n",
       " {'score': 0.006218622904270887,\n",
       " 'token': 983,\n",
       " 'token_str': 'parte',\n",
       " 'sequence': 'gallia est omnis divisa in parte tres'},\n",
       " {'score': 0.0054352362640202045,\n",
       " 'token': 238,\n",
       " 'token_str': 'quo',\n",
       " 'sequence': 'gallia est omnis divisa in quo tres'}]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fill_mask(\"Gallia est omnis divisa in [MASK] tres\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'score': 0.008044794201850891,\n",
       " 'token': 1628,\n",
       " 'token_str': 'corpore',\n",
       " 'sequence': 'gallia est omnis divisa in corpore tres.'},\n",
       " {'score': 0.00732386251911521,\n",
       " 'token': 1838,\n",
       " 'token_str': 'urbe',\n",
       " 'sequence': 'gallia est omnis divisa in urbe tres.'},\n",
       " {'score': 0.0072334203869104385,\n",
       " 'token': 983,\n",
       " 'token_str': 'parte',\n",
       " 'sequence': 'gallia est omnis divisa in parte tres.'},\n",
       " {'score': 0.006316048558801413,\n",
       " 'token': 2035,\n",
       " 'token_str': 'medio',\n",
       " 'sequence': 'gallia est omnis divisa in medio tres.'},\n",
       " {'score': 0.004988126456737518,\n",
       " 'token': 1177,\n",
       " 'token_str': 'terra',\n",
       " 'sequence': 'gallia est omnis divisa in terra tres.'}]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Adding a final period (full stop) changes the predictions!\n",
    "fill_mask(\"Gallia est omnis divisa in [MASK] tres.\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "bertenv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}