{ "cells": [ { "cell_type": "code", "execution_count": 3, "id": "1c550b9b-ab70-46a5-a584-29a0d3ae31ee", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Defaulting to user installation because normal site-packages is not writeable\n", "Collecting gradio\n", " Using cached gradio-4.41.0-py3-none-any.whl.metadata (15 kB)\n", "Collecting aiofiles<24.0,>=22.0 (from gradio)\n", " Using cached aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)\n", "Requirement already satisfied: anyio<5.0,>=3.0 in /home/obai33/.local/lib/python3.10/site-packages (from gradio) (4.4.0)\n", "Collecting fastapi (from gradio)\n", " Using cached fastapi-0.112.1-py3-none-any.whl.metadata (27 kB)\n", "Collecting ffmpy (from gradio)\n", " Using cached ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)\n", "Collecting gradio-client==1.3.0 (from gradio)\n", " Using cached gradio_client-1.3.0-py3-none-any.whl.metadata (7.1 kB)\n", "Requirement already satisfied: httpx>=0.24.1 in /home/obai33/.local/lib/python3.10/site-packages (from gradio) (0.27.0)\n", "Collecting huggingface-hub>=0.19.3 (from gradio)\n", " Using cached huggingface_hub-0.24.5-py3-none-any.whl.metadata (13 kB)\n", "Collecting importlib-resources<7.0,>=1.3 (from gradio)\n", " Using cached importlib_resources-6.4.3-py3-none-any.whl.metadata (3.9 kB)\n", "Requirement already satisfied: jinja2<4.0 in /home/obai33/.local/lib/python3.10/site-packages (from gradio) (3.1.4)\n", "Requirement already satisfied: markupsafe~=2.0 in /home/obai33/.local/lib/python3.10/site-packages (from gradio) (2.1.5)\n", "Requirement already satisfied: matplotlib~=3.0 in /usr/lib/python3/dist-packages (from gradio) (3.5.1)\n", "Requirement already satisfied: numpy<3.0,>=1.0 in /home/obai33/.local/lib/python3.10/site-packages (from gradio) (1.26.4)\n", "Collecting orjson~=3.0 (from gradio)\n", " Using cached orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (50 kB)\n", "Requirement already satisfied: packaging in /home/obai33/.local/lib/python3.10/site-packages (from gradio) (24.0)\n", "Requirement already satisfied: pandas<3.0,>=1.0 in /home/obai33/.local/lib/python3.10/site-packages (from gradio) (2.2.2)\n", "Requirement already satisfied: pillow<11.0,>=8.0 in /home/obai33/.local/lib/python3.10/site-packages (from gradio) (10.3.0)\n", "Collecting pydantic>=2.0 (from gradio)\n", " Using cached pydantic-2.8.2-py3-none-any.whl.metadata (125 kB)\n", "Collecting pydub (from gradio)\n", " Using cached pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)\n", "Collecting python-multipart>=0.0.9 (from gradio)\n", " Using cached python_multipart-0.0.9-py3-none-any.whl.metadata (2.5 kB)\n", "Requirement already satisfied: pyyaml<7.0,>=5.0 in /usr/lib/python3/dist-packages (from gradio) (5.4.1)\n", "Collecting ruff>=0.2.2 (from gradio)\n", " Using cached ruff-0.6.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)\n", "Collecting semantic-version~=2.0 (from gradio)\n", " Using cached semantic_version-2.10.0-py2.py3-none-any.whl.metadata (9.7 kB)\n", "Collecting tomlkit==0.12.0 (from gradio)\n", " Using cached tomlkit-0.12.0-py3-none-any.whl.metadata (2.7 kB)\n", "Collecting typer<1.0,>=0.12 (from gradio)\n", " Using cached typer-0.12.4-py3-none-any.whl.metadata (15 kB)\n", "Requirement already satisfied: typing-extensions~=4.0 in /home/obai33/.local/lib/python3.10/site-packages (from gradio) (4.12.1)\n", "Requirement already satisfied: urllib3~=2.0 in /home/obai33/.local/lib/python3.10/site-packages (from gradio) (2.2.1)\n", "Collecting uvicorn>=0.14.0 (from gradio)\n", " Using cached uvicorn-0.30.6-py3-none-any.whl.metadata (6.6 kB)\n", "Requirement already satisfied: fsspec in /home/obai33/.local/lib/python3.10/site-packages (from gradio-client==1.3.0->gradio) (2024.5.0)\n", "Collecting websockets<13.0,>=10.0 (from gradio-client==1.3.0->gradio)\n", " Using cached websockets-12.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n", "Requirement already satisfied: idna>=2.8 in /home/obai33/.local/lib/python3.10/site-packages (from anyio<5.0,>=3.0->gradio) (3.7)\n", "Requirement already satisfied: sniffio>=1.1 in /home/obai33/.local/lib/python3.10/site-packages (from anyio<5.0,>=3.0->gradio) (1.3.1)\n", "Requirement already satisfied: exceptiongroup>=1.0.2 in /home/obai33/.local/lib/python3.10/site-packages (from anyio<5.0,>=3.0->gradio) (1.2.1)\n", "Requirement already satisfied: certifi in /home/obai33/.local/lib/python3.10/site-packages (from httpx>=0.24.1->gradio) (2024.6.2)\n", "Requirement already satisfied: httpcore==1.* in /home/obai33/.local/lib/python3.10/site-packages (from httpx>=0.24.1->gradio) (1.0.5)\n", "Requirement already satisfied: h11<0.15,>=0.13 in /home/obai33/.local/lib/python3.10/site-packages (from httpcore==1.*->httpx>=0.24.1->gradio) (0.14.0)\n", "Requirement already satisfied: filelock in /home/obai33/.local/lib/python3.10/site-packages (from huggingface-hub>=0.19.3->gradio) (3.14.0)\n", "Requirement already satisfied: requests in /home/obai33/.local/lib/python3.10/site-packages (from huggingface-hub>=0.19.3->gradio) (2.32.3)\n", "Requirement already satisfied: tqdm>=4.42.1 in /home/obai33/.local/lib/python3.10/site-packages (from huggingface-hub>=0.19.3->gradio) (4.66.4)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /home/obai33/.local/lib/python3.10/site-packages (from pandas<3.0,>=1.0->gradio) (2.9.0.post0)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/lib/python3/dist-packages (from pandas<3.0,>=1.0->gradio) (2022.1)\n", "Requirement already satisfied: tzdata>=2022.7 in /home/obai33/.local/lib/python3.10/site-packages (from pandas<3.0,>=1.0->gradio) (2024.1)\n", "Collecting annotated-types>=0.4.0 (from pydantic>=2.0->gradio)\n", " Using cached annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB)\n", "Collecting pydantic-core==2.20.1 (from pydantic>=2.0->gradio)\n", " Using cached pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n", "Collecting click>=8.0.0 (from typer<1.0,>=0.12->gradio)\n", " Using cached click-8.1.7-py3-none-any.whl.metadata (3.0 kB)\n", "Collecting shellingham>=1.3.0 (from typer<1.0,>=0.12->gradio)\n", " Using cached shellingham-1.5.4-py2.py3-none-any.whl.metadata (3.5 kB)\n", "Requirement already satisfied: rich>=10.11.0 in /home/obai33/.local/lib/python3.10/site-packages (from typer<1.0,>=0.12->gradio) (13.7.1)\n", "Collecting starlette<0.39.0,>=0.37.2 (from fastapi->gradio)\n", " Using cached starlette-0.38.2-py3-none-any.whl.metadata (5.9 kB)\n", "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.8.2->pandas<3.0,>=1.0->gradio) (1.16.0)\n", "Requirement already satisfied: markdown-it-py>=2.2.0 in /home/obai33/.local/lib/python3.10/site-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (3.0.0)\n", "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /home/obai33/.local/lib/python3.10/site-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (2.18.0)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /home/obai33/.local/lib/python3.10/site-packages (from requests->huggingface-hub>=0.19.3->gradio) (3.3.2)\n", "Requirement already satisfied: mdurl~=0.1 in /home/obai33/.local/lib/python3.10/site-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0,>=0.12->gradio) (0.1.2)\n", "Downloading gradio-4.41.0-py3-none-any.whl (12.6 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.6/12.6 MB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hDownloading gradio_client-1.3.0-py3-none-any.whl (318 kB)\n", "Downloading tomlkit-0.12.0-py3-none-any.whl (37 kB)\n", "Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)\n", "Downloading huggingface_hub-0.24.5-py3-none-any.whl (417 kB)\n", "Downloading importlib_resources-6.4.3-py3-none-any.whl (35 kB)\n", "Downloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (141 kB)\n", "Downloading pydantic-2.8.2-py3-none-any.whl (423 kB)\n", "Downloading pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.1/2.1 MB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hDownloading python_multipart-0.0.9-py3-none-any.whl (22 kB)\n", "Downloading ruff-0.6.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.2 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.2/10.2 MB\u001b[0m \u001b[31m7.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hDownloading semantic_version-2.10.0-py2.py3-none-any.whl (15 kB)\n", "Downloading typer-0.12.4-py3-none-any.whl (47 kB)\n", "Downloading uvicorn-0.30.6-py3-none-any.whl (62 kB)\n", "Downloading fastapi-0.112.1-py3-none-any.whl (93 kB)\n", "Downloading ffmpy-0.4.0-py3-none-any.whl (5.8 kB)\n", "Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n", "Downloading annotated_types-0.7.0-py3-none-any.whl (13 kB)\n", "Downloading click-8.1.7-py3-none-any.whl (97 kB)\n", "Downloading shellingham-1.5.4-py2.py3-none-any.whl (9.8 kB)\n", "Downloading starlette-0.38.2-py3-none-any.whl (72 kB)\n", "Downloading websockets-12.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (130 kB)\n", "Installing collected packages: pydub, websockets, tomlkit, shellingham, semantic-version, ruff, python-multipart, pydantic-core, orjson, importlib-resources, ffmpy, click, annotated-types, aiofiles, uvicorn, starlette, pydantic, huggingface-hub, typer, gradio-client, fastapi, gradio\n", "Successfully installed aiofiles-23.2.1 annotated-types-0.7.0 click-8.1.7 fastapi-0.112.1 ffmpy-0.4.0 gradio-4.41.0 gradio-client-1.3.0 huggingface-hub-0.24.5 importlib-resources-6.4.3 orjson-3.10.7 pydantic-2.8.2 pydantic-core-2.20.1 pydub-0.25.1 python-multipart-0.0.9 ruff-0.6.1 semantic-version-2.10.0 shellingham-1.5.4 starlette-0.38.2 tomlkit-0.12.0 typer-0.12.4 uvicorn-0.30.6 websockets-12.0\n" ] } ], "source": [ "!pip install gradio" ] }, { "cell_type": "code", "execution_count": 23, "id": "c850d5ae-5d43-45fb-91cc-084427440a97", "metadata": {}, "outputs": [], "source": [ "import torch\n", "import torch.nn.functional as F\n", "import torchvision\n", "import matplotlib.pyplot as plt\n", "import zipfile\n", "import os\n", "import gradio as gr\n", "from PIL import Image\n" ] }, { "cell_type": "code", "execution_count": 70, "id": "c3d9ca4d-fb14-495e-b405-8a964ecc9a51", "metadata": {}, "outputs": [], "source": [ "CHARS = \"~=\" + \" abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789,.'-!?:;\\\"\"\n", "BLANK = 0\n", "PAD = 1\n", "CHARS_DICT = {c: i for i, c in enumerate(CHARS)}\n", "TEXTLEN = 30\n", "\n", "tokens_list = list(CHARS_DICT.keys())\n", "silence_token = '|'\n", "\n", "if silence_token not in tokens_list:\n", " tokens_list.append(silence_token)\n", "\n", "\n", "def fit_picture(img):\n", " target_height = 32\n", " target_width = 400\n", " \n", " # Calculate resize dimensions\n", " aspect_ratio = img.width / img.height\n", " if aspect_ratio > (target_width / target_height):\n", " resize_width = target_width\n", " resize_height = int(target_width / aspect_ratio)\n", " else:\n", " resize_height = target_height\n", " resize_width = int(target_height * aspect_ratio)\n", " \n", " # Resize transformation\n", " resize_transform = transforms.Resize((resize_height, resize_width))\n", " \n", " # Pad transformation\n", " padding_height = (target_height - resize_height) if target_height > resize_height else 0\n", " padding_width = (target_width - resize_width) if target_width > resize_width else 0\n", " pad_transform = transforms.Pad((0, 0, padding_width, padding_height), fill=0, padding_mode='constant')\n", " \n", " transform = torchvision.transforms.Compose([\n", " torchvision.transforms.Grayscale(num_output_channels = 1),\n", " torchvision.transforms.ToTensor(),\n", " torchvision.transforms.Normalize(0.5,0.5),\n", " resize_transform,\n", " pad_transform\n", " ])\n", "\n", " fin_img = transform(img)\n", " return fin_img\n", "\n", "def load_model(filename):\n", " data = torch.load(filename)\n", " recognizer.load_state_dict(data[\"recognizer\"])\n", " optimizer.load_state_dict(data[\"optimizer\"])\n", "\n", "def ctc_decode_sequence(seq):\n", " \"\"\"Removes blanks and repetitions from the sequence.\"\"\"\n", " ret = []\n", " prev = BLANK\n", " for x in seq:\n", " if prev != BLANK and prev != x:\n", " ret.append(prev)\n", " prev = x\n", " if seq[-1] == 66:\n", " ret.append(66)\n", " return ret\n", "\n", "def ctc_decode(codes):\n", " \"\"\"Decode a batch of sequences.\"\"\"\n", " ret = []\n", " for cs in codes.T:\n", " ret.append(ctc_decode_sequence(cs))\n", " return ret\n", "\n", "\n", "def decode_text(codes):\n", " chars = [CHARS[c] for c in codes]\n", " return ''.join(chars)" ] }, { "cell_type": "code", "execution_count": 65, "id": "6722e370-e7df-4efe-aa9d-e9436d3cc08e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Device: cuda\n" ] } ], "source": [ "class Residual(torch.nn.Module):\n", " def __init__(self, in_channels, out_channels, stride, pdrop = 0.2):\n", " super().__init__()\n", " self.conv1 = torch.nn.Conv2d(in_channels, out_channels, 3, stride, 1)\n", " self.bn1 = torch.nn.BatchNorm2d(out_channels)\n", " self.conv2 = torch.nn.Conv2d(out_channels, out_channels, 3, 1, 1)\n", " self.bn2 = torch.nn.BatchNorm2d(out_channels)\n", " if in_channels != out_channels or stride != 1:\n", " self.skip = torch.nn.Conv2d(in_channels, out_channels, 1, stride, 0)\n", " else:\n", " self.skip = torch.nn.Identity()\n", " self.dropout = torch.nn.Dropout2d(pdrop)\n", "\n", " def forward(self, x):\n", " y = torch.nn.functional.relu(self.bn1(self.conv1(x)))\n", " y = torch.nn.functional.relu(self.bn2(self.conv2(y)) + self.skip(x))\n", " y = self.dropout(y)\n", " return y\n", " \n", "class TextRecognizer(torch.nn.Module):\n", " def __init__(self, labels):\n", " super().__init__()\n", " self.feature_extractor = torch.nn.Sequential(\n", " Residual(1, 32, 1),\n", " Residual(32, 32, 2),\n", " Residual(32, 32, 1),\n", " Residual(32, 64, 2),\n", " Residual(64, 64, 1),\n", " Residual(64, 128, (2,1)),\n", " Residual(128, 128, 1),\n", " Residual(128, 128, (2,1)),\n", " Residual(128, 128, (2,1)),\n", " )\n", " self.recurrent = torch.nn.LSTM(128, 128, 1 ,bidirectional = True)\n", " self.output = torch.nn.Linear(256, labels)\n", "\n", " def forward(self, x):\n", " x = self.feature_extractor(x)\n", " x = x.squeeze(2)\n", " x = x.permute(2,0,1)\n", " x,_ = self.recurrent(x)\n", " x = self.output(x)\n", " return x\n", "\n", "recognizer = TextRecognizer(len(CHARS))\n", "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", "print(\"Device:\", DEVICE)\n", "LR = 1e-3\n", "\n", "recognizer.to(DEVICE)\n", "optimizer = torch.optim.Adam(recognizer.parameters(), lr=LR)" ] }, { "cell_type": "code", "execution_count": 75, "id": "e61f1d87-4a82-4714-b4e1-33719064a735", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Running on local URL: http://127.0.0.1:7889\n", "Running on public URL: https://e1090d81e4ea8bf190.gradio.live\n", "\n", "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [] }, "execution_count": 75, "metadata": {}, "output_type": "execute_result" }, { "name": "stderr", "output_type": "stream", "text": [ "Traceback (most recent call last):\n", " File \"/home/obai33/.local/lib/python3.10/site-packages/gradio/queueing.py\", line 536, in process_events\n", " response = await route_utils.call_process_api(\n", " File \"/home/obai33/.local/lib/python3.10/site-packages/gradio/route_utils.py\", line 288, in call_process_api\n", " output = await app.get_blocks().process_api(\n", " File \"/home/obai33/.local/lib/python3.10/site-packages/gradio/blocks.py\", line 1931, in process_api\n", " result = await self.call_function(\n", " File \"/home/obai33/.local/lib/python3.10/site-packages/gradio/blocks.py\", line 1516, in call_function\n", " prediction = await anyio.to_thread.run_sync( # type: ignore\n", " File \"/home/obai33/.local/lib/python3.10/site-packages/anyio/to_thread.py\", line 56, in run_sync\n", " return await get_async_backend().run_sync_in_worker_thread(\n", " File \"/home/obai33/.local/lib/python3.10/site-packages/anyio/_backends/_asyncio.py\", line 2177, in run_sync_in_worker_thread\n", " return await future\n", " File \"/home/obai33/.local/lib/python3.10/site-packages/anyio/_backends/_asyncio.py\", line 859, in run\n", " result = context.run(func, *args)\n", " File \"/home/obai33/.local/lib/python3.10/site-packages/gradio/utils.py\", line 826, in wrapper\n", " response = f(*args, **kwargs)\n", " File \"/tmp/ipykernel_848/2152623987.py\", line 5, in ctc_read\n", " imagefin = fit_picture(image)\n", " File \"/tmp/ipykernel_848/2382022948.py\", line 19, in fit_picture\n", " aspect_ratio = img.width / img.height\n", "AttributeError: 'NoneType' object has no attribute 'width'\n", "Traceback (most recent call last):\n", " File \"/home/obai33/.local/lib/python3.10/site-packages/gradio/queueing.py\", line 536, in process_events\n", " response = await route_utils.call_process_api(\n", " File \"/home/obai33/.local/lib/python3.10/site-packages/gradio/route_utils.py\", line 288, in call_process_api\n", " output = await app.get_blocks().process_api(\n", " File \"/home/obai33/.local/lib/python3.10/site-packages/gradio/blocks.py\", line 1931, in process_api\n", " result = await self.call_function(\n", " File \"/home/obai33/.local/lib/python3.10/site-packages/gradio/blocks.py\", line 1516, in call_function\n", " prediction = await anyio.to_thread.run_sync( # type: ignore\n", " File \"/home/obai33/.local/lib/python3.10/site-packages/anyio/to_thread.py\", line 56, in run_sync\n", " return await get_async_backend().run_sync_in_worker_thread(\n", " File \"/home/obai33/.local/lib/python3.10/site-packages/anyio/_backends/_asyncio.py\", line 2177, in run_sync_in_worker_thread\n", " return await future\n", " File \"/home/obai33/.local/lib/python3.10/site-packages/anyio/_backends/_asyncio.py\", line 859, in run\n", " result = context.run(func, *args)\n", " File \"/home/obai33/.local/lib/python3.10/site-packages/gradio/utils.py\", line 826, in wrapper\n", " response = f(*args, **kwargs)\n", " File \"/tmp/ipykernel_848/2152623987.py\", line 5, in ctc_read\n", " imagefin = fit_picture(image)\n", " File \"/tmp/ipykernel_848/2382022948.py\", line 19, in fit_picture\n", " aspect_ratio = img.width / img.height\n", "AttributeError: 'NoneType' object has no attribute 'width'\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "torch.Size([1, 1, 32, 400])\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/obai33/.local/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)\n", " return F.conv2d(input, weight, bias, self.stride,\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "torch.Size([1, 1, 32, 400])\n", "torch.Size([1, 1, 32, 400])\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/obai33/.local/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)\n", " return F.conv2d(input, weight, bias, self.stride,\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "torch.Size([1, 1, 32, 400])\n", "torch.Size([1, 1, 32, 400])\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/obai33/.local/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)\n", " return F.conv2d(input, weight, bias, self.stride,\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "torch.Size([1, 1, 32, 400])\n", "torch.Size([1, 1, 32, 400])\n", "torch.Size([1, 1, 32, 400])\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Traceback (most recent call last):\n", " File \"/home/obai33/.local/lib/python3.10/site-packages/gradio/queueing.py\", line 536, in process_events\n", " response = await route_utils.call_process_api(\n", " File \"/home/obai33/.local/lib/python3.10/site-packages/gradio/route_utils.py\", line 288, in call_process_api\n", " output = await app.get_blocks().process_api(\n", " File \"/home/obai33/.local/lib/python3.10/site-packages/gradio/blocks.py\", line 1931, in process_api\n", " result = await self.call_function(\n", " File \"/home/obai33/.local/lib/python3.10/site-packages/gradio/blocks.py\", line 1516, in call_function\n", " prediction = await anyio.to_thread.run_sync( # type: ignore\n", " File \"/home/obai33/.local/lib/python3.10/site-packages/anyio/to_thread.py\", line 56, in run_sync\n", " return await get_async_backend().run_sync_in_worker_thread(\n", " File \"/home/obai33/.local/lib/python3.10/site-packages/anyio/_backends/_asyncio.py\", line 2177, in run_sync_in_worker_thread\n", " return await future\n", " File \"/home/obai33/.local/lib/python3.10/site-packages/anyio/_backends/_asyncio.py\", line 859, in run\n", " result = context.run(func, *args)\n", " File \"/home/obai33/.local/lib/python3.10/site-packages/gradio/utils.py\", line 826, in wrapper\n", " response = f(*args, **kwargs)\n", " File \"/tmp/ipykernel_848/2152623987.py\", line 5, in ctc_read\n", " imagefin = fit_picture(image)\n", " File \"/tmp/ipykernel_848/2382022948.py\", line 19, in fit_picture\n", " aspect_ratio = img.width / img.height\n", "AttributeError: 'NoneType' object has no attribute 'width'\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "torch.Size([1, 1, 32, 400])\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/obai33/.local/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)\n", " return F.conv2d(input, weight, bias, self.stride,\n", "Traceback (most recent call last):\n", " File \"/home/obai33/.local/lib/python3.10/site-packages/gradio/queueing.py\", line 536, in process_events\n", " response = await route_utils.call_process_api(\n", " File \"/home/obai33/.local/lib/python3.10/site-packages/gradio/route_utils.py\", line 288, in call_process_api\n", " output = await app.get_blocks().process_api(\n", " File \"/home/obai33/.local/lib/python3.10/site-packages/gradio/blocks.py\", line 1931, in process_api\n", " result = await self.call_function(\n", " File \"/home/obai33/.local/lib/python3.10/site-packages/gradio/blocks.py\", line 1516, in call_function\n", " prediction = await anyio.to_thread.run_sync( # type: ignore\n", " File \"/home/obai33/.local/lib/python3.10/site-packages/anyio/to_thread.py\", line 56, in run_sync\n", " return await get_async_backend().run_sync_in_worker_thread(\n", " File \"/home/obai33/.local/lib/python3.10/site-packages/anyio/_backends/_asyncio.py\", line 2177, in run_sync_in_worker_thread\n", " return await future\n", " File \"/home/obai33/.local/lib/python3.10/site-packages/anyio/_backends/_asyncio.py\", line 859, in run\n", " result = context.run(func, *args)\n", " File \"/home/obai33/.local/lib/python3.10/site-packages/gradio/utils.py\", line 826, in wrapper\n", " response = f(*args, **kwargs)\n", " File \"/tmp/ipykernel_848/2152623987.py\", line 5, in ctc_read\n", " imagefin = fit_picture(image)\n", " File \"/tmp/ipykernel_848/2382022948.py\", line 19, in fit_picture\n", " aspect_ratio = img.width / img.height\n", "AttributeError: 'NoneType' object has no attribute 'width'\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "torch.Size([1, 1, 32, 400])\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/obai33/.local/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)\n", " return F.conv2d(input, weight, bias, self.stride,\n" ] } ], "source": [ "load_model('model.pt')\n", "recognizer.eval()\n", "\n", "def ctc_read(image):\n", " imagefin = fit_picture(image)\n", " image_tensor = imagefin.unsqueeze(0).to(DEVICE)\n", " print(image_tensor.size())\n", " \n", " with torch.no_grad():\n", " scores = recognizer(image_tensor)\n", "\n", " predictions = scores.argmax(2).cpu().numpy()\n", "\n", " decoded_sequences = ctc_decode(predictions)\n", "\n", " # Convert decoded sequences to text\n", " for i in decoded_sequences:\n", " decoded_text = decode_text(i)\n", "\n", " return decoded_text\n", "\n", "\n", "# Gradio Interface\n", "iface = gr.Interface(\n", " fn=ctc_read,\n", " inputs=gr.Image(type=\"pil\"), # PIL Image input\n", " outputs=\"text\", # Text output\n", " title=\"Handwritten Text Recognition\",\n", " description=\"Upload an image, and the custome AI will extract the text.\"\n", ")\n", "\n", "iface.launch(share=True)\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" } }, "nbformat": 4, "nbformat_minor": 5 }