{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "id": "BQu8BQDJRTzn" }, "outputs": [], "source": [ "from google.colab import drive\n", "drive.mount('/content/drive')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "IubsuHn3RTxn" }, "outputs": [], "source": [ "!pip install -r requirements.txt" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "2f0ip2WksaKq", "outputId": "84cc86d2-9104-4ddf-91f4-c48c7c7f96a4" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting openai\n", " Downloading openai-1.3.7-py3-none-any.whl (221 kB)\n", "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/221.4 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.2/221.4 kB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m221.4/221.4 kB\u001b[0m \u001b[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: anyio<4,>=3.5.0 in /usr/local/lib/python3.10/dist-packages (from openai) (3.7.1)\n", "Requirement already satisfied: distro<2,>=1.7.0 in /usr/lib/python3/dist-packages (from openai) (1.7.0)\n", "Collecting httpx<1,>=0.23.0 (from openai)\n", " Downloading httpx-0.25.2-py3-none-any.whl (74 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.0/75.0 kB\u001b[0m \u001b[31m10.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: pydantic<3,>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from openai) (1.10.13)\n", "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from openai) (1.3.0)\n", "Requirement already satisfied: tqdm>4 in /usr/local/lib/python3.10/dist-packages (from openai) (4.66.1)\n", "Requirement already satisfied: typing-extensions<5,>=4.5 in /usr/local/lib/python3.10/dist-packages (from openai) (4.5.0)\n", "Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.5.0->openai) (3.6)\n", "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.5.0->openai) (1.2.0)\n", "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->openai) (2023.11.17)\n", "Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)\n", " Downloading httpcore-1.0.2-py3-none-any.whl (76 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.9/76.9 kB\u001b[0m \u001b[31m12.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)\n", " Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hInstalling collected packages: h11, httpcore, httpx, openai\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", "llmx 0.0.15a0 requires cohere, which is not installed.\n", "llmx 0.0.15a0 requires tiktoken, which is not installed.\u001b[0m\u001b[31m\n", "\u001b[0mSuccessfully installed h11-0.14.0 httpcore-1.0.2 httpx-0.25.2 openai-1.3.7\n" ] } ], "source": [ "!pip install --upgrade openai" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "wVSEk4MlsaNk", "outputId": "2229de07-db0b-4be9-a134-3788a9a4776b" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting openai-whisper\n", " Downloading openai-whisper-20231117.tar.gz (798 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m798.6/798.6 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", "Requirement already satisfied: triton<3,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (2.1.0)\n", "Requirement already satisfied: numba in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (0.58.1)\n", "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (1.23.5)\n", "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (2.1.0+cu118)\n", "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (4.66.1)\n", "Requirement already satisfied: more-itertools in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (10.1.0)\n", "Collecting tiktoken (from openai-whisper)\n", " Downloading tiktoken-0.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m56.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from triton<3,>=2.0.0->openai-whisper) (3.13.1)\n", "Requirement already satisfied: llvmlite<0.42,>=0.41.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba->openai-whisper) (0.41.1)\n", "Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken->openai-whisper) (2023.6.3)\n", "Requirement already satisfied: requests>=2.26.0 in /usr/local/lib/python3.10/dist-packages (from tiktoken->openai-whisper) (2.31.0)\n", "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch->openai-whisper) (4.5.0)\n", "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->openai-whisper) (1.12)\n", "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->openai-whisper) (3.2.1)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->openai-whisper) (3.1.2)\n", "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch->openai-whisper) (2023.6.0)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken->openai-whisper) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken->openai-whisper) (3.6)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken->openai-whisper) (2.0.7)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken->openai-whisper) (2023.11.17)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->openai-whisper) (2.1.3)\n", "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->openai-whisper) (1.3.0)\n", "Building wheels for collected packages: openai-whisper\n", " Building wheel for openai-whisper (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for openai-whisper: filename=openai_whisper-20231117-py3-none-any.whl size=801356 sha256=1b630eb40941206e0a5d2f94e1a42a54ba50a56bf8edcff93a7e62d78f5db157\n", " Stored in directory: /root/.cache/pip/wheels/d0/85/e1/9361b4cbea7dd4b7f6702fa4c3afc94877952eeb2b62f45f56\n", "Successfully built openai-whisper\n", "Installing collected packages: tiktoken, openai-whisper\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", "llmx 0.0.15a0 requires cohere, which is not installed.\u001b[0m\u001b[31m\n", "\u001b[0mSuccessfully installed openai-whisper-20231117 tiktoken-0.5.1\n" ] } ], "source": [ "!pip install --upgrade openai-whisper" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "oYGTh27jsddT", "outputId": "8e9485aa-3645-4b9c-8cbd-99e5cb83bf33" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "UsageError: Line magic function `%sudo` not found.\n" ] } ], "source": [ "!sudo apt update && sudo apt install ffmpeg" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "id": "3ZrCX53_ssHy" }, "outputs": [], "source": [ "import whisper\n", "import cv2\n", "import os\n", "import urllib.request\n", "from PIL import Image\n", "from ultralytics import YOLO\n", "import torch\n", "import matplotlib.pyplot as plt\n", "from tqdm import tqdm\n", "from transformers import pipeline\n", "import moviepy.editor as mp\n", "import json\n", "import re\n", "import gradio as gr\n", "from openai import OpenAI" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": { "id": "lmWMJ_FeJDyD" }, "source": [ "# Full" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'d:\\\\M.S\\\\Boston University\\\\College\\\\Studies\\\\Sem 3 (Fall 23)\\\\Subjects\\\\CS549 - Spark! ML Practicum\\\\Gradio App'" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%pwd" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "id": "YMDeEOwLtGYN" }, "outputs": [], "source": [ "def video_transcription(video_path):\n", " model = whisper.load_model('tiny')\n", " transcript = model.transcribe(video_path, verbose = False, language = 'en')\n", "\n", " return json.dumps(transcript)\n", "\n", "def action_detection(json_object, openai_key):\n", " transcript = json.loads(json_object)\n", " transcript_string = ''\n", " for segments in transcript['segments']:\n", " transcript_string+=str(segments['text']+'\\n')\n", "\n", " chunks = []\n", " output = {}\n", " count = 0\n", " split_transcript = transcript_string.split(\"\\n\")\n", " num_lines = len(split_transcript)\n", " num_chars = 0\n", " i = 0\n", " prev = 0\n", "\n", " while i < num_lines:\n", " num_chars+=len(split_transcript[i])\n", " if num_chars>=16000:\n", " chunks.append(\"\\n\".join(split_transcript[prev:i]))\n", " prev = i\n", " num_chars = 0\n", " i+=1\n", " if i == num_lines:\n", " chunks.append(\"\\n\".join(split_transcript[prev:i]))\n", "\n", " client = OpenAI(api_key = openai_key)\n", "\n", " for i in chunks:\n", " completion = client.chat.completions.create(\n", " model=\"gpt-4\",\n", " messages=[\n", " {\"role\": \"user\", \"content\": f\"You are an AI system specialized in detecting planning issues, critiquing plans, and analyzing conversations between police officers regarding how to disperse. Additionally, identify any instances suggesting 1st Amendment violations, criticizing the lack of a plan, and aggressive comments. Transcript:\\n\\n{i}\\n\\n\"},\n", " {\"role\": \"user\", \"content\": \"Give response only in the json format for example: \\{\\\"1\\\": \\\"What should we do now. I don't have a clue?\\\", \\\"2\\\": \\\"what the fuck is this\\\", \\\"3\\\":\\\"Beat the fuck out of them\\\"\\}. There can be multiple instances, find out all of them. If you do not find anything just return {\\\"None\\\":\\\"None\\\"}\"}\n", " ],\n", " seed = 42,\n", " temperature = 0\n", " )\n", "\n", "\n", " gpt_output = completion.choices[0].message.content\n", " gpt_output = dict(json.loads(gpt_output))\n", " for j in gpt_output.values():\n", " output[count] = j\n", " count+=1\n", "\n", " sent_with_time = []\n", "\n", " for sentence_to_search in output.values():\n", " pattern = re.compile(re.escape(sentence_to_search), re.IGNORECASE)\n", "\n", " matching_entries = [entry for entry in transcript['segments'] if re.search(pattern, entry['text'])]\n", "\n", " if matching_entries:\n", " for entry in matching_entries:\n", " hours_s, remainder = divmod(entry['start'], 3600)\n", " minutes_s, seconds_s = divmod(remainder, 60)\n", " hours_s = str(int(hours_s)).zfill(2)\n", " minutes_s = str(int(minutes_s)).zfill(2)\n", " seconds_s = str(int(seconds_s)).zfill(2)\n", "\n", " \n", " hours_e, remainder = divmod(entry['end'], 3600)\n", " minutes_e, seconds_e = divmod(remainder, 60)\n", " hours_e = str(int(hours_e)).zfill(2)\n", " minutes_e = str(int(minutes_e)).zfill(2)\n", " seconds_e = str(int(seconds_e)).zfill(2)\n", "\n", " sent_with_time.append(sentence_to_search + ' Start Time: ' + str(hours_s) + \":\" + str(minutes_s) + \":\" + str(seconds_s) + ' End Time: ' + str(hours_e) + \":\" + str(minutes_e) + \":\" + str(seconds_e))\n", "\n", " return sent_with_time\n", "\n", "def process_video(video_path, weights):\n", " try:\n", " # This code cell detects batons in the video\n", " current_frame = 0\n", " model = YOLO(weights)\n", " cap = cv2.VideoCapture(video_path)\n", " fps = int(cap.get(cv2.CAP_PROP_FPS))\n", " conseq_frames = 0\n", " start_time = \"\"\n", " end_time = \"\"\n", " res = []\n", "\n", " while True:\n", " ret, frame = cap.read()\n", " if not ret:\n", " break\n", "\n", " # Detecting baton on one frame per second\n", " if current_frame % fps == 0:\n", " currect_sec = current_frame/fps\n", "\n", " # Model prediction on current frame\n", " results = model(frame, verbose = False)\n", " count = 0\n", " classes = results[0].boxes.data\n", "\n", " # Formatting the time for printing\n", " hours, remainder = divmod(currect_sec, 3600)\n", " minutes, seconds = divmod(remainder, 60)\n", " hours = str(int(hours)).zfill(2)\n", " minutes = str(int(minutes)).zfill(2)\n", " seconds = str(int(seconds)).zfill(2)\n", "\n", " for i in classes:\n", "\n", " # Checking if baton is detected (i.e. if the class corresponding to baton is 1 or not)\n", " if float(i[5]) == 1:\n", " count+=1\n", "\n", " # Marking the start_time if this is the first consecutive frame a baton is detected in\n", " if count >= 1:\n", " conseq_frames+=1\n", " if conseq_frames == 1:\n", " start_time = hours + \":\" + minutes + \":\" + seconds\n", "\n", " # Marking the end time if after one or multiple consecutive frames of detection, a baton is not detected\n", " else:\n", " if conseq_frames > 0:\n", " conseq_frames = 0\n", " end_time = hours + \":\" + minutes + \":\" + seconds\n", "\n", " # Printing time intervals in which baton was detected\n", " res.append(start_time + \" to \" + end_time)\n", " start_time = \"\"\n", " end_time = \"\"\n", "\n", " current_frame += 1\n", " cap.release()\n", "\n", " return \"\\n\".join(res)\n", "\n", " except Exception as e:\n", "\n", " return e\n", "\n", "def all_funcs(openai_key,video_path, yolo_weights, pr = gr.Progress(track_tqdm = True)):\n", "\n", " video_path = video_path[0].split('/')[-1]\n", " yolo_weights = yolo_weights[0].split('/')[-1]\n", " transcript = video_transcription(video_path)\n", " sentences = action_detection(transcript, openai_key)\n", " batons = process_video(video_path, yolo_weights)\n", "\n", " print(\"ALL FUNC Executed without errors\")\n", "\n", " return sentences, batons" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "id": "eHZxd3sIZXar" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ " 96%|█████████▌| 916553/952553 [23:32<00:55, 648.89frames/s] \n" ] } ], "source": [ "json_dump = video_transcription(os.path.join('/projectnb/cs505ws/students/ksashank/', 'protest-006.mp4'))" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "out = action_detection(json_dump, \"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\")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['None Start Time: 00:17:27 End Time: 00:17:31',\n", " 'None Start Time: 00:17:32 End Time: 00:17:33',\n", " 'None Start Time: 00:21:16 End Time: 00:21:18',\n", " \"Let's hit him with the fucking hose. Start Time: 00:35:32 End Time: 00:35:33\",\n", " \"Y'all ain't got nobody to fuck with. Start Time: 00:40:38 End Time: 00:40:40\",\n", " \"I'm not fucking with you. Start Time: 00:52:59 End Time: 00:53:00\",\n", " \"I'm going to fuck this. Start Time: 01:16:28 End Time: 01:16:30\",\n", " \"I'm going to fuck this. Start Time: 01:16:30 End Time: 01:16:32\",\n", " \"I'm going to fuck this. Start Time: 01:17:00 End Time: 01:17:02\",\n", " 'Fucking crazy, huh? Start Time: 01:17:08 End Time: 01:17:10',\n", " \"What the fuck's in it? Start Time: 01:19:58 End Time: 01:20:00\",\n", " 'What the fuck are you doing? Start Time: 00:54:09 End Time: 00:54:12',\n", " 'What the fuck did you do? Start Time: 00:52:53 End Time: 00:52:55',\n", " \"What's your fucking name? Start Time: 01:23:34 End Time: 01:23:36\",\n", " \"I'm going to fuck this. Start Time: 01:16:28 End Time: 01:16:30\",\n", " \"I'm going to fuck this. Start Time: 01:16:30 End Time: 01:16:32\",\n", " \"I'm going to fuck this. Start Time: 01:17:00 End Time: 01:17:02\",\n", " 'This is fucking nuts. Start Time: 01:11:31 End Time: 01:11:33',\n", " 'This is fucking nuts. Start Time: 01:11:34 End Time: 01:11:36',\n", " \"Shit me, I'm going fuck home. Start Time: 01:14:56 End Time: 01:14:58\",\n", " 'What the fuck did I do? Start Time: 01:34:10 End Time: 01:34:13',\n", " 'None Start Time: 00:17:27 End Time: 00:17:31',\n", " 'None Start Time: 00:17:32 End Time: 00:17:33',\n", " 'None Start Time: 00:21:16 End Time: 00:21:18']" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "out" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "ename": "FileNotFoundError", "evalue": "[WinError 2] The system cannot find the file specified", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", "\u001b[1;32md:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\Gradio App\\Spark_Deployment_Final.ipynb Cell 13\u001b[0m line \u001b[0;36m1\n\u001b[1;32m----> 1\u001b[0m video_transcription(\u001b[39m\"\u001b[39;49m\u001b[39mtest.mp4\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n", "\u001b[1;32md:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\Gradio App\\Spark_Deployment_Final.ipynb Cell 13\u001b[0m line \u001b[0;36m3\n\u001b[0;32m 1\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mvideo_transcription\u001b[39m(video_path):\n\u001b[0;32m 2\u001b[0m model \u001b[39m=\u001b[39m whisper\u001b[39m.\u001b[39mload_model(\u001b[39m'\u001b[39m\u001b[39mtiny\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m----> 3\u001b[0m transcript \u001b[39m=\u001b[39m model\u001b[39m.\u001b[39;49mtranscribe(video_path, verbose \u001b[39m=\u001b[39;49m \u001b[39mFalse\u001b[39;49;00m, language \u001b[39m=\u001b[39;49m \u001b[39m'\u001b[39;49m\u001b[39men\u001b[39;49m\u001b[39m'\u001b[39;49m)\n\u001b[0;32m 5\u001b[0m \u001b[39mreturn\u001b[39;00m json\u001b[39m.\u001b[39mdumps(transcript)\n", "File \u001b[1;32md:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\whisper\\transcribe.py:122\u001b[0m, in \u001b[0;36mtranscribe\u001b[1;34m(model, audio, verbose, temperature, compression_ratio_threshold, logprob_threshold, no_speech_threshold, condition_on_previous_text, initial_prompt, word_timestamps, prepend_punctuations, append_punctuations, **decode_options)\u001b[0m\n\u001b[0;32m 119\u001b[0m decode_options[\u001b[39m\"\u001b[39m\u001b[39mfp16\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m\n\u001b[0;32m 121\u001b[0m \u001b[39m# Pad 30-seconds of silence to the input audio, for slicing\u001b[39;00m\n\u001b[1;32m--> 122\u001b[0m mel \u001b[39m=\u001b[39m log_mel_spectrogram(audio, model\u001b[39m.\u001b[39;49mdims\u001b[39m.\u001b[39;49mn_mels, padding\u001b[39m=\u001b[39;49mN_SAMPLES)\n\u001b[0;32m 123\u001b[0m content_frames \u001b[39m=\u001b[39m mel\u001b[39m.\u001b[39mshape[\u001b[39m-\u001b[39m\u001b[39m1\u001b[39m] \u001b[39m-\u001b[39m N_FRAMES\n\u001b[0;32m 125\u001b[0m \u001b[39mif\u001b[39;00m decode_options\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mlanguage\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mNone\u001b[39;00m) \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n", "File \u001b[1;32md:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\whisper\\audio.py:140\u001b[0m, in \u001b[0;36mlog_mel_spectrogram\u001b[1;34m(audio, n_mels, padding, device)\u001b[0m\n\u001b[0;32m 138\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m torch\u001b[39m.\u001b[39mis_tensor(audio):\n\u001b[0;32m 139\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(audio, \u001b[39mstr\u001b[39m):\n\u001b[1;32m--> 140\u001b[0m audio \u001b[39m=\u001b[39m load_audio(audio)\n\u001b[0;32m 141\u001b[0m audio \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39mfrom_numpy(audio)\n\u001b[0;32m 143\u001b[0m \u001b[39mif\u001b[39;00m device \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n", "File \u001b[1;32md:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\whisper\\audio.py:58\u001b[0m, in \u001b[0;36mload_audio\u001b[1;34m(file, sr)\u001b[0m\n\u001b[0;32m 56\u001b[0m \u001b[39m# fmt: on\u001b[39;00m\n\u001b[0;32m 57\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m---> 58\u001b[0m out \u001b[39m=\u001b[39m run(cmd, capture_output\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m, check\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m)\u001b[39m.\u001b[39mstdout\n\u001b[0;32m 59\u001b[0m \u001b[39mexcept\u001b[39;00m CalledProcessError \u001b[39mas\u001b[39;00m e:\n\u001b[0;32m 60\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mFailed to load audio: \u001b[39m\u001b[39m{\u001b[39;00me\u001b[39m.\u001b[39mstderr\u001b[39m.\u001b[39mdecode()\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m) \u001b[39mfrom\u001b[39;00m \u001b[39me\u001b[39;00m\n", "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\subprocess.py:548\u001b[0m, in \u001b[0;36mrun\u001b[1;34m(input, capture_output, timeout, check, *popenargs, **kwargs)\u001b[0m\n\u001b[0;32m 545\u001b[0m kwargs[\u001b[39m'\u001b[39m\u001b[39mstdout\u001b[39m\u001b[39m'\u001b[39m] \u001b[39m=\u001b[39m PIPE\n\u001b[0;32m 546\u001b[0m kwargs[\u001b[39m'\u001b[39m\u001b[39mstderr\u001b[39m\u001b[39m'\u001b[39m] \u001b[39m=\u001b[39m PIPE\n\u001b[1;32m--> 548\u001b[0m \u001b[39mwith\u001b[39;00m Popen(\u001b[39m*\u001b[39;49mpopenargs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs) \u001b[39mas\u001b[39;00m process:\n\u001b[0;32m 549\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 550\u001b[0m stdout, stderr \u001b[39m=\u001b[39m process\u001b[39m.\u001b[39mcommunicate(\u001b[39minput\u001b[39m, timeout\u001b[39m=\u001b[39mtimeout)\n", "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\subprocess.py:1026\u001b[0m, in \u001b[0;36mPopen.__init__\u001b[1;34m(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, user, group, extra_groups, encoding, errors, text, umask, pipesize, process_group)\u001b[0m\n\u001b[0;32m 1022\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtext_mode:\n\u001b[0;32m 1023\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstderr \u001b[39m=\u001b[39m io\u001b[39m.\u001b[39mTextIOWrapper(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstderr,\n\u001b[0;32m 1024\u001b[0m encoding\u001b[39m=\u001b[39mencoding, errors\u001b[39m=\u001b[39merrors)\n\u001b[1;32m-> 1026\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_execute_child(args, executable, preexec_fn, close_fds,\n\u001b[0;32m 1027\u001b[0m pass_fds, cwd, env,\n\u001b[0;32m 1028\u001b[0m startupinfo, creationflags, shell,\n\u001b[0;32m 1029\u001b[0m p2cread, p2cwrite,\n\u001b[0;32m 1030\u001b[0m c2pread, c2pwrite,\n\u001b[0;32m 1031\u001b[0m errread, errwrite,\n\u001b[0;32m 1032\u001b[0m restore_signals,\n\u001b[0;32m 1033\u001b[0m gid, gids, uid, umask,\n\u001b[0;32m 1034\u001b[0m start_new_session, process_group)\n\u001b[0;32m 1035\u001b[0m \u001b[39mexcept\u001b[39;00m:\n\u001b[0;32m 1036\u001b[0m \u001b[39m# Cleanup if the child failed starting.\u001b[39;00m\n\u001b[0;32m 1037\u001b[0m \u001b[39mfor\u001b[39;00m f \u001b[39min\u001b[39;00m \u001b[39mfilter\u001b[39m(\u001b[39mNone\u001b[39;00m, (\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstdin, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstdout, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstderr)):\n", "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\subprocess.py:1538\u001b[0m, in \u001b[0;36mPopen._execute_child\u001b[1;34m(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, unused_restore_signals, unused_gid, unused_gids, unused_uid, unused_umask, unused_start_new_session, unused_process_group)\u001b[0m\n\u001b[0;32m 1536\u001b[0m \u001b[39m# Start the process\u001b[39;00m\n\u001b[0;32m 1537\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m-> 1538\u001b[0m hp, ht, pid, tid \u001b[39m=\u001b[39m _winapi\u001b[39m.\u001b[39mCreateProcess(executable, args,\n\u001b[0;32m 1539\u001b[0m \u001b[39m# no special security\u001b[39;00m\n\u001b[0;32m 1540\u001b[0m \u001b[39mNone\u001b[39;00m, \u001b[39mNone\u001b[39;00m,\n\u001b[0;32m 1541\u001b[0m \u001b[39mint\u001b[39m(\u001b[39mnot\u001b[39;00m close_fds),\n\u001b[0;32m 1542\u001b[0m creationflags,\n\u001b[0;32m 1543\u001b[0m env,\n\u001b[0;32m 1544\u001b[0m cwd,\n\u001b[0;32m 1545\u001b[0m startupinfo)\n\u001b[0;32m 1546\u001b[0m \u001b[39mfinally\u001b[39;00m:\n\u001b[0;32m 1547\u001b[0m \u001b[39m# Child is launched. Close the parent's copy of those pipe\u001b[39;00m\n\u001b[0;32m 1548\u001b[0m \u001b[39m# handles that only the child should have open. You need\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 1551\u001b[0m \u001b[39m# pipe will not close when the child process exits and the\u001b[39;00m\n\u001b[0;32m 1552\u001b[0m \u001b[39m# ReadFile will hang.\u001b[39;00m\n\u001b[0;32m 1553\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_close_pipe_fds(p2cread, p2cwrite,\n\u001b[0;32m 1554\u001b[0m c2pread, c2pwrite,\n\u001b[0;32m 1555\u001b[0m errread, errwrite)\n", "\u001b[1;31mFileNotFoundError\u001b[0m: [WinError 2] The system cannot find the file specified" ] } ], "source": [ "video_transcription(\"C:\\Users\\madip\\OneDrive\\Desktop\\test.mp4\")" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "os.path.exists(os.path.join(\"d:/M.S/Boston University/College/Studies/Sem 3 (Fall 23)/Subjects/CS549 - Spark! ML Practicum/Spark\", \"Test_Video.mp4\"))" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "model = whisper.load_model('tiny')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "ename": "FileNotFoundError", "evalue": "[WinError 2] The system cannot find the file specified", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", "\u001b[1;32md:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\Gradio App\\Spark_Deployment_Final.ipynb Cell 14\u001b[0m line \u001b[0;36m1\n\u001b[1;32m----> 1\u001b[0m transcript \u001b[39m=\u001b[39m model\u001b[39m.\u001b[39;49mtranscribe(\u001b[39m\"\u001b[39;49m\u001b[39mD:\u001b[39;49m\u001b[39m\\\u001b[39;49m\u001b[39mM.S\u001b[39;49m\u001b[39m\\\u001b[39;49m\u001b[39mBoston University\u001b[39;49m\u001b[39m\\\u001b[39;49m\u001b[39mCollege\u001b[39;49m\u001b[39m\\\u001b[39;49m\u001b[39mStudies\u001b[39;49m\u001b[39m\\\u001b[39;49m\u001b[39mSem 3 (Fall 23)\u001b[39;49m\u001b[39m\\\u001b[39;49m\u001b[39mSubjects\u001b[39;49m\u001b[39m\\\u001b[39;49m\u001b[39mCS549 - Spark! ML Practicum\u001b[39;49m\u001b[39m\\\u001b[39;49m\u001b[39mGradio App\u001b[39;49m\u001b[39m\\\u001b[39;49m\u001b[39mTest_Video.mp4\u001b[39;49m\u001b[39m\"\u001b[39;49m, verbose \u001b[39m=\u001b[39;49m \u001b[39mFalse\u001b[39;49;00m, language \u001b[39m=\u001b[39;49m \u001b[39m'\u001b[39;49m\u001b[39men\u001b[39;49m\u001b[39m'\u001b[39;49m)\n", "File \u001b[1;32md:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env\\Lib\\site-packages\\whisper\\transcribe.py:122\u001b[0m, in \u001b[0;36mtranscribe\u001b[1;34m(model, audio, verbose, temperature, compression_ratio_threshold, logprob_threshold, no_speech_threshold, condition_on_previous_text, initial_prompt, word_timestamps, prepend_punctuations, append_punctuations, **decode_options)\u001b[0m\n\u001b[0;32m 119\u001b[0m decode_options[\u001b[39m\"\u001b[39m\u001b[39mfp16\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m\n\u001b[0;32m 121\u001b[0m \u001b[39m# Pad 30-seconds of silence to the input audio, for slicing\u001b[39;00m\n\u001b[1;32m--> 122\u001b[0m mel \u001b[39m=\u001b[39m log_mel_spectrogram(audio, model\u001b[39m.\u001b[39;49mdims\u001b[39m.\u001b[39;49mn_mels, padding\u001b[39m=\u001b[39;49mN_SAMPLES)\n\u001b[0;32m 123\u001b[0m content_frames \u001b[39m=\u001b[39m mel\u001b[39m.\u001b[39mshape[\u001b[39m-\u001b[39m\u001b[39m1\u001b[39m] \u001b[39m-\u001b[39m N_FRAMES\n\u001b[0;32m 125\u001b[0m \u001b[39mif\u001b[39;00m decode_options\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mlanguage\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mNone\u001b[39;00m) \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n", "File \u001b[1;32md:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env\\Lib\\site-packages\\whisper\\audio.py:140\u001b[0m, in \u001b[0;36mlog_mel_spectrogram\u001b[1;34m(audio, n_mels, padding, device)\u001b[0m\n\u001b[0;32m 138\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m torch\u001b[39m.\u001b[39mis_tensor(audio):\n\u001b[0;32m 139\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(audio, \u001b[39mstr\u001b[39m):\n\u001b[1;32m--> 140\u001b[0m audio \u001b[39m=\u001b[39m load_audio(audio)\n\u001b[0;32m 141\u001b[0m audio \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39mfrom_numpy(audio)\n\u001b[0;32m 143\u001b[0m \u001b[39mif\u001b[39;00m device \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n", "File \u001b[1;32md:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env\\Lib\\site-packages\\whisper\\audio.py:58\u001b[0m, in \u001b[0;36mload_audio\u001b[1;34m(file, sr)\u001b[0m\n\u001b[0;32m 56\u001b[0m \u001b[39m# fmt: on\u001b[39;00m\n\u001b[0;32m 57\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m---> 58\u001b[0m out \u001b[39m=\u001b[39m run(cmd, capture_output\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m, check\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m)\u001b[39m.\u001b[39mstdout\n\u001b[0;32m 59\u001b[0m \u001b[39mexcept\u001b[39;00m CalledProcessError \u001b[39mas\u001b[39;00m e:\n\u001b[0;32m 60\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mFailed to load audio: \u001b[39m\u001b[39m{\u001b[39;00me\u001b[39m.\u001b[39mstderr\u001b[39m.\u001b[39mdecode()\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m) \u001b[39mfrom\u001b[39;00m \u001b[39me\u001b[39;00m\n", "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\subprocess.py:548\u001b[0m, in \u001b[0;36mrun\u001b[1;34m(input, capture_output, timeout, check, *popenargs, **kwargs)\u001b[0m\n\u001b[0;32m 545\u001b[0m kwargs[\u001b[39m'\u001b[39m\u001b[39mstdout\u001b[39m\u001b[39m'\u001b[39m] \u001b[39m=\u001b[39m PIPE\n\u001b[0;32m 546\u001b[0m kwargs[\u001b[39m'\u001b[39m\u001b[39mstderr\u001b[39m\u001b[39m'\u001b[39m] \u001b[39m=\u001b[39m PIPE\n\u001b[1;32m--> 548\u001b[0m \u001b[39mwith\u001b[39;00m Popen(\u001b[39m*\u001b[39;49mpopenargs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs) \u001b[39mas\u001b[39;00m process:\n\u001b[0;32m 549\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 550\u001b[0m stdout, stderr \u001b[39m=\u001b[39m process\u001b[39m.\u001b[39mcommunicate(\u001b[39minput\u001b[39m, timeout\u001b[39m=\u001b[39mtimeout)\n", "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\subprocess.py:1026\u001b[0m, in \u001b[0;36mPopen.__init__\u001b[1;34m(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, user, group, extra_groups, encoding, errors, text, umask, pipesize, process_group)\u001b[0m\n\u001b[0;32m 1022\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtext_mode:\n\u001b[0;32m 1023\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstderr \u001b[39m=\u001b[39m io\u001b[39m.\u001b[39mTextIOWrapper(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstderr,\n\u001b[0;32m 1024\u001b[0m encoding\u001b[39m=\u001b[39mencoding, errors\u001b[39m=\u001b[39merrors)\n\u001b[1;32m-> 1026\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_execute_child(args, executable, preexec_fn, close_fds,\n\u001b[0;32m 1027\u001b[0m pass_fds, cwd, env,\n\u001b[0;32m 1028\u001b[0m startupinfo, creationflags, shell,\n\u001b[0;32m 1029\u001b[0m p2cread, p2cwrite,\n\u001b[0;32m 1030\u001b[0m c2pread, c2pwrite,\n\u001b[0;32m 1031\u001b[0m errread, errwrite,\n\u001b[0;32m 1032\u001b[0m restore_signals,\n\u001b[0;32m 1033\u001b[0m gid, gids, uid, umask,\n\u001b[0;32m 1034\u001b[0m start_new_session, process_group)\n\u001b[0;32m 1035\u001b[0m \u001b[39mexcept\u001b[39;00m:\n\u001b[0;32m 1036\u001b[0m \u001b[39m# Cleanup if the child failed starting.\u001b[39;00m\n\u001b[0;32m 1037\u001b[0m \u001b[39mfor\u001b[39;00m f \u001b[39min\u001b[39;00m \u001b[39mfilter\u001b[39m(\u001b[39mNone\u001b[39;00m, (\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstdin, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstdout, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstderr)):\n", "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\subprocess.py:1538\u001b[0m, in \u001b[0;36mPopen._execute_child\u001b[1;34m(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, unused_restore_signals, unused_gid, unused_gids, unused_uid, unused_umask, unused_start_new_session, unused_process_group)\u001b[0m\n\u001b[0;32m 1536\u001b[0m \u001b[39m# Start the process\u001b[39;00m\n\u001b[0;32m 1537\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m-> 1538\u001b[0m hp, ht, pid, tid \u001b[39m=\u001b[39m _winapi\u001b[39m.\u001b[39;49mCreateProcess(executable, args,\n\u001b[0;32m 1539\u001b[0m \u001b[39m# no special security\u001b[39;49;00m\n\u001b[0;32m 1540\u001b[0m \u001b[39mNone\u001b[39;49;00m, \u001b[39mNone\u001b[39;49;00m,\n\u001b[0;32m 1541\u001b[0m \u001b[39mint\u001b[39;49m(\u001b[39mnot\u001b[39;49;00m close_fds),\n\u001b[0;32m 1542\u001b[0m creationflags,\n\u001b[0;32m 1543\u001b[0m env,\n\u001b[0;32m 1544\u001b[0m cwd,\n\u001b[0;32m 1545\u001b[0m startupinfo)\n\u001b[0;32m 1546\u001b[0m \u001b[39mfinally\u001b[39;00m:\n\u001b[0;32m 1547\u001b[0m \u001b[39m# Child is launched. Close the parent's copy of those pipe\u001b[39;00m\n\u001b[0;32m 1548\u001b[0m \u001b[39m# handles that only the child should have open. You need\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 1551\u001b[0m \u001b[39m# pipe will not close when the child process exits and the\u001b[39;00m\n\u001b[0;32m 1552\u001b[0m \u001b[39m# ReadFile will hang.\u001b[39;00m\n\u001b[0;32m 1553\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_close_pipe_fds(p2cread, p2cwrite,\n\u001b[0;32m 1554\u001b[0m c2pread, c2pwrite,\n\u001b[0;32m 1555\u001b[0m errread, errwrite)\n", "\u001b[1;31mFileNotFoundError\u001b[0m: [WinError 2] The system cannot find the file specified" ] } ], "source": [ "transcript = model.transcribe(\"D:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\Gradio App\\Test_Video.mp4\", verbose = False, language = 'en')" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "os.path.exists(\"D:/M.S/Boston University/College/Studies/Sem 3 (Fall 23)/Subjects/CS549 - Spark! ML Practicum/Spark/Test_Video.mp4\")" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "os.path.exists(\"D:/M.S/Boston University/College/Studies/Sem 3 (Fall 23)/Subjects/CS549 - Spark! ML Practicum/Spark/OWL-Detections/YOLOv8 Best Weights.pt\")" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "os.path.exists(\"Test_Video.mp4\")" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "ename": "FileNotFoundError", "evalue": "[WinError 2] The system cannot find the file specified", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", "\u001b[1;32md:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\Gradio App\\Spark_Deployment_Final.ipynb Cell 19\u001b[0m line \u001b[0;36m1\n\u001b[1;32m----> 1\u001b[0m all_funcs(\u001b[39m\"\u001b[39;49m\u001b[39msk-MZd4k8qiN2Qh5MQ3Q8vRT3BlbkFJpeXherRIQCJBYle63q72\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mtest.mp4\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mD:/M.S/Boston University/College/Studies/Sem 3 (Fall 23)/Subjects/CS549 - Spark! ML Practicum/Spark/OWL-Detections/YOLOv8 Best Weights.pt\u001b[39;49m\u001b[39m\"\u001b[39;49m, pr \u001b[39m=\u001b[39;49m gr\u001b[39m.\u001b[39;49mProgress(track_tqdm \u001b[39m=\u001b[39;49m \u001b[39mTrue\u001b[39;49;00m))\n", "\u001b[1;32md:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\Gradio App\\Spark_Deployment_Final.ipynb Cell 19\u001b[0m line \u001b[0;36m1\n\u001b[0;32m 143\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mall_funcs\u001b[39m(openai_key,video_path, yolo_weights, pr \u001b[39m=\u001b[39m gr\u001b[39m.\u001b[39mProgress(track_tqdm \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m)):\n\u001b[0;32m 144\u001b[0m \n\u001b[0;32m 145\u001b[0m \u001b[39m#video_path = video_path[0].split('/')[-1]\u001b[39;00m\n\u001b[0;32m 146\u001b[0m \u001b[39m#yolo_weights = yolo_weights[0].split('/')[-1]\u001b[39;00m\n\u001b[1;32m--> 147\u001b[0m transcript \u001b[39m=\u001b[39m video_transcription(video_path)\n\u001b[0;32m 148\u001b[0m sentences \u001b[39m=\u001b[39m action_detection(transcript, openai_key)\n\u001b[0;32m 149\u001b[0m batons \u001b[39m=\u001b[39m process_video(video_path, yolo_weights)\n", "\u001b[1;32md:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\Gradio App\\Spark_Deployment_Final.ipynb Cell 19\u001b[0m line \u001b[0;36m3\n\u001b[0;32m 1\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mvideo_transcription\u001b[39m(video_path):\n\u001b[0;32m 2\u001b[0m model \u001b[39m=\u001b[39m whisper\u001b[39m.\u001b[39mload_model(\u001b[39m'\u001b[39m\u001b[39mtiny\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m----> 3\u001b[0m transcript \u001b[39m=\u001b[39m model\u001b[39m.\u001b[39;49mtranscribe(video_path, verbose \u001b[39m=\u001b[39;49m \u001b[39mFalse\u001b[39;49;00m, language \u001b[39m=\u001b[39;49m \u001b[39m'\u001b[39;49m\u001b[39men\u001b[39;49m\u001b[39m'\u001b[39;49m)\n\u001b[0;32m 5\u001b[0m \u001b[39mreturn\u001b[39;00m json\u001b[39m.\u001b[39mdumps(transcript)\n", "File \u001b[1;32md:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\whisper\\transcribe.py:122\u001b[0m, in \u001b[0;36mtranscribe\u001b[1;34m(model, audio, verbose, temperature, compression_ratio_threshold, logprob_threshold, no_speech_threshold, condition_on_previous_text, initial_prompt, word_timestamps, prepend_punctuations, append_punctuations, **decode_options)\u001b[0m\n\u001b[0;32m 119\u001b[0m decode_options[\u001b[39m\"\u001b[39m\u001b[39mfp16\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m\n\u001b[0;32m 121\u001b[0m \u001b[39m# Pad 30-seconds of silence to the input audio, for slicing\u001b[39;00m\n\u001b[1;32m--> 122\u001b[0m mel \u001b[39m=\u001b[39m log_mel_spectrogram(audio, model\u001b[39m.\u001b[39;49mdims\u001b[39m.\u001b[39;49mn_mels, padding\u001b[39m=\u001b[39;49mN_SAMPLES)\n\u001b[0;32m 123\u001b[0m content_frames \u001b[39m=\u001b[39m mel\u001b[39m.\u001b[39mshape[\u001b[39m-\u001b[39m\u001b[39m1\u001b[39m] \u001b[39m-\u001b[39m N_FRAMES\n\u001b[0;32m 125\u001b[0m \u001b[39mif\u001b[39;00m decode_options\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mlanguage\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mNone\u001b[39;00m) \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n", "File \u001b[1;32md:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\whisper\\audio.py:140\u001b[0m, in \u001b[0;36mlog_mel_spectrogram\u001b[1;34m(audio, n_mels, padding, device)\u001b[0m\n\u001b[0;32m 138\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m torch\u001b[39m.\u001b[39mis_tensor(audio):\n\u001b[0;32m 139\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(audio, \u001b[39mstr\u001b[39m):\n\u001b[1;32m--> 140\u001b[0m audio \u001b[39m=\u001b[39m load_audio(audio)\n\u001b[0;32m 141\u001b[0m audio \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39mfrom_numpy(audio)\n\u001b[0;32m 143\u001b[0m \u001b[39mif\u001b[39;00m device \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n", "File \u001b[1;32md:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\whisper\\audio.py:58\u001b[0m, in \u001b[0;36mload_audio\u001b[1;34m(file, sr)\u001b[0m\n\u001b[0;32m 56\u001b[0m \u001b[39m# fmt: on\u001b[39;00m\n\u001b[0;32m 57\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m---> 58\u001b[0m out \u001b[39m=\u001b[39m run(cmd, capture_output\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m, check\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m)\u001b[39m.\u001b[39mstdout\n\u001b[0;32m 59\u001b[0m \u001b[39mexcept\u001b[39;00m CalledProcessError \u001b[39mas\u001b[39;00m e:\n\u001b[0;32m 60\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mFailed to load audio: \u001b[39m\u001b[39m{\u001b[39;00me\u001b[39m.\u001b[39mstderr\u001b[39m.\u001b[39mdecode()\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m) \u001b[39mfrom\u001b[39;00m \u001b[39me\u001b[39;00m\n", "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\subprocess.py:548\u001b[0m, in \u001b[0;36mrun\u001b[1;34m(input, capture_output, timeout, check, *popenargs, **kwargs)\u001b[0m\n\u001b[0;32m 545\u001b[0m kwargs[\u001b[39m'\u001b[39m\u001b[39mstdout\u001b[39m\u001b[39m'\u001b[39m] \u001b[39m=\u001b[39m PIPE\n\u001b[0;32m 546\u001b[0m kwargs[\u001b[39m'\u001b[39m\u001b[39mstderr\u001b[39m\u001b[39m'\u001b[39m] \u001b[39m=\u001b[39m PIPE\n\u001b[1;32m--> 548\u001b[0m \u001b[39mwith\u001b[39;00m Popen(\u001b[39m*\u001b[39;49mpopenargs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs) \u001b[39mas\u001b[39;00m process:\n\u001b[0;32m 549\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 550\u001b[0m stdout, stderr \u001b[39m=\u001b[39m process\u001b[39m.\u001b[39mcommunicate(\u001b[39minput\u001b[39m, timeout\u001b[39m=\u001b[39mtimeout)\n", "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\subprocess.py:1026\u001b[0m, in \u001b[0;36mPopen.__init__\u001b[1;34m(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, user, group, extra_groups, encoding, errors, text, umask, pipesize, process_group)\u001b[0m\n\u001b[0;32m 1022\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtext_mode:\n\u001b[0;32m 1023\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstderr \u001b[39m=\u001b[39m io\u001b[39m.\u001b[39mTextIOWrapper(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstderr,\n\u001b[0;32m 1024\u001b[0m encoding\u001b[39m=\u001b[39mencoding, errors\u001b[39m=\u001b[39merrors)\n\u001b[1;32m-> 1026\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_execute_child(args, executable, preexec_fn, close_fds,\n\u001b[0;32m 1027\u001b[0m pass_fds, cwd, env,\n\u001b[0;32m 1028\u001b[0m startupinfo, creationflags, shell,\n\u001b[0;32m 1029\u001b[0m p2cread, p2cwrite,\n\u001b[0;32m 1030\u001b[0m c2pread, c2pwrite,\n\u001b[0;32m 1031\u001b[0m errread, errwrite,\n\u001b[0;32m 1032\u001b[0m restore_signals,\n\u001b[0;32m 1033\u001b[0m gid, gids, uid, umask,\n\u001b[0;32m 1034\u001b[0m start_new_session, process_group)\n\u001b[0;32m 1035\u001b[0m \u001b[39mexcept\u001b[39;00m:\n\u001b[0;32m 1036\u001b[0m \u001b[39m# Cleanup if the child failed starting.\u001b[39;00m\n\u001b[0;32m 1037\u001b[0m \u001b[39mfor\u001b[39;00m f \u001b[39min\u001b[39;00m \u001b[39mfilter\u001b[39m(\u001b[39mNone\u001b[39;00m, (\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstdin, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstdout, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstderr)):\n", "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\subprocess.py:1538\u001b[0m, in \u001b[0;36mPopen._execute_child\u001b[1;34m(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, unused_restore_signals, unused_gid, unused_gids, unused_uid, unused_umask, unused_start_new_session, unused_process_group)\u001b[0m\n\u001b[0;32m 1536\u001b[0m \u001b[39m# Start the process\u001b[39;00m\n\u001b[0;32m 1537\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m-> 1538\u001b[0m hp, ht, pid, tid \u001b[39m=\u001b[39m _winapi\u001b[39m.\u001b[39mCreateProcess(executable, args,\n\u001b[0;32m 1539\u001b[0m \u001b[39m# no special security\u001b[39;00m\n\u001b[0;32m 1540\u001b[0m \u001b[39mNone\u001b[39;00m, \u001b[39mNone\u001b[39;00m,\n\u001b[0;32m 1541\u001b[0m \u001b[39mint\u001b[39m(\u001b[39mnot\u001b[39;00m close_fds),\n\u001b[0;32m 1542\u001b[0m creationflags,\n\u001b[0;32m 1543\u001b[0m env,\n\u001b[0;32m 1544\u001b[0m cwd,\n\u001b[0;32m 1545\u001b[0m startupinfo)\n\u001b[0;32m 1546\u001b[0m \u001b[39mfinally\u001b[39;00m:\n\u001b[0;32m 1547\u001b[0m \u001b[39m# Child is launched. Close the parent's copy of those pipe\u001b[39;00m\n\u001b[0;32m 1548\u001b[0m \u001b[39m# handles that only the child should have open. You need\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 1551\u001b[0m \u001b[39m# pipe will not close when the child process exits and the\u001b[39;00m\n\u001b[0;32m 1552\u001b[0m \u001b[39m# ReadFile will hang.\u001b[39;00m\n\u001b[0;32m 1553\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_close_pipe_fds(p2cread, p2cwrite,\n\u001b[0;32m 1554\u001b[0m c2pread, c2pwrite,\n\u001b[0;32m 1555\u001b[0m errread, errwrite)\n", "\u001b[1;31mFileNotFoundError\u001b[0m: [WinError 2] The system cannot find the file specified" ] } ], "source": [ "all_funcs(\"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\", \"test.mp4\", \"D:/M.S/Boston University/College/Studies/Sem 3 (Fall 23)/Subjects/CS549 - Spark! ML Practicum/Spark/OWL-Detections/YOLOv8 Best Weights.pt\", pr = gr.Progress(track_tqdm = True))" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 626 }, "id": "ZbZKUNl3Mttf", "outputId": "6c0912aa-f2d3-49f7-db58-473905111653" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Running on local URL: http://127.0.0.1:7860\n", "\n", "To create a public link, set `share=True` in `launch()`.\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" }, { "name": "stderr", "output_type": "stream", "text": [ "Traceback (most recent call last):\n", " File \"d:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\gradio\\queueing.py\", line 456, in call_prediction\n", " output = await route_utils.call_process_api(\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\gradio\\route_utils.py\", line 232, in call_process_api\n", " output = await app.get_blocks().process_api(\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\gradio\\blocks.py\", line 1522, in process_api\n", " result = await self.call_function(\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\gradio\\blocks.py\", line 1144, in call_function\n", " prediction = await anyio.to_thread.run_sync(\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\anyio\\to_thread.py\", line 33, in run_sync\n", " return await get_asynclib().run_sync_in_worker_thread(\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\anyio\\_backends\\_asyncio.py\", line 877, in run_sync_in_worker_thread\n", " return await future\n", " ^^^^^^^^^^^^\n", " File \"d:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\anyio\\_backends\\_asyncio.py\", line 807, in run\n", " result = context.run(func, *args)\n", " ^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\gradio\\utils.py\", line 674, in wrapper\n", " response = f(*args, **kwargs)\n", " ^^^^^^^^^^^^^^^^^^\n", " File \"d:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\gradio\\utils.py\", line 674, in wrapper\n", " response = f(*args, **kwargs)\n", " ^^^^^^^^^^^^^^^^^^\n", " File \"C:\\Users\\madip\\AppData\\Local\\Temp\\ipykernel_26328\\3366298026.py\", line 147, in all_funcs\n", " transcript = video_transcription(video_path)\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"C:\\Users\\madip\\AppData\\Local\\Temp\\ipykernel_26328\\3366298026.py\", line 3, in video_transcription\n", " transcript = model.transcribe(video_path, verbose = False, language = 'en')\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\whisper\\transcribe.py\", line 122, in transcribe\n", " mel = log_mel_spectrogram(audio, model.dims.n_mels, padding=N_SAMPLES)\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\whisper\\audio.py\", line 141, in log_mel_spectrogram\n", " audio = torch.from_numpy(audio)\n", " ^^^^^^^^^^^^^^^^^^^^^^^\n", "TypeError: expected np.ndarray (got list)\n", "Traceback (most recent call last):\n", " File \"d:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\gradio\\queueing.py\", line 456, in call_prediction\n", " output = await route_utils.call_process_api(\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\gradio\\route_utils.py\", line 232, in call_process_api\n", " output = await app.get_blocks().process_api(\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\gradio\\blocks.py\", line 1522, in process_api\n", " result = await self.call_function(\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\gradio\\blocks.py\", line 1144, in call_function\n", " prediction = await anyio.to_thread.run_sync(\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\anyio\\to_thread.py\", line 33, in run_sync\n", " return await get_asynclib().run_sync_in_worker_thread(\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\anyio\\_backends\\_asyncio.py\", line 877, in run_sync_in_worker_thread\n", " return await future\n", " ^^^^^^^^^^^^\n", " File \"d:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\anyio\\_backends\\_asyncio.py\", line 807, in run\n", " result = context.run(func, *args)\n", " ^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\gradio\\utils.py\", line 674, in wrapper\n", " response = f(*args, **kwargs)\n", " ^^^^^^^^^^^^^^^^^^\n", " File \"d:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\gradio\\utils.py\", line 674, in wrapper\n", " response = f(*args, **kwargs)\n", " ^^^^^^^^^^^^^^^^^^\n", " File \"C:\\Users\\madip\\AppData\\Local\\Temp\\ipykernel_26328\\3366298026.py\", line 147, in all_funcs\n", " transcript = video_transcription(video_path)\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"C:\\Users\\madip\\AppData\\Local\\Temp\\ipykernel_26328\\3366298026.py\", line 3, in video_transcription\n", " transcript = model.transcribe(video_path, verbose = False, language = 'en')\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\whisper\\transcribe.py\", line 122, in transcribe\n", " mel = log_mel_spectrogram(audio, model.dims.n_mels, padding=N_SAMPLES)\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\whisper\\audio.py\", line 141, in log_mel_spectrogram\n", " audio = torch.from_numpy(audio)\n", " ^^^^^^^^^^^^^^^^^^^^^^^\n", "TypeError: expected np.ndarray (got list)\n", "\n", "The above exception was the direct cause of the following exception:\n", "\n", "Traceback (most recent call last):\n", " File \"d:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\gradio\\queueing.py\", line 501, in process_events\n", " response = await self.call_prediction(awake_events, batch)\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"d:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\gradio\\queueing.py\", line 465, in call_prediction\n", " raise Exception(str(error) if show_error else None) from error\n", "Exception: None\n" ] } ], "source": [ "btn = gr.Interface(\n", " fn = all_funcs,\n", " inputs = [\"text\", gr.Files(label = \"Select Video File\"), gr.Files(label = \"Select YOLOv8 Weights File\")],\n", " outputs=[gr.Textbox(label = \"Audio Analysis Time Stamps\", lines = 20), gr.Textbox(label = \"Baton Detection Timestamps\", lines = 20)]\n", ")\n", "\n", "btn.launch()\n" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": { "id": "NMF48OxVJHLp" }, "source": [ "# Baton Detection" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "8VSlkVeNJQo4" }, "outputs": [], "source": [ "def process_video(video_path, weights):\n", " try:\n", " # This code cell detects batons in the video\n", " current_frame = 0\n", " model = YOLO(weights)\n", " cap = cv2.VideoCapture(video_path)\n", " fps = int(cap.get(cv2.CAP_PROP_FPS))\n", " conseq_frames = 0\n", " start_time = \"\"\n", " end_time = \"\"\n", " res = []\n", "\n", " while True:\n", " ret, frame = cap.read()\n", " if not ret:\n", " break\n", "\n", " # Detecting baton on one frame per second\n", " if current_frame % fps == 0:\n", " currect_sec = current_frame/fps\n", "\n", " # Model prediction on current frame\n", " results = model(frame, verbose = False)\n", " count = 0\n", " classes = results[0].boxes.data\n", "\n", " # Formatting the time for printing\n", " hours, remainder = divmod(currect_sec, 3600)\n", " minutes, seconds = divmod(remainder, 60)\n", " hours = str(int(hours)).zfill(2)\n", " minutes = str(int(minutes)).zfill(2)\n", " seconds = str(int(seconds)).zfill(2)\n", "\n", " for i in classes:\n", "\n", " # Checking if baton is detected (i.e. if the class corresponding to baton is 1 or not)\n", " if float(i[5]) == 1:\n", " count+=1\n", "\n", " # Marking the start_time if this is the first consecutive frame a baton is detected in\n", " if count >= 1:\n", " conseq_frames+=1\n", " if conseq_frames == 1:\n", " start_time = hours + \":\" + minutes + \":\" + seconds\n", "\n", " # Marking the end time if after one or multiple consecutive frames of detection, a baton is not detected\n", " else:\n", " if conseq_frames > 0:\n", " conseq_frames = 0\n", " end_time = hours + \":\" + minutes + \":\" + seconds\n", "\n", " # Printing time intervals in which baton was detected\n", " res.append(start_time + \" to \" + end_time)\n", " start_time = \"\"\n", " end_time = \"\"\n", "\n", " current_frame += 1\n", " cap.release()\n", "\n", " return \"\\n\".join(res)\n", "\n", " except Exception as e:\n", "\n", " return e" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 626 }, "id": "uSBK_3VBJQmC", "outputId": "a18e4eeb-6b99-46c0-99b6-ae3a6e68ae99" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).\n", "\n", "Colab notebook detected. To show errors in colab notebook, set debug=True in launch()\n", "Running on public URL: https://06b7f8c10c60967e6b.gradio.live\n", "\n", "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "with gr.Blocks() as demo:\n", "\n", " video_path = gr.Textbox(label = \"Enter Path to Video\")\n", " #openai_keys = gr.Textbox(label = \"Enter your OpenAI Key\")\n", " weights = gr.Textbox(label = \"Enter Path to YOLOv8 Weights\")\n", " #sentences = gr.Textbox(label = \"Sentences Detected\")\n", " batons = gr.Textbox(label = \"Batons Detected\")\n", " btn = gr.Button(value = \"Process Video\")\n", " btn.click(process_video, inputs = [video_path, weights], outputs = batons)\n", "\n", "demo.launch()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "Iqdjv7QowrZW" }, "outputs": [], "source": [ "/content/drive/MyDrive/Spark Project/Test_Video.mp4" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "v9-4b-gfrbwa" }, "outputs": [], "source": [ "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" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "nPh9wSJvwvAt" }, "outputs": [], "source": [ "/content/drive/MyDrive/Spark Project/Data (For YOLOv8 Training)/Option 3 - Roboflow (60 Images)/YOLOv8 Best Weights.pt" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "CvuZJI3-LGOU" }, "outputs": [], "source": [ "process_video(\"/content/drive/MyDrive/Spark Project/Test_Video.mp4\", \"/content/drive/MyDrive/Spark Project/Data (For YOLOv8 Training)/Option 3 - Roboflow (60 Images)/YOLOv8 Best Weights.pt\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "t7ZZYQp_tbN4" }, "outputs": [], "source": [ "a = video_transcription(\"/content/drive/MyDrive/Spark Project/Test_Video.mp4\")\n", "a" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "mtv7izc3HQHP", "outputId": "d7fed4ac-3d97-4580-bd21-f698f84f9615" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting ultralytics\n", " Downloading ultralytics-8.0.214-py3-none-any.whl (645 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m645.5/645.5 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: matplotlib>=3.3.0 in /usr/local/lib/python3.10/dist-packages (from ultralytics) (3.7.1)\n", "Requirement already satisfied: numpy>=1.22.2 in /usr/local/lib/python3.10/dist-packages (from ultralytics) (1.23.5)\n", "Requirement already satisfied: opencv-python>=4.6.0 in /usr/local/lib/python3.10/dist-packages (from ultralytics) (4.8.0.76)\n", "Requirement already satisfied: pillow>=7.1.2 in /usr/local/lib/python3.10/dist-packages (from ultralytics) (9.4.0)\n", "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.10/dist-packages (from ultralytics) (6.0.1)\n", "Requirement already satisfied: requests>=2.23.0 in /usr/local/lib/python3.10/dist-packages (from ultralytics) (2.31.0)\n", "Requirement already satisfied: scipy>=1.4.1 in /usr/local/lib/python3.10/dist-packages (from ultralytics) (1.11.3)\n", "Requirement already satisfied: torch>=1.8.0 in /usr/local/lib/python3.10/dist-packages (from ultralytics) (2.1.0+cu118)\n", "Requirement already satisfied: torchvision>=0.9.0 in /usr/local/lib/python3.10/dist-packages (from ultralytics) (0.16.0+cu118)\n", "Requirement already satisfied: tqdm>=4.64.0 in /usr/local/lib/python3.10/dist-packages (from ultralytics) (4.66.1)\n", "Requirement already satisfied: pandas>=1.1.4 in /usr/local/lib/python3.10/dist-packages (from ultralytics) (1.5.3)\n", "Requirement already satisfied: seaborn>=0.11.0 in /usr/local/lib/python3.10/dist-packages (from ultralytics) (0.12.2)\n", "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from ultralytics) (5.9.5)\n", "Requirement already satisfied: py-cpuinfo in /usr/local/lib/python3.10/dist-packages (from ultralytics) (9.0.0)\n", "Collecting thop>=0.1.1 (from ultralytics)\n", " Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)\n", "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.3.0->ultralytics) (1.2.0)\n", "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.3.0->ultralytics) (0.12.1)\n", "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.3.0->ultralytics) (4.44.3)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.3.0->ultralytics) (1.4.5)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.3.0->ultralytics) (23.2)\n", "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.3.0->ultralytics) (3.1.1)\n", "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.3.0->ultralytics) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.1.4->ultralytics) (2023.3.post1)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.23.0->ultralytics) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.23.0->ultralytics) (3.4)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.23.0->ultralytics) (2.0.7)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.23.0->ultralytics) (2023.7.22)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.8.0->ultralytics) (3.13.1)\n", "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch>=1.8.0->ultralytics) (4.5.0)\n", "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.8.0->ultralytics) (1.12)\n", "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.8.0->ultralytics) (3.2.1)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.8.0->ultralytics) (3.1.2)\n", "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch>=1.8.0->ultralytics) (2023.6.0)\n", "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.8.0->ultralytics) (2.1.0)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib>=3.3.0->ultralytics) (1.16.0)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.8.0->ultralytics) (2.1.3)\n", "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.8.0->ultralytics) (1.3.0)\n", "Installing collected packages: thop, ultralytics\n", "Successfully installed thop-0.1.1.post2209072238 ultralytics-8.0.214\n" ] } ], "source": [ "!pip install ultralytics" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/aakashbhatnagar/Documents/masters/spark/myenv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Running on local URL: http://0.0.0.0:4000\n", "\n", "To create a public link, set `share=True` in `launch()`.\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" }, { "name": "stdout", "output_type": "stream", "text": [ "['/private/var/folders/73/1kdzs7qn01v55rn5825j1cjr0000gn/T/gradio/e53fc2e5df5c8554be70323db9b7b28b2ea8b756/test.zip']\n", "example 3 copy.mp4\n", "['/private/var/folders/73/1kdzs7qn01v55rn5825j1cjr0000gn/T/gradio/d7b854323dc4c999fd9de0b88f09374e3de732e8/YOLOv8 Best We.pt']\n", "[00:00.000 --> 00:04.000] He's got a kid, he's got a baby in the car, we've got to get him out, he's got a baby in the car.\n", "[00:04.000 --> 00:05.000] He's coughing.\n", "[00:05.000 --> 00:06.000] Fucking idiot.\n", "{'text': \" He's got a kid, he's got a baby in the car, we've got to get him out, he's got a baby in the car. He's coughing. Fucking idiot.\", 'segments': [{'id': 0, 'seek': 0, 'start': 0.0, 'end': 4.0, 'text': \" He's got a kid, he's got a baby in the car, we've got to get him out, he's got a baby in the car.\", 'tokens': [50364, 634, 311, 658, 257, 1636, 11, 415, 311, 658, 257, 3186, 294, 264, 1032, 11, 321, 600, 658, 281, 483, 796, 484, 11, 415, 311, 658, 257, 3186, 294, 264, 1032, 13, 50564], 'temperature': 0.0, 'avg_logprob': -0.2445144860640816, 'compression_ratio': 1.4269662921348314, 'no_speech_prob': 0.23536445200443268}, {'id': 1, 'seek': 0, 'start': 4.0, 'end': 5.0, 'text': \" He's coughing.\", 'tokens': [50564, 634, 311, 39375, 13, 50614], 'temperature': 0.0, 'avg_logprob': -0.2445144860640816, 'compression_ratio': 1.4269662921348314, 'no_speech_prob': 0.23536445200443268}, {'id': 2, 'seek': 0, 'start': 5.0, 'end': 6.0, 'text': ' Fucking idiot.', 'tokens': [50614, 33342, 14270, 13, 50664], 'temperature': 0.0, 'avg_logprob': -0.2445144860640816, 'compression_ratio': 1.4269662921348314, 'no_speech_prob': 0.23536445200443268}], 'language': 'en'}\n", "{\"text\": \" He's got a kid, he's got a baby in the car, we've got to get him out, he's got a baby in the car. He's coughing. Fucking idiot.\", \"segments\": [{\"id\": 0, \"seek\": 0, \"start\": 0.0, \"end\": 4.0, \"text\": \" He's got a kid, he's got a baby in the car, we've got to get him out, he's got a baby in the car.\", \"tokens\": [50364, 634, 311, 658, 257, 1636, 11, 415, 311, 658, 257, 3186, 294, 264, 1032, 11, 321, 600, 658, 281, 483, 796, 484, 11, 415, 311, 658, 257, 3186, 294, 264, 1032, 13, 50564], \"temperature\": 0.0, \"avg_logprob\": -0.2445144860640816, \"compression_ratio\": 1.4269662921348314, \"no_speech_prob\": 0.23536445200443268}, {\"id\": 1, \"seek\": 0, \"start\": 4.0, \"end\": 5.0, \"text\": \" He's coughing.\", \"tokens\": [50564, 634, 311, 39375, 13, 50614], \"temperature\": 0.0, \"avg_logprob\": -0.2445144860640816, \"compression_ratio\": 1.4269662921348314, \"no_speech_prob\": 0.23536445200443268}, {\"id\": 2, \"seek\": 0, \"start\": 5.0, \"end\": 6.0, \"text\": \" Fucking idiot.\", \"tokens\": [50614, 33342, 14270, 13, 50664], \"temperature\": 0.0, \"avg_logprob\": -0.2445144860640816, \"compression_ratio\": 1.4269662921348314, \"no_speech_prob\": 0.23536445200443268}], \"language\": \"en\"}\n", "He's got a kid, he's got a baby in the car, we've got to get him out, he's got a baby in the car. Start Time: 00:00:00 End Time: 00:00:04\n", "Fucking idiot. Start Time: 00:00:05 End Time: 00:00:06\n", "batons {'Video 1': '00:00:02 to 00:00:04'}\n", "__MACOSX/._example 3 copy.mp4\n", "['/private/var/folders/73/1kdzs7qn01v55rn5825j1cjr0000gn/T/gradio/d7b854323dc4c999fd9de0b88f09374e3de732e8/YOLOv8 Best We.pt']\n", "Failed to load audio: ffmpeg version 6.0 Copyright (c) 2000-2023 the FFmpeg developers\n", " built with Apple clang version 15.0.0 (clang-1500.0.40.1)\n", " configuration: --prefix=/opt/homebrew/Cellar/ffmpeg/6.0_1 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspeex --enable-libsoxr --enable-libzmq --enable-libzimg --disable-libjack --disable-indev=jack --enable-videotoolbox --enable-audiotoolbox --enable-neon\n", " libavutil 58. 2.100 / 58. 2.100\n", " libavcodec 60. 3.100 / 60. 3.100\n", " libavformat 60. 3.100 / 60. 3.100\n", " libavdevice 60. 1.100 / 60. 1.100\n", " libavfilter 9. 3.100 / 9. 3.100\n", " libswscale 7. 1.100 / 7. 1.100\n", " libswresample 4. 10.100 / 4. 10.100\n", " libpostproc 57. 1.100 / 57. 1.100\n", "[h263 @ 0x15972c610] Format h263 detected only with low score of 25, misdetection possible!\n", "[h263 @ 0x15972d0b0] H.263 SAC not supported\n", "[h263 @ 0x15972d0b0] header damaged\n", "[h263 @ 0x15972c610] Could not find codec parameters for stream 0 (Video: h263, none): unspecified size\n", "Consider increasing the value for the 'analyzeduration' (0) and 'probesize' (5000000) options\n", "Input #0, h263, from '__MACOSX/._example 3 copy.mp4':\n", " Duration: N/A, bitrate: N/A\n", " Stream #0:0: Video: h263, none, 25 tbr, 1200k tbn\n", "Output #0, s16le, to 'pipe:':\n", "[out#0/s16le @ 0x159605190] Output file does not contain any stream\n", "\n", "example 3.mp4\n", "['/private/var/folders/73/1kdzs7qn01v55rn5825j1cjr0000gn/T/gradio/d7b854323dc4c999fd9de0b88f09374e3de732e8/YOLOv8 Best We.pt']\n", "[00:00.000 --> 00:04.000] He's got a kid, he's got a baby in the car, we've got to get him out, he's got a baby in the car.\n", "[00:04.000 --> 00:05.000] He's coughing.\n", "[00:05.000 --> 00:06.000] Fucking idiot.\n", "{'text': \" He's got a kid, he's got a baby in the car, we've got to get him out, he's got a baby in the car. He's coughing. Fucking idiot.\", 'segments': [{'id': 0, 'seek': 0, 'start': 0.0, 'end': 4.0, 'text': \" He's got a kid, he's got a baby in the car, we've got to get him out, he's got a baby in the car.\", 'tokens': [50364, 634, 311, 658, 257, 1636, 11, 415, 311, 658, 257, 3186, 294, 264, 1032, 11, 321, 600, 658, 281, 483, 796, 484, 11, 415, 311, 658, 257, 3186, 294, 264, 1032, 13, 50564], 'temperature': 0.0, 'avg_logprob': -0.2445144860640816, 'compression_ratio': 1.4269662921348314, 'no_speech_prob': 0.23536445200443268}, {'id': 1, 'seek': 0, 'start': 4.0, 'end': 5.0, 'text': \" He's coughing.\", 'tokens': [50564, 634, 311, 39375, 13, 50614], 'temperature': 0.0, 'avg_logprob': -0.2445144860640816, 'compression_ratio': 1.4269662921348314, 'no_speech_prob': 0.23536445200443268}, {'id': 2, 'seek': 0, 'start': 5.0, 'end': 6.0, 'text': ' Fucking idiot.', 'tokens': [50614, 33342, 14270, 13, 50664], 'temperature': 0.0, 'avg_logprob': -0.2445144860640816, 'compression_ratio': 1.4269662921348314, 'no_speech_prob': 0.23536445200443268}], 'language': 'en'}\n", "{\"text\": \" He's got a kid, he's got a baby in the car, we've got to get him out, he's got a baby in the car. He's coughing. Fucking idiot.\", \"segments\": [{\"id\": 0, \"seek\": 0, \"start\": 0.0, \"end\": 4.0, \"text\": \" He's got a kid, he's got a baby in the car, we've got to get him out, he's got a baby in the car.\", \"tokens\": [50364, 634, 311, 658, 257, 1636, 11, 415, 311, 658, 257, 3186, 294, 264, 1032, 11, 321, 600, 658, 281, 483, 796, 484, 11, 415, 311, 658, 257, 3186, 294, 264, 1032, 13, 50564], \"temperature\": 0.0, \"avg_logprob\": -0.2445144860640816, \"compression_ratio\": 1.4269662921348314, \"no_speech_prob\": 0.23536445200443268}, {\"id\": 1, \"seek\": 0, \"start\": 4.0, \"end\": 5.0, \"text\": \" He's coughing.\", \"tokens\": [50564, 634, 311, 39375, 13, 50614], \"temperature\": 0.0, \"avg_logprob\": -0.2445144860640816, \"compression_ratio\": 1.4269662921348314, \"no_speech_prob\": 0.23536445200443268}, {\"id\": 2, \"seek\": 0, \"start\": 5.0, \"end\": 6.0, \"text\": \" Fucking idiot.\", \"tokens\": [50614, 33342, 14270, 13, 50664], \"temperature\": 0.0, \"avg_logprob\": -0.2445144860640816, \"compression_ratio\": 1.4269662921348314, \"no_speech_prob\": 0.23536445200443268}], \"language\": \"en\"}\n", "He's coughing. Start Time: 00:00:04 End Time: 00:00:05\n", "Fucking idiot. Start Time: 00:00:05 End Time: 00:00:06\n", "batons {'Video 1': '00:00:02 to 00:00:04', 'Video 2': '00:00:02 to 00:00:04'}\n", "__MACOSX/._example 3.mp4\n", "['/private/var/folders/73/1kdzs7qn01v55rn5825j1cjr0000gn/T/gradio/d7b854323dc4c999fd9de0b88f09374e3de732e8/YOLOv8 Best We.pt']\n", "Failed to load audio: ffmpeg version 6.0 Copyright (c) 2000-2023 the FFmpeg developers\n", " built with Apple clang version 15.0.0 (clang-1500.0.40.1)\n", " configuration: --prefix=/opt/homebrew/Cellar/ffmpeg/6.0_1 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspeex --enable-libsoxr --enable-libzmq --enable-libzimg --disable-libjack --disable-indev=jack --enable-videotoolbox --enable-audiotoolbox --enable-neon\n", " libavutil 58. 2.100 / 58. 2.100\n", " libavcodec 60. 3.100 / 60. 3.100\n", " libavformat 60. 3.100 / 60. 3.100\n", " libavdevice 60. 1.100 / 60. 1.100\n", " libavfilter 9. 3.100 / 9. 3.100\n", " libswscale 7. 1.100 / 7. 1.100\n", " libswresample 4. 10.100 / 4. 10.100\n", " libpostproc 57. 1.100 / 57. 1.100\n", "[h263 @ 0x136b074c0] Format h263 detected only with low score of 25, misdetection possible!\n", "[h263 @ 0x136b07750] H.263 SAC not supported\n", "[h263 @ 0x136b07750] header damaged\n", "[h263 @ 0x136b074c0] Could not find codec parameters for stream 0 (Video: h263, none): unspecified size\n", "Consider increasing the value for the 'analyzeduration' (0) and 'probesize' (5000000) options\n", "Input #0, h263, from '__MACOSX/._example 3.mp4':\n", " Duration: N/A, bitrate: N/A\n", " Stream #0:0: Video: h263, none, 25 tbr, 1200k tbn\n", "Output #0, s16le, to 'pipe:':\n", "[out#0/s16le @ 0x135e0ec50] Output file does not contain any stream\n", "\n", "ALL FUNC Executed without errors\n" ] } ], "source": [ "import whisper\n", "import cv2\n", "import os\n", "import urllib.request\n", "from PIL import Image\n", "from ultralytics import YOLO\n", "import torch\n", "import matplotlib.pyplot as plt\n", "from tqdm import tqdm\n", "from transformers import pipeline\n", "import moviepy.editor as mp\n", "import json\n", "import re\n", "import gradio as gr\n", "from openai import OpenAI\n", "\n", "from langchain.llms import OpenAI\n", "from langchain.prompts import PromptTemplate\n", "from langchain.chains import LLMChain\n", "\n", "from langchain.chat_models import ChatOpenAI\n", "from langchain.prompts import HumanMessagePromptTemplate\n", "from langchain.schema.messages import SystemMessage\n", "from langchain.prompts import ChatPromptTemplate\n", "\n", "def video_transcription(video_path):\n", " model = whisper.load_model('medium')\n", " transcript = model.transcribe(video_path, verbose = True, language = 'en')\n", " print(transcript)\n", "\n", " return json.dumps(transcript)\n", "\n", "def action_detection(json_object, openai_key):\n", " transcript = json.loads(json_object)\n", " transcript_string = ''\n", " for segments in transcript['segments']:\n", " transcript_string+=str(segments['text']+'\\n')\n", "\n", " chunks = []\n", " output = {}\n", " count = 0\n", " split_transcript = transcript_string.split(\"\\n\")\n", " num_lines = len(split_transcript)\n", " num_chars = 0\n", " i = 0\n", " prev = 0\n", "\n", " while i < num_lines:\n", " num_chars+=len(split_transcript[i])\n", " if num_chars>=16000:\n", " chunks.append(\"\\n\".join(split_transcript[prev:i]))\n", " prev = i\n", " num_chars = 0\n", " i+=1\n", " if i == num_lines:\n", " chunks.append(\"\\n\".join(split_transcript[prev:i]))\n", "\n", " # client = OpenAI(api_key = openai_key)\n", " llm = OpenAI(openai_api_key=openai_key, model=\"gpt-4\")\n", " chat_template = ChatPromptTemplate.from_messages(\n", " [\n", " SystemMessage(\n", " content=(\n", " \"You are an AI system specialized in detecting planning issues, critiquing plans, and analyzing conversations between police officers regarding how to disperse.\" \n", " \"Additionally, identify any instances suggesting 1st Amendment violations, criticizing the lack of a plan, and aggressive comments. Transcript:\\n\\n{transcript_}\\n\\n.\" \n", " \"Give response only in the json format for example: \\{\\\"1\\\": \\\"What should we do now. I don't have a clue?\\\", \\\"2\\\": \\\"what the fuck is this\\\", \\\"3\\\":\\\"Beat the fuck out of them\\\"\\}.\"\n", " \"There can be multiple instances, find out all of them. If you do not find anything just return {\\\"None\\\":\\\"None\\\"}\"\n", " )\n", " ),\n", " HumanMessagePromptTemplate.from_template(\"{transcript_}\"),\n", " ]\n", ")\n", "\n", "\n", " for i in chunks:\n", " prompt = PromptTemplate.from_template(\n", " \"You are an AI system specialized in detecting planning issues, critiquing plans, and analyzing conversations between police officers regarding how to disperse. Additionally, identify any instances suggesting 1st Amendment violations, criticizing the lack of a plan, and aggressive comments. Transcript:\\n\\n{i}\\n\\n. Give response only in the json format for example: \\{\\\"1\\\": \\\"What should we do now. I don't have a clue?\\\", \\\"2\\\": \\\"what the fuck is this\\\", \\\"3\\\":\\\"Beat the fuck out of them\\\"\\}. There can be multiple instances, find out all of them. If you do not find anything just return {\\\"None\\\":\\\"None\\\"}\"\n", " )\n", " \n", " llm = ChatOpenAI(openai_api_key=openai_key)\n", " p = chat_template.format_messages(transcript_=i)\n", " gpt_output = llm(p).content\n", " \n", " # print(gpt_output)\n", " # gpt_output = completion.choices[0].message.content\n", " # print(gpt_output)\n", " \n", " \n", " \n", " \n", " gpt_output = dict(json.loads(gpt_output))\n", " for j in gpt_output.values():\n", " output[count] = j\n", " count+=1\n", "\n", " sent_with_time = []\n", "\n", " for sentence_to_search in output.values():\n", " pattern = re.compile(re.escape(sentence_to_search), re.IGNORECASE)\n", "\n", " matching_entries = [entry for entry in transcript['segments'] if re.search(pattern, entry['text'])]\n", "\n", " if matching_entries:\n", " for entry in matching_entries:\n", " hours_s, remainder = divmod(entry['start'], 3600)\n", " minutes_s, seconds_s = divmod(remainder, 60)\n", " hours_s = str(int(hours_s)).zfill(2)\n", " minutes_s = str(int(minutes_s)).zfill(2)\n", " seconds_s = str(int(seconds_s)).zfill(2)\n", "\n", " \n", " hours_e, remainder = divmod(entry['end'], 3600)\n", " minutes_e, seconds_e = divmod(remainder, 60)\n", " hours_e = str(int(hours_e)).zfill(2)\n", " minutes_e = str(int(minutes_e)).zfill(2)\n", " seconds_e = str(int(seconds_e)).zfill(2)\n", "\n", " sent_with_time.append(sentence_to_search + ' Start Time: ' + str(hours_s) + \":\" + str(minutes_s) + \":\" + str(seconds_s) + ' End Time: ' + str(hours_e) + \":\" + str(minutes_e) + \":\" + str(seconds_e))\n", "\n", " return \"\\n\".join(sent_with_time)\n", "\n", "def process_video(video_path, weights):\n", " try:\n", " # This code cell detects batons in the video\n", " current_frame = 0\n", " model = YOLO(weights)\n", " cap = cv2.VideoCapture(video_path)\n", " fps = int(cap.get(cv2.CAP_PROP_FPS))\n", " conseq_frames = 0\n", " start_time = \"\"\n", " end_time = \"\"\n", " res = []\n", "\n", " while True:\n", " ret, frame = cap.read()\n", " if not ret:\n", " break\n", "\n", " # Detecting baton on one frame per second\n", " if current_frame % fps == 0:\n", " currect_sec = current_frame/fps\n", "\n", " # Model prediction on current frame\n", " results = model(frame, verbose = False)\n", " count = 0\n", " classes = results[0].boxes.data\n", "\n", " # Formatting the time for printing\n", " hours, remainder = divmod(currect_sec, 3600)\n", " minutes, seconds = divmod(remainder, 60)\n", " hours = str(int(hours)).zfill(2)\n", " minutes = str(int(minutes)).zfill(2)\n", " seconds = str(int(seconds)).zfill(2)\n", "\n", " for i in classes:\n", "\n", " # Checking if baton is detected (i.e. if the class corresponding to baton is 1 or not)\n", " if float(i[5]) == 1:\n", " count+=1\n", "\n", " # Marking the start_time if this is the first consecutive frame a baton is detected in\n", " if count >= 1:\n", " conseq_frames+=1\n", " if conseq_frames == 1:\n", " start_time = hours + \":\" + minutes + \":\" + seconds\n", "\n", " # Marking the end time if after one or multiple consecutive frames of detection, a baton is not detected\n", " else:\n", " if conseq_frames > 0:\n", " conseq_frames = 0\n", " end_time = hours + \":\" + minutes + \":\" + seconds\n", "\n", " # Printing time intervals in which baton was detected\n", " res.append(start_time + \" to \" + end_time)\n", " start_time = \"\"\n", " end_time = \"\"\n", "\n", " current_frame += 1\n", " cap.release()\n", "\n", " return \"\\n\".join(res)\n", "\n", " except Exception as e:\n", "\n", " return e\n", "\n", "# def all_funcs(openai_key,video_path, yolo_weights, pr = gr.Progress(track_tqdm = True)):\n", "\n", " \n", "# video_path = video_path[0].split('/')[-1]\n", "# yolo_weights = yolo_weights[0].split('/')[-1]\n", "# transcript = video_transcription(video_path)\n", "# sentences = action_detection(transcript, openai_key)\n", "# batons = process_video(video_path, yolo_weights)\n", "\n", "# print(\"ALL FUNC Executed without errors\")\n", "\n", "# return sentences, batons\n", "\n", "import zipfile\n", "import smtplib\n", "import ssl\n", "from email.message import EmailMessage\n", "\n", "def all_funcs(openai_key, zip_path, yolo_weights, email, pr = gr.Progress(track_tqdm = True)):\n", "\n", " sentences = {}\n", " batons = {}\n", " count = 1\n", " \n", " print(zip_path)\n", " with zipfile.ZipFile(zip_path[0].split(\"/\")[-1], \"r\") as zip_ref:\n", " for filename in zip_ref.namelist():\n", " # Inn 2 lines mein error aa sakta hai\n", " \n", " zip_ref.extract(filename)\n", " # video_path = filename[0].split('/')[-1]\n", " video_path = filename\n", " print(video_path)\n", "\n", " yolo_weights = yolo_weights\n", " print(yolo_weights)\n", " try:\n", " transcript = video_transcription(video_path)\n", " print(transcript)\n", " video_name = \"Video \" + str(count)\n", " sentences[video_name] = action_detection(transcript, openai_key)\n", " print(sentences[video_name])\n", " batons[video_name] = process_video(video_path, yolo_weights[0])\n", " print(\"batons \", batons)\n", " count+=1\n", " except Exception as e:\n", " print(e)\n", " continue\n", "\n", " email_sender = 'bodycam1211@gmail.com'\n", " email_password = 'evmt luaz mgoi iapl'\n", " email_receiver = email\n", "\n", " # Set the subject and body of the email\n", " subject = 'Timestamps Detection Complete'\n", "\n", " result = \"\"\n", " for i in sentences.keys():\n", " result = result + i + \"\\n\"\n", " result = \"-\" + result + sentences[i] + \"\\n\"\n", " result = result + \"Batons time stamp: \" + \"\\n\"\n", " result = result + batons[i] + \"\\n\\n\"\n", " \n", " body = \"Here are the results of your detected timestamps:\\n\" + result\n", "\n", " em = EmailMessage()\n", " em['From'] = email_sender\n", " em['To'] = email_receiver\n", " em['Subject'] = subject\n", " em.set_content(body)\n", "\n", " # Add SSL (layer of security)\n", " context = ssl.create_default_context()\n", "\n", " # Log in and send the email\n", " with smtplib.SMTP_SSL('smtp.gmail.com', 465, context=context) as smtp:\n", " smtp.login(email_sender, email_password)\n", " smtp.sendmail(email_sender, email_receiver, em.as_string())\n", " \n", " print(\"ALL FUNC Executed without errors\")\n", "\n", " return sentences, batons\n", "\n", "\n", "btn = gr.Interface(\n", " fn = all_funcs,\n", " inputs = [\"text\", gr.Files(label = \"Select Zip File\"), gr.Files(label = \"Select YOLOv8 Weights File\"), \"text\"],\n", " outputs=[gr.Textbox(label = \"Audio Analysis Time Stamps\", lines = 20), gr.Textbox(label = \"Baton Detection Timestamps\", lines = 20)]\n", ")\n", "\n", "btn.launch(server_name=\"0.0.0.0\", server_port=4000)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[]\n" ] } ], "source": [ "import zipfile\n", "with zipfile.ZipFile(\"test-a.zip\", \"r\") as zip_ref:\n", " print(zip_ref.namelist())" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['example 3 copy.mp4', '__MACOSX/._example 3 copy.mp4', 'example 3.mp4', '__MACOSX/._example 3.mp4']\n", "example 3 copy.mp4\n", "__MACOSX/._example 3 copy.mp4\n", "example 3.mp4\n", "__MACOSX/._example 3.mp4\n" ] } ], "source": [ "with zipfile.ZipFile(\"test.zip\", \"r\") as zip_ref:\n", " print(zip_ref.namelist())\n", " for filename in zip_ref.namelist():\n", " # Inn 2 lines mein error aa sakta hai\n", " \n", " zip_ref.extract(filename)\n", " # video_path = filename[0].split('/')[-1]\n", " video_path = filename\n", " print(video_path)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Dockerfile \u001b[34mflagged\u001b[m\u001b[m\n", "Spark_Deployment.ipynb requirements.txt\n", "Spark_Deployment_Final.ipynb test-a.zip\n", "YOLOv8 Best We.pt zip_file_with_email.py\n", "app.py\n" ] } ], "source": [ "!ls" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "accelerator": "GPU", "colab": { "gpuType": "T4", "provenance": [] }, "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.9" } }, "nbformat": 4, "nbformat_minor": 0 }