{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "id": "BQu8BQDJRTzn" }, "outputs": [], "source": [ "from google.colab import drive\n", "drive.mount('/content/drive')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "IubsuHn3RTxn" }, "outputs": [], "source": [ "!pip install -r requirements.txt" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "2f0ip2WksaKq", "outputId": "84cc86d2-9104-4ddf-91f4-c48c7c7f96a4" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting openai\n", " Downloading openai-1.3.7-py3-none-any.whl (221 kB)\n", "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/221.4 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.2/221.4 kB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m221.4/221.4 kB\u001b[0m \u001b[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: anyio<4,>=3.5.0 in /usr/local/lib/python3.10/dist-packages (from openai) (3.7.1)\n", "Requirement already satisfied: distro<2,>=1.7.0 in /usr/lib/python3/dist-packages (from openai) (1.7.0)\n", "Collecting httpx<1,>=0.23.0 (from openai)\n", " Downloading httpx-0.25.2-py3-none-any.whl (74 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.0/75.0 kB\u001b[0m \u001b[31m10.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: pydantic<3,>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from openai) (1.10.13)\n", "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from openai) (1.3.0)\n", "Requirement already satisfied: tqdm>4 in /usr/local/lib/python3.10/dist-packages (from openai) (4.66.1)\n", "Requirement already satisfied: typing-extensions<5,>=4.5 in /usr/local/lib/python3.10/dist-packages (from openai) (4.5.0)\n", "Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.5.0->openai) (3.6)\n", "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.5.0->openai) (1.2.0)\n", "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->openai) (2023.11.17)\n", "Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)\n", " Downloading httpcore-1.0.2-py3-none-any.whl (76 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.9/76.9 kB\u001b[0m \u001b[31m12.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)\n", " Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hInstalling collected packages: h11, httpcore, httpx, openai\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", "llmx 0.0.15a0 requires cohere, which is not installed.\n", "llmx 0.0.15a0 requires tiktoken, which is not installed.\u001b[0m\u001b[31m\n", "\u001b[0mSuccessfully installed h11-0.14.0 httpcore-1.0.2 httpx-0.25.2 openai-1.3.7\n" ] } ], "source": [ "!pip install --upgrade openai" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "wVSEk4MlsaNk", "outputId": "2229de07-db0b-4be9-a134-3788a9a4776b" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting openai-whisper\n", " Downloading openai-whisper-20231117.tar.gz (798 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m798.6/798.6 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", "Requirement already satisfied: triton<3,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (2.1.0)\n", "Requirement already satisfied: numba in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (0.58.1)\n", "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (1.23.5)\n", "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (2.1.0+cu118)\n", "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (4.66.1)\n", "Requirement already satisfied: more-itertools in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (10.1.0)\n", "Collecting tiktoken (from openai-whisper)\n", " Downloading tiktoken-0.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m56.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from triton<3,>=2.0.0->openai-whisper) (3.13.1)\n", "Requirement already satisfied: llvmlite<0.42,>=0.41.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba->openai-whisper) (0.41.1)\n", "Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken->openai-whisper) (2023.6.3)\n", "Requirement already satisfied: requests>=2.26.0 in /usr/local/lib/python3.10/dist-packages (from tiktoken->openai-whisper) (2.31.0)\n", "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch->openai-whisper) (4.5.0)\n", "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->openai-whisper) (1.12)\n", "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->openai-whisper) (3.2.1)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->openai-whisper) (3.1.2)\n", "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch->openai-whisper) (2023.6.0)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken->openai-whisper) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken->openai-whisper) (3.6)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken->openai-whisper) (2.0.7)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken->openai-whisper) (2023.11.17)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->openai-whisper) (2.1.3)\n", "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->openai-whisper) (1.3.0)\n", "Building wheels for collected packages: openai-whisper\n", " Building wheel for openai-whisper (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for openai-whisper: filename=openai_whisper-20231117-py3-none-any.whl size=801356 sha256=1b630eb40941206e0a5d2f94e1a42a54ba50a56bf8edcff93a7e62d78f5db157\n", " Stored in directory: /root/.cache/pip/wheels/d0/85/e1/9361b4cbea7dd4b7f6702fa4c3afc94877952eeb2b62f45f56\n", "Successfully built openai-whisper\n", "Installing collected packages: tiktoken, openai-whisper\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", "llmx 0.0.15a0 requires cohere, which is not installed.\u001b[0m\u001b[31m\n", "\u001b[0mSuccessfully installed openai-whisper-20231117 tiktoken-0.5.1\n" ] } ], "source": [ "!pip install --upgrade openai-whisper" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "oYGTh27jsddT", "outputId": "8e9485aa-3645-4b9c-8cbd-99e5cb83bf33" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "UsageError: Line magic function `%sudo` not found.\n" ] } ], "source": [ "!sudo apt update && sudo apt install ffmpeg" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "id": "3ZrCX53_ssHy" }, "outputs": [], "source": [ "import whisper\n", "import cv2\n", "import os\n", "import urllib.request\n", "from PIL import Image\n", "from ultralytics import YOLO\n", "import torch\n", "import matplotlib.pyplot as plt\n", "from tqdm import tqdm\n", "from transformers import pipeline\n", "import moviepy.editor as mp\n", "import json\n", "import re\n", "import gradio as gr\n", "from openai import OpenAI" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": { "id": "lmWMJ_FeJDyD" }, "source": [ "# Full" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'d:\\\\M.S\\\\Boston University\\\\College\\\\Studies\\\\Sem 3 (Fall 23)\\\\Subjects\\\\CS549 - Spark! ML Practicum\\\\Gradio App'" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%pwd" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "id": "YMDeEOwLtGYN" }, "outputs": [], "source": [ "def video_transcription(video_path):\n", " model = whisper.load_model('tiny')\n", " transcript = model.transcribe(video_path, verbose = False, language = 'en')\n", "\n", " return json.dumps(transcript)\n", "\n", "def action_detection(json_object, openai_key):\n", " transcript = json.loads(json_object)\n", " transcript_string = ''\n", " for segments in transcript['segments']:\n", " transcript_string+=str(segments['text']+'\\n')\n", "\n", " chunks = []\n", " output = {}\n", " count = 0\n", " split_transcript = transcript_string.split(\"\\n\")\n", " num_lines = len(split_transcript)\n", " num_chars = 0\n", " i = 0\n", " prev = 0\n", "\n", " while i < num_lines:\n", " num_chars+=len(split_transcript[i])\n", " if num_chars>=16000:\n", " chunks.append(\"\\n\".join(split_transcript[prev:i]))\n", " prev = i\n", " num_chars = 0\n", " i+=1\n", " if i == num_lines:\n", " chunks.append(\"\\n\".join(split_transcript[prev:i]))\n", "\n", " client = OpenAI(api_key = openai_key)\n", "\n", " for i in chunks:\n", " completion = client.chat.completions.create(\n", " model=\"gpt-4\",\n", " messages=[\n", " {\"role\": \"user\", \"content\": f\"You are an AI system specialized in detecting planning issues, critiquing plans, and analyzing conversations between police officers regarding how to disperse. Additionally, identify any instances suggesting 1st Amendment violations, criticizing the lack of a plan, and aggressive comments. Transcript:\\n\\n{i}\\n\\n\"},\n", " {\"role\": \"user\", \"content\": \"Give response only in the json format for example: \\{\\\"1\\\": \\\"What should we do now. I don't have a clue?\\\", \\\"2\\\": \\\"what the fuck is this\\\", \\\"3\\\":\\\"Beat the fuck out of them\\\"\\}. There can be multiple instances, find out all of them. If you do not find anything just return {\\\"None\\\":\\\"None\\\"}\"}\n", " ],\n", " seed = 42,\n", " temperature = 0\n", " )\n", "\n", "\n", " gpt_output = completion.choices[0].message.content\n", " gpt_output = dict(json.loads(gpt_output))\n", " for j in gpt_output.values():\n", " output[count] = j\n", " count+=1\n", "\n", " sent_with_time = []\n", "\n", " for sentence_to_search in output.values():\n", " pattern = re.compile(re.escape(sentence_to_search), re.IGNORECASE)\n", "\n", " matching_entries = [entry for entry in transcript['segments'] if re.search(pattern, entry['text'])]\n", "\n", " if matching_entries:\n", " for entry in matching_entries:\n", " hours_s, remainder = divmod(entry['start'], 3600)\n", " minutes_s, seconds_s = divmod(remainder, 60)\n", " hours_s = str(int(hours_s)).zfill(2)\n", " minutes_s = str(int(minutes_s)).zfill(2)\n", " seconds_s = str(int(seconds_s)).zfill(2)\n", "\n", " \n", " hours_e, remainder = divmod(entry['end'], 3600)\n", " minutes_e, seconds_e = divmod(remainder, 60)\n", " hours_e = str(int(hours_e)).zfill(2)\n", " minutes_e = str(int(minutes_e)).zfill(2)\n", " seconds_e = str(int(seconds_e)).zfill(2)\n", "\n", " sent_with_time.append(sentence_to_search + ' Start Time: ' + str(hours_s) + \":\" + str(minutes_s) + \":\" + str(seconds_s) + ' End Time: ' + str(hours_e) + \":\" + str(minutes_e) + \":\" + str(seconds_e))\n", "\n", " return sent_with_time\n", "\n", "def process_video(video_path, weights):\n", " try:\n", " # This code cell detects batons in the video\n", " current_frame = 0\n", " model = YOLO(weights)\n", " cap = cv2.VideoCapture(video_path)\n", " fps = int(cap.get(cv2.CAP_PROP_FPS))\n", " conseq_frames = 0\n", " start_time = \"\"\n", " end_time = \"\"\n", " res = []\n", "\n", " while True:\n", " ret, frame = cap.read()\n", " if not ret:\n", " break\n", "\n", " # Detecting baton on one frame per second\n", " if current_frame % fps == 0:\n", " currect_sec = current_frame/fps\n", "\n", " # Model prediction on current frame\n", " results = model(frame, verbose = False)\n", " count = 0\n", " classes = results[0].boxes.data\n", "\n", " # Formatting the time for printing\n", " hours, remainder = divmod(currect_sec, 3600)\n", " minutes, seconds = divmod(remainder, 60)\n", " hours = str(int(hours)).zfill(2)\n", " minutes = str(int(minutes)).zfill(2)\n", " seconds = str(int(seconds)).zfill(2)\n", "\n", " for i in classes:\n", "\n", " # Checking if baton is detected (i.e. if the class corresponding to baton is 1 or not)\n", " if float(i[5]) == 1:\n", " count+=1\n", "\n", " # Marking the start_time if this is the first consecutive frame a baton is detected in\n", " if count >= 1:\n", " conseq_frames+=1\n", " if conseq_frames == 1:\n", " start_time = hours + \":\" + minutes + \":\" + seconds\n", "\n", " # Marking the end time if after one or multiple consecutive frames of detection, a baton is not detected\n", " else:\n", " if conseq_frames > 0:\n", " conseq_frames = 0\n", " end_time = hours + \":\" + minutes + \":\" + seconds\n", "\n", " # Printing time intervals in which baton was detected\n", " res.append(start_time + \" to \" + end_time)\n", " start_time = \"\"\n", " end_time = \"\"\n", "\n", " current_frame += 1\n", " cap.release()\n", "\n", " return \"\\n\".join(res)\n", "\n", " except Exception as e:\n", "\n", " return e\n", "\n", "def all_funcs(openai_key,video_path, yolo_weights, pr = gr.Progress(track_tqdm = True)):\n", "\n", " video_path = video_path[0].split('/')[-1]\n", " yolo_weights = yolo_weights[0].split('/')[-1]\n", " transcript = video_transcription(video_path)\n", " sentences = action_detection(transcript, openai_key)\n", " batons = process_video(video_path, yolo_weights)\n", "\n", " print(\"ALL FUNC Executed without errors\")\n", "\n", " return sentences, batons" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "id": "eHZxd3sIZXar" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ " 96%|█████████▌| 916553/952553 [23:32<00:55, 648.89frames/s] \n" ] } ], "source": [ "json_dump = video_transcription(os.path.join('/projectnb/cs505ws/students/ksashank/', 'protest-006.mp4'))" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "out = action_detection(json_dump, \"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\")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['None Start Time: 00:17:27 End Time: 00:17:31',\n", " 'None Start Time: 00:17:32 End Time: 00:17:33',\n", " 'None Start Time: 00:21:16 End Time: 00:21:18',\n", " \"Let's hit him with the fucking hose. Start Time: 00:35:32 End Time: 00:35:33\",\n", " \"Y'all ain't got nobody to fuck with. Start Time: 00:40:38 End Time: 00:40:40\",\n", " \"I'm not fucking with you. Start Time: 00:52:59 End Time: 00:53:00\",\n", " \"I'm going to fuck this. Start Time: 01:16:28 End Time: 01:16:30\",\n", " \"I'm going to fuck this. Start Time: 01:16:30 End Time: 01:16:32\",\n", " \"I'm going to fuck this. Start Time: 01:17:00 End Time: 01:17:02\",\n", " 'Fucking crazy, huh? Start Time: 01:17:08 End Time: 01:17:10',\n", " \"What the fuck's in it? Start Time: 01:19:58 End Time: 01:20:00\",\n", " 'What the fuck are you doing? Start Time: 00:54:09 End Time: 00:54:12',\n", " 'What the fuck did you do? Start Time: 00:52:53 End Time: 00:52:55',\n", " \"What's your fucking name? Start Time: 01:23:34 End Time: 01:23:36\",\n", " \"I'm going to fuck this. Start Time: 01:16:28 End Time: 01:16:30\",\n", " \"I'm going to fuck this. Start Time: 01:16:30 End Time: 01:16:32\",\n", " \"I'm going to fuck this. Start Time: 01:17:00 End Time: 01:17:02\",\n", " 'This is fucking nuts. Start Time: 01:11:31 End Time: 01:11:33',\n", " 'This is fucking nuts. Start Time: 01:11:34 End Time: 01:11:36',\n", " \"Shit me, I'm going fuck home. Start Time: 01:14:56 End Time: 01:14:58\",\n", " 'What the fuck did I do? Start Time: 01:34:10 End Time: 01:34:13',\n", " 'None Start Time: 00:17:27 End Time: 00:17:31',\n", " 'None Start Time: 00:17:32 End Time: 00:17:33',\n", " 'None Start Time: 00:21:16 End Time: 00:21:18']" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "out" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "ename": "FileNotFoundError", "evalue": "[WinError 2] The system cannot find the file specified", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", "\u001b[1;32md:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\Gradio App\\Spark_Deployment_Final.ipynb Cell 13\u001b[0m line \u001b[0;36m1\n\u001b[1;32m----> 1\u001b[0m video_transcription(\u001b[39m\"\u001b[39;49m\u001b[39mtest.mp4\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n", "\u001b[1;32md:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\Gradio App\\Spark_Deployment_Final.ipynb Cell 13\u001b[0m line \u001b[0;36m3\n\u001b[0;32m 1\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mvideo_transcription\u001b[39m(video_path):\n\u001b[0;32m 2\u001b[0m model \u001b[39m=\u001b[39m whisper\u001b[39m.\u001b[39mload_model(\u001b[39m'\u001b[39m\u001b[39mtiny\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m----> 3\u001b[0m transcript \u001b[39m=\u001b[39m model\u001b[39m.\u001b[39;49mtranscribe(video_path, verbose \u001b[39m=\u001b[39;49m \u001b[39mFalse\u001b[39;49;00m, language \u001b[39m=\u001b[39;49m \u001b[39m'\u001b[39;49m\u001b[39men\u001b[39;49m\u001b[39m'\u001b[39;49m)\n\u001b[0;32m 5\u001b[0m \u001b[39mreturn\u001b[39;00m json\u001b[39m.\u001b[39mdumps(transcript)\n", "File \u001b[1;32md:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\whisper\\transcribe.py:122\u001b[0m, in \u001b[0;36mtranscribe\u001b[1;34m(model, audio, verbose, temperature, compression_ratio_threshold, logprob_threshold, no_speech_threshold, condition_on_previous_text, initial_prompt, word_timestamps, prepend_punctuations, append_punctuations, **decode_options)\u001b[0m\n\u001b[0;32m 119\u001b[0m decode_options[\u001b[39m\"\u001b[39m\u001b[39mfp16\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m\n\u001b[0;32m 121\u001b[0m \u001b[39m# Pad 30-seconds of silence to the input audio, for slicing\u001b[39;00m\n\u001b[1;32m--> 122\u001b[0m mel \u001b[39m=\u001b[39m log_mel_spectrogram(audio, model\u001b[39m.\u001b[39;49mdims\u001b[39m.\u001b[39;49mn_mels, padding\u001b[39m=\u001b[39;49mN_SAMPLES)\n\u001b[0;32m 123\u001b[0m content_frames \u001b[39m=\u001b[39m mel\u001b[39m.\u001b[39mshape[\u001b[39m-\u001b[39m\u001b[39m1\u001b[39m] \u001b[39m-\u001b[39m N_FRAMES\n\u001b[0;32m 125\u001b[0m \u001b[39mif\u001b[39;00m decode_options\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mlanguage\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mNone\u001b[39;00m) \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n", "File \u001b[1;32md:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\whisper\\audio.py:140\u001b[0m, in \u001b[0;36mlog_mel_spectrogram\u001b[1;34m(audio, n_mels, padding, device)\u001b[0m\n\u001b[0;32m 138\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m torch\u001b[39m.\u001b[39mis_tensor(audio):\n\u001b[0;32m 139\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(audio, \u001b[39mstr\u001b[39m):\n\u001b[1;32m--> 140\u001b[0m audio \u001b[39m=\u001b[39m load_audio(audio)\n\u001b[0;32m 141\u001b[0m audio \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39mfrom_numpy(audio)\n\u001b[0;32m 143\u001b[0m \u001b[39mif\u001b[39;00m device \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n", "File \u001b[1;32md:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\whisper\\audio.py:58\u001b[0m, in \u001b[0;36mload_audio\u001b[1;34m(file, sr)\u001b[0m\n\u001b[0;32m 56\u001b[0m \u001b[39m# fmt: on\u001b[39;00m\n\u001b[0;32m 57\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m---> 58\u001b[0m out \u001b[39m=\u001b[39m run(cmd, capture_output\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m, check\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m)\u001b[39m.\u001b[39mstdout\n\u001b[0;32m 59\u001b[0m \u001b[39mexcept\u001b[39;00m CalledProcessError \u001b[39mas\u001b[39;00m e:\n\u001b[0;32m 60\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mFailed to load audio: \u001b[39m\u001b[39m{\u001b[39;00me\u001b[39m.\u001b[39mstderr\u001b[39m.\u001b[39mdecode()\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m) \u001b[39mfrom\u001b[39;00m \u001b[39me\u001b[39;00m\n", "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\subprocess.py:548\u001b[0m, in \u001b[0;36mrun\u001b[1;34m(input, capture_output, timeout, check, *popenargs, **kwargs)\u001b[0m\n\u001b[0;32m 545\u001b[0m kwargs[\u001b[39m'\u001b[39m\u001b[39mstdout\u001b[39m\u001b[39m'\u001b[39m] \u001b[39m=\u001b[39m PIPE\n\u001b[0;32m 546\u001b[0m kwargs[\u001b[39m'\u001b[39m\u001b[39mstderr\u001b[39m\u001b[39m'\u001b[39m] \u001b[39m=\u001b[39m PIPE\n\u001b[1;32m--> 548\u001b[0m \u001b[39mwith\u001b[39;00m Popen(\u001b[39m*\u001b[39;49mpopenargs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs) \u001b[39mas\u001b[39;00m process:\n\u001b[0;32m 549\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 550\u001b[0m stdout, stderr \u001b[39m=\u001b[39m process\u001b[39m.\u001b[39mcommunicate(\u001b[39minput\u001b[39m, timeout\u001b[39m=\u001b[39mtimeout)\n", "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\subprocess.py:1026\u001b[0m, in \u001b[0;36mPopen.__init__\u001b[1;34m(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, user, group, extra_groups, encoding, errors, text, umask, pipesize, process_group)\u001b[0m\n\u001b[0;32m 1022\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtext_mode:\n\u001b[0;32m 1023\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstderr \u001b[39m=\u001b[39m io\u001b[39m.\u001b[39mTextIOWrapper(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstderr,\n\u001b[0;32m 1024\u001b[0m encoding\u001b[39m=\u001b[39mencoding, errors\u001b[39m=\u001b[39merrors)\n\u001b[1;32m-> 1026\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_execute_child(args, executable, preexec_fn, close_fds,\n\u001b[0;32m 1027\u001b[0m pass_fds, cwd, env,\n\u001b[0;32m 1028\u001b[0m startupinfo, creationflags, shell,\n\u001b[0;32m 1029\u001b[0m p2cread, p2cwrite,\n\u001b[0;32m 1030\u001b[0m c2pread, c2pwrite,\n\u001b[0;32m 1031\u001b[0m errread, errwrite,\n\u001b[0;32m 1032\u001b[0m restore_signals,\n\u001b[0;32m 1033\u001b[0m gid, gids, uid, umask,\n\u001b[0;32m 1034\u001b[0m start_new_session, process_group)\n\u001b[0;32m 1035\u001b[0m \u001b[39mexcept\u001b[39;00m:\n\u001b[0;32m 1036\u001b[0m \u001b[39m# Cleanup if the child failed starting.\u001b[39;00m\n\u001b[0;32m 1037\u001b[0m \u001b[39mfor\u001b[39;00m f \u001b[39min\u001b[39;00m \u001b[39mfilter\u001b[39m(\u001b[39mNone\u001b[39;00m, (\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstdin, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstdout, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstderr)):\n", "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\subprocess.py:1538\u001b[0m, in \u001b[0;36mPopen._execute_child\u001b[1;34m(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, unused_restore_signals, unused_gid, unused_gids, unused_uid, unused_umask, unused_start_new_session, unused_process_group)\u001b[0m\n\u001b[0;32m 1536\u001b[0m \u001b[39m# Start the process\u001b[39;00m\n\u001b[0;32m 1537\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m-> 1538\u001b[0m hp, ht, pid, tid \u001b[39m=\u001b[39m _winapi\u001b[39m.\u001b[39mCreateProcess(executable, args,\n\u001b[0;32m 1539\u001b[0m \u001b[39m# no special security\u001b[39;00m\n\u001b[0;32m 1540\u001b[0m \u001b[39mNone\u001b[39;00m, \u001b[39mNone\u001b[39;00m,\n\u001b[0;32m 1541\u001b[0m \u001b[39mint\u001b[39m(\u001b[39mnot\u001b[39;00m close_fds),\n\u001b[0;32m 1542\u001b[0m creationflags,\n\u001b[0;32m 1543\u001b[0m env,\n\u001b[0;32m 1544\u001b[0m cwd,\n\u001b[0;32m 1545\u001b[0m startupinfo)\n\u001b[0;32m 1546\u001b[0m \u001b[39mfinally\u001b[39;00m:\n\u001b[0;32m 1547\u001b[0m \u001b[39m# Child is launched. Close the parent's copy of those pipe\u001b[39;00m\n\u001b[0;32m 1548\u001b[0m \u001b[39m# handles that only the child should have open. You need\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 1551\u001b[0m \u001b[39m# pipe will not close when the child process exits and the\u001b[39;00m\n\u001b[0;32m 1552\u001b[0m \u001b[39m# ReadFile will hang.\u001b[39;00m\n\u001b[0;32m 1553\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_close_pipe_fds(p2cread, p2cwrite,\n\u001b[0;32m 1554\u001b[0m c2pread, c2pwrite,\n\u001b[0;32m 1555\u001b[0m errread, errwrite)\n", "\u001b[1;31mFileNotFoundError\u001b[0m: [WinError 2] The system cannot find the file specified" ] } ], "source": [ "video_transcription(\"C:\\Users\\madip\\OneDrive\\Desktop\\test.mp4\")" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "os.path.exists(os.path.join(\"d:/M.S/Boston University/College/Studies/Sem 3 (Fall 23)/Subjects/CS549 - Spark! ML Practicum/Spark\", \"Test_Video.mp4\"))" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "model = whisper.load_model('tiny')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "ename": "FileNotFoundError", "evalue": "[WinError 2] The system cannot find the file specified", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", "\u001b[1;32md:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\Gradio App\\Spark_Deployment_Final.ipynb Cell 14\u001b[0m line \u001b[0;36m1\n\u001b[1;32m----> 1\u001b[0m transcript \u001b[39m=\u001b[39m model\u001b[39m.\u001b[39;49mtranscribe(\u001b[39m\"\u001b[39;49m\u001b[39mD:\u001b[39;49m\u001b[39m\\\u001b[39;49m\u001b[39mM.S\u001b[39;49m\u001b[39m\\\u001b[39;49m\u001b[39mBoston University\u001b[39;49m\u001b[39m\\\u001b[39;49m\u001b[39mCollege\u001b[39;49m\u001b[39m\\\u001b[39;49m\u001b[39mStudies\u001b[39;49m\u001b[39m\\\u001b[39;49m\u001b[39mSem 3 (Fall 23)\u001b[39;49m\u001b[39m\\\u001b[39;49m\u001b[39mSubjects\u001b[39;49m\u001b[39m\\\u001b[39;49m\u001b[39mCS549 - Spark! ML Practicum\u001b[39;49m\u001b[39m\\\u001b[39;49m\u001b[39mGradio App\u001b[39;49m\u001b[39m\\\u001b[39;49m\u001b[39mTest_Video.mp4\u001b[39;49m\u001b[39m\"\u001b[39;49m, verbose \u001b[39m=\u001b[39;49m \u001b[39mFalse\u001b[39;49;00m, language \u001b[39m=\u001b[39;49m \u001b[39m'\u001b[39;49m\u001b[39men\u001b[39;49m\u001b[39m'\u001b[39;49m)\n", "File \u001b[1;32md:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env\\Lib\\site-packages\\whisper\\transcribe.py:122\u001b[0m, in \u001b[0;36mtranscribe\u001b[1;34m(model, audio, verbose, temperature, compression_ratio_threshold, logprob_threshold, no_speech_threshold, condition_on_previous_text, initial_prompt, word_timestamps, prepend_punctuations, append_punctuations, **decode_options)\u001b[0m\n\u001b[0;32m 119\u001b[0m decode_options[\u001b[39m\"\u001b[39m\u001b[39mfp16\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m\n\u001b[0;32m 121\u001b[0m \u001b[39m# Pad 30-seconds of silence to the input audio, for slicing\u001b[39;00m\n\u001b[1;32m--> 122\u001b[0m mel \u001b[39m=\u001b[39m log_mel_spectrogram(audio, model\u001b[39m.\u001b[39;49mdims\u001b[39m.\u001b[39;49mn_mels, padding\u001b[39m=\u001b[39;49mN_SAMPLES)\n\u001b[0;32m 123\u001b[0m content_frames \u001b[39m=\u001b[39m mel\u001b[39m.\u001b[39mshape[\u001b[39m-\u001b[39m\u001b[39m1\u001b[39m] \u001b[39m-\u001b[39m N_FRAMES\n\u001b[0;32m 125\u001b[0m \u001b[39mif\u001b[39;00m decode_options\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mlanguage\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mNone\u001b[39;00m) \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n", "File \u001b[1;32md:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env\\Lib\\site-packages\\whisper\\audio.py:140\u001b[0m, in \u001b[0;36mlog_mel_spectrogram\u001b[1;34m(audio, n_mels, padding, device)\u001b[0m\n\u001b[0;32m 138\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m torch\u001b[39m.\u001b[39mis_tensor(audio):\n\u001b[0;32m 139\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(audio, \u001b[39mstr\u001b[39m):\n\u001b[1;32m--> 140\u001b[0m audio \u001b[39m=\u001b[39m load_audio(audio)\n\u001b[0;32m 141\u001b[0m audio \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39mfrom_numpy(audio)\n\u001b[0;32m 143\u001b[0m \u001b[39mif\u001b[39;00m device \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n", "File \u001b[1;32md:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env\\Lib\\site-packages\\whisper\\audio.py:58\u001b[0m, in \u001b[0;36mload_audio\u001b[1;34m(file, sr)\u001b[0m\n\u001b[0;32m 56\u001b[0m \u001b[39m# fmt: on\u001b[39;00m\n\u001b[0;32m 57\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m---> 58\u001b[0m out \u001b[39m=\u001b[39m run(cmd, capture_output\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m, check\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m)\u001b[39m.\u001b[39mstdout\n\u001b[0;32m 59\u001b[0m \u001b[39mexcept\u001b[39;00m CalledProcessError \u001b[39mas\u001b[39;00m e:\n\u001b[0;32m 60\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mFailed to load audio: \u001b[39m\u001b[39m{\u001b[39;00me\u001b[39m.\u001b[39mstderr\u001b[39m.\u001b[39mdecode()\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m) \u001b[39mfrom\u001b[39;00m \u001b[39me\u001b[39;00m\n", "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\subprocess.py:548\u001b[0m, in \u001b[0;36mrun\u001b[1;34m(input, capture_output, timeout, check, *popenargs, **kwargs)\u001b[0m\n\u001b[0;32m 545\u001b[0m kwargs[\u001b[39m'\u001b[39m\u001b[39mstdout\u001b[39m\u001b[39m'\u001b[39m] \u001b[39m=\u001b[39m PIPE\n\u001b[0;32m 546\u001b[0m kwargs[\u001b[39m'\u001b[39m\u001b[39mstderr\u001b[39m\u001b[39m'\u001b[39m] \u001b[39m=\u001b[39m PIPE\n\u001b[1;32m--> 548\u001b[0m \u001b[39mwith\u001b[39;00m Popen(\u001b[39m*\u001b[39;49mpopenargs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs) \u001b[39mas\u001b[39;00m process:\n\u001b[0;32m 549\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 550\u001b[0m stdout, stderr \u001b[39m=\u001b[39m process\u001b[39m.\u001b[39mcommunicate(\u001b[39minput\u001b[39m, timeout\u001b[39m=\u001b[39mtimeout)\n", "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\subprocess.py:1026\u001b[0m, in \u001b[0;36mPopen.__init__\u001b[1;34m(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, user, group, extra_groups, encoding, errors, text, umask, pipesize, process_group)\u001b[0m\n\u001b[0;32m 1022\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtext_mode:\n\u001b[0;32m 1023\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstderr \u001b[39m=\u001b[39m io\u001b[39m.\u001b[39mTextIOWrapper(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstderr,\n\u001b[0;32m 1024\u001b[0m encoding\u001b[39m=\u001b[39mencoding, errors\u001b[39m=\u001b[39merrors)\n\u001b[1;32m-> 1026\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_execute_child(args, executable, preexec_fn, close_fds,\n\u001b[0;32m 1027\u001b[0m pass_fds, cwd, env,\n\u001b[0;32m 1028\u001b[0m startupinfo, creationflags, shell,\n\u001b[0;32m 1029\u001b[0m p2cread, p2cwrite,\n\u001b[0;32m 1030\u001b[0m c2pread, c2pwrite,\n\u001b[0;32m 1031\u001b[0m errread, errwrite,\n\u001b[0;32m 1032\u001b[0m restore_signals,\n\u001b[0;32m 1033\u001b[0m gid, gids, uid, umask,\n\u001b[0;32m 1034\u001b[0m start_new_session, process_group)\n\u001b[0;32m 1035\u001b[0m \u001b[39mexcept\u001b[39;00m:\n\u001b[0;32m 1036\u001b[0m \u001b[39m# Cleanup if the child failed starting.\u001b[39;00m\n\u001b[0;32m 1037\u001b[0m \u001b[39mfor\u001b[39;00m f \u001b[39min\u001b[39;00m \u001b[39mfilter\u001b[39m(\u001b[39mNone\u001b[39;00m, (\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstdin, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstdout, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstderr)):\n", "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\subprocess.py:1538\u001b[0m, in \u001b[0;36mPopen._execute_child\u001b[1;34m(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, unused_restore_signals, unused_gid, unused_gids, unused_uid, unused_umask, unused_start_new_session, unused_process_group)\u001b[0m\n\u001b[0;32m 1536\u001b[0m \u001b[39m# Start the process\u001b[39;00m\n\u001b[0;32m 1537\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m-> 1538\u001b[0m hp, ht, pid, tid \u001b[39m=\u001b[39m _winapi\u001b[39m.\u001b[39;49mCreateProcess(executable, args,\n\u001b[0;32m 1539\u001b[0m \u001b[39m# no special security\u001b[39;49;00m\n\u001b[0;32m 1540\u001b[0m \u001b[39mNone\u001b[39;49;00m, \u001b[39mNone\u001b[39;49;00m,\n\u001b[0;32m 1541\u001b[0m \u001b[39mint\u001b[39;49m(\u001b[39mnot\u001b[39;49;00m close_fds),\n\u001b[0;32m 1542\u001b[0m creationflags,\n\u001b[0;32m 1543\u001b[0m env,\n\u001b[0;32m 1544\u001b[0m cwd,\n\u001b[0;32m 1545\u001b[0m startupinfo)\n\u001b[0;32m 1546\u001b[0m \u001b[39mfinally\u001b[39;00m:\n\u001b[0;32m 1547\u001b[0m \u001b[39m# Child is launched. Close the parent's copy of those pipe\u001b[39;00m\n\u001b[0;32m 1548\u001b[0m \u001b[39m# handles that only the child should have open. You need\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 1551\u001b[0m \u001b[39m# pipe will not close when the child process exits and the\u001b[39;00m\n\u001b[0;32m 1552\u001b[0m \u001b[39m# ReadFile will hang.\u001b[39;00m\n\u001b[0;32m 1553\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_close_pipe_fds(p2cread, p2cwrite,\n\u001b[0;32m 1554\u001b[0m c2pread, c2pwrite,\n\u001b[0;32m 1555\u001b[0m errread, errwrite)\n", "\u001b[1;31mFileNotFoundError\u001b[0m: [WinError 2] The system cannot find the file specified" ] } ], "source": [ "transcript = model.transcribe(\"D:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\Gradio App\\Test_Video.mp4\", verbose = False, language = 'en')" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "os.path.exists(\"D:/M.S/Boston University/College/Studies/Sem 3 (Fall 23)/Subjects/CS549 - Spark! ML Practicum/Spark/Test_Video.mp4\")" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "os.path.exists(\"D:/M.S/Boston University/College/Studies/Sem 3 (Fall 23)/Subjects/CS549 - Spark! ML Practicum/Spark/OWL-Detections/YOLOv8 Best Weights.pt\")" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "os.path.exists(\"Test_Video.mp4\")" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "ename": "FileNotFoundError", "evalue": "[WinError 2] The system cannot find the file specified", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", "\u001b[1;32md:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\Gradio App\\Spark_Deployment_Final.ipynb Cell 19\u001b[0m line \u001b[0;36m1\n\u001b[1;32m----> 1\u001b[0m all_funcs(\u001b[39m\"\u001b[39;49m\u001b[39msk-MZd4k8qiN2Qh5MQ3Q8vRT3BlbkFJpeXherRIQCJBYle63q72\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mtest.mp4\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mD:/M.S/Boston University/College/Studies/Sem 3 (Fall 23)/Subjects/CS549 - Spark! ML Practicum/Spark/OWL-Detections/YOLOv8 Best Weights.pt\u001b[39;49m\u001b[39m\"\u001b[39;49m, pr \u001b[39m=\u001b[39;49m gr\u001b[39m.\u001b[39;49mProgress(track_tqdm \u001b[39m=\u001b[39;49m \u001b[39mTrue\u001b[39;49;00m))\n", "\u001b[1;32md:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\Gradio App\\Spark_Deployment_Final.ipynb Cell 19\u001b[0m line \u001b[0;36m1\n\u001b[0;32m 143\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mall_funcs\u001b[39m(openai_key,video_path, yolo_weights, pr \u001b[39m=\u001b[39m gr\u001b[39m.\u001b[39mProgress(track_tqdm \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m)):\n\u001b[0;32m 144\u001b[0m \n\u001b[0;32m 145\u001b[0m \u001b[39m#video_path = video_path[0].split('/')[-1]\u001b[39;00m\n\u001b[0;32m 146\u001b[0m \u001b[39m#yolo_weights = yolo_weights[0].split('/')[-1]\u001b[39;00m\n\u001b[1;32m--> 147\u001b[0m transcript \u001b[39m=\u001b[39m video_transcription(video_path)\n\u001b[0;32m 148\u001b[0m sentences \u001b[39m=\u001b[39m action_detection(transcript, openai_key)\n\u001b[0;32m 149\u001b[0m batons \u001b[39m=\u001b[39m process_video(video_path, yolo_weights)\n", "\u001b[1;32md:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\Gradio App\\Spark_Deployment_Final.ipynb Cell 19\u001b[0m line \u001b[0;36m3\n\u001b[0;32m 1\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mvideo_transcription\u001b[39m(video_path):\n\u001b[0;32m 2\u001b[0m model \u001b[39m=\u001b[39m whisper\u001b[39m.\u001b[39mload_model(\u001b[39m'\u001b[39m\u001b[39mtiny\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m----> 3\u001b[0m transcript \u001b[39m=\u001b[39m model\u001b[39m.\u001b[39;49mtranscribe(video_path, verbose \u001b[39m=\u001b[39;49m \u001b[39mFalse\u001b[39;49;00m, language \u001b[39m=\u001b[39;49m \u001b[39m'\u001b[39;49m\u001b[39men\u001b[39;49m\u001b[39m'\u001b[39;49m)\n\u001b[0;32m 5\u001b[0m \u001b[39mreturn\u001b[39;00m json\u001b[39m.\u001b[39mdumps(transcript)\n", "File \u001b[1;32md:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\whisper\\transcribe.py:122\u001b[0m, in \u001b[0;36mtranscribe\u001b[1;34m(model, audio, verbose, temperature, compression_ratio_threshold, logprob_threshold, no_speech_threshold, condition_on_previous_text, initial_prompt, word_timestamps, prepend_punctuations, append_punctuations, **decode_options)\u001b[0m\n\u001b[0;32m 119\u001b[0m decode_options[\u001b[39m\"\u001b[39m\u001b[39mfp16\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m\n\u001b[0;32m 121\u001b[0m \u001b[39m# Pad 30-seconds of silence to the input audio, for slicing\u001b[39;00m\n\u001b[1;32m--> 122\u001b[0m mel \u001b[39m=\u001b[39m log_mel_spectrogram(audio, model\u001b[39m.\u001b[39;49mdims\u001b[39m.\u001b[39;49mn_mels, padding\u001b[39m=\u001b[39;49mN_SAMPLES)\n\u001b[0;32m 123\u001b[0m content_frames \u001b[39m=\u001b[39m mel\u001b[39m.\u001b[39mshape[\u001b[39m-\u001b[39m\u001b[39m1\u001b[39m] \u001b[39m-\u001b[39m N_FRAMES\n\u001b[0;32m 125\u001b[0m \u001b[39mif\u001b[39;00m decode_options\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mlanguage\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mNone\u001b[39;00m) \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n", "File \u001b[1;32md:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\whisper\\audio.py:140\u001b[0m, in \u001b[0;36mlog_mel_spectrogram\u001b[1;34m(audio, n_mels, padding, device)\u001b[0m\n\u001b[0;32m 138\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m torch\u001b[39m.\u001b[39mis_tensor(audio):\n\u001b[0;32m 139\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(audio, \u001b[39mstr\u001b[39m):\n\u001b[1;32m--> 140\u001b[0m audio \u001b[39m=\u001b[39m load_audio(audio)\n\u001b[0;32m 141\u001b[0m audio \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39mfrom_numpy(audio)\n\u001b[0;32m 143\u001b[0m \u001b[39mif\u001b[39;00m device \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n", "File \u001b[1;32md:\\M.S\\Boston University\\College\\Studies\\Sem 3 (Fall 23)\\Subjects\\CS549 - Spark! ML Practicum\\spark-env2\\Lib\\site-packages\\whisper\\audio.py:58\u001b[0m, in \u001b[0;36mload_audio\u001b[1;34m(file, sr)\u001b[0m\n\u001b[0;32m 56\u001b[0m \u001b[39m# fmt: on\u001b[39;00m\n\u001b[0;32m 57\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m---> 58\u001b[0m out \u001b[39m=\u001b[39m run(cmd, capture_output\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m, check\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m)\u001b[39m.\u001b[39mstdout\n\u001b[0;32m 59\u001b[0m \u001b[39mexcept\u001b[39;00m CalledProcessError \u001b[39mas\u001b[39;00m e:\n\u001b[0;32m 60\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mFailed to load audio: \u001b[39m\u001b[39m{\u001b[39;00me\u001b[39m.\u001b[39mstderr\u001b[39m.\u001b[39mdecode()\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m) \u001b[39mfrom\u001b[39;00m \u001b[39me\u001b[39;00m\n", "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\subprocess.py:548\u001b[0m, in \u001b[0;36mrun\u001b[1;34m(input, capture_output, timeout, check, *popenargs, **kwargs)\u001b[0m\n\u001b[0;32m 545\u001b[0m kwargs[\u001b[39m'\u001b[39m\u001b[39mstdout\u001b[39m\u001b[39m'\u001b[39m] \u001b[39m=\u001b[39m PIPE\n\u001b[0;32m 546\u001b[0m kwargs[\u001b[39m'\u001b[39m\u001b[39mstderr\u001b[39m\u001b[39m'\u001b[39m] \u001b[39m=\u001b[39m PIPE\n\u001b[1;32m--> 548\u001b[0m \u001b[39mwith\u001b[39;00m Popen(\u001b[39m*\u001b[39;49mpopenargs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs) \u001b[39mas\u001b[39;00m process:\n\u001b[0;32m 549\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 550\u001b[0m stdout, stderr \u001b[39m=\u001b[39m process\u001b[39m.\u001b[39mcommunicate(\u001b[39minput\u001b[39m, timeout\u001b[39m=\u001b[39mtimeout)\n", "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\subprocess.py:1026\u001b[0m, in \u001b[0;36mPopen.__init__\u001b[1;34m(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, user, group, extra_groups, encoding, errors, text, umask, pipesize, process_group)\u001b[0m\n\u001b[0;32m 1022\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtext_mode:\n\u001b[0;32m 1023\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstderr \u001b[39m=\u001b[39m io\u001b[39m.\u001b[39mTextIOWrapper(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstderr,\n\u001b[0;32m 1024\u001b[0m encoding\u001b[39m=\u001b[39mencoding, errors\u001b[39m=\u001b[39merrors)\n\u001b[1;32m-> 1026\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_execute_child(args, executable, preexec_fn, close_fds,\n\u001b[0;32m 1027\u001b[0m pass_fds, cwd, env,\n\u001b[0;32m 1028\u001b[0m startupinfo, creationflags, shell,\n\u001b[0;32m 1029\u001b[0m p2cread, p2cwrite,\n\u001b[0;32m 1030\u001b[0m c2pread, c2pwrite,\n\u001b[0;32m 1031\u001b[0m errread, errwrite,\n\u001b[0;32m 1032\u001b[0m restore_signals,\n\u001b[0;32m 1033\u001b[0m gid, gids, uid, umask,\n\u001b[0;32m 1034\u001b[0m start_new_session, process_group)\n\u001b[0;32m 1035\u001b[0m \u001b[39mexcept\u001b[39;00m:\n\u001b[0;32m 1036\u001b[0m \u001b[39m# Cleanup if the child failed starting.\u001b[39;00m\n\u001b[0;32m 1037\u001b[0m \u001b[39mfor\u001b[39;00m f \u001b[39min\u001b[39;00m \u001b[39mfilter\u001b[39m(\u001b[39mNone\u001b[39;00m, (\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstdin, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstdout, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstderr)):\n", "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\subprocess.py:1538\u001b[0m, in \u001b[0;36mPopen._execute_child\u001b[1;34m(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, unused_restore_signals, unused_gid, unused_gids, unused_uid, unused_umask, unused_start_new_session, unused_process_group)\u001b[0m\n\u001b[0;32m 1536\u001b[0m \u001b[39m# Start the process\u001b[39;00m\n\u001b[0;32m 1537\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m-> 1538\u001b[0m hp, ht, pid, tid \u001b[39m=\u001b[39m _winapi\u001b[39m.\u001b[39mCreateProcess(executable, args,\n\u001b[0;32m 1539\u001b[0m \u001b[39m# no special security\u001b[39;00m\n\u001b[0;32m 1540\u001b[0m \u001b[39mNone\u001b[39;00m, \u001b[39mNone\u001b[39;00m,\n\u001b[0;32m 1541\u001b[0m \u001b[39mint\u001b[39m(\u001b[39mnot\u001b[39;00m close_fds),\n\u001b[0;32m 1542\u001b[0m creationflags,\n\u001b[0;32m 1543\u001b[0m env,\n\u001b[0;32m 1544\u001b[0m cwd,\n\u001b[0;32m 1545\u001b[0m startupinfo)\n\u001b[0;32m 1546\u001b[0m \u001b[39mfinally\u001b[39;00m:\n\u001b[0;32m 1547\u001b[0m \u001b[39m# Child is launched. Close the parent's copy of those pipe\u001b[39;00m\n\u001b[0;32m 1548\u001b[0m \u001b[39m# handles that only the child should have open. You need\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 1551\u001b[0m \u001b[39m# pipe will not close when the child process exits and the\u001b[39;00m\n\u001b[0;32m 1552\u001b[0m \u001b[39m# ReadFile will hang.\u001b[39;00m\n\u001b[0;32m 1553\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_close_pipe_fds(p2cread, p2cwrite,\n\u001b[0;32m 1554\u001b[0m c2pread, c2pwrite,\n\u001b[0;32m 1555\u001b[0m errread, errwrite)\n", "\u001b[1;31mFileNotFoundError\u001b[0m: [WinError 2] The system cannot find the file specified" ] } ], "source": [ "all_funcs(\"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\", \"test.mp4\", \"D:/M.S/Boston University/College/Studies/Sem 3 (Fall 23)/Subjects/CS549 - Spark! ML Practicum/Spark/OWL-Detections/YOLOv8 Best Weights.pt\", pr = gr.Progress(track_tqdm = True))" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 626 }, "id": "ZbZKUNl3Mttf", "outputId": "6c0912aa-f2d3-49f7-db58-473905111653" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Running on local URL: http://127.0.0.1:7860\n", "\n", "To create a public link, set `share=True` in `launch()`.\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ "