File size: 3,625 Bytes
6934a38 aaba65a c72eec6 6934a38 2a1a32a 6934a38 2a1a32a 6934a38 2a1a32a 6934a38 2a1a32a 6934a38 2a1a32a 6934a38 2a1a32a 6934a38 2a1a32a 6934a38 2a1a32a 6934a38 2a1a32a 6934a38 2a1a32a 6934a38 2a1a32a 6934a38 2a1a32a 6934a38 2a1a32a 6934a38 2a1a32a c72eec6 6934a38 2a1a32a 6934a38 2a1a32a 6934a38 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
# Version of the schema format this file follows, kept so schema updates can be tracked
schema-version: 'v0.5.0'
# Component metadata: identity, license, authorship and publication details
component-name: odtp-pyannote-whisper
component-version: 'v0.1.1'
component-license: Apache 2.0
component-type: ephemeral
component-description: >-
  Transcribe or translate audio files using Whisper and Pyannote for speaker
  diarization
# One entry per author; orcid is null when no ORCID is recorded
component-authors:
  - name: Carlos Vivar Rios
    orcid: null
component-repository:
  url: 'https://github.com/odtp-org/odtp-pyannote-whisper'
  doi: null
# No Docker image reference is recorded for this component
component-docker-image: null
# Free-form keywords describing the component
tags:
  - audio
  - transcription
  - translation
  - whisper
  - pyannote
# Tools used by the component, each with its authors, version, repository and license
tools:
  - tool-name: whisper
    tool-authors:
      - name: OpenAI
        orcid: null
    # 'latest' means no specific release is pinned here
    tool-version: latest
    tool-repository:
      url: 'https://github.com/openai/whisper'
      doi: null
    tool-license: MIT
  - tool-name: pyannote
    tool-authors:
      - name: Hervé Bredin
        orcid: null
    tool-version: latest
    tool-repository:
      url: 'https://github.com/pyannote/pyannote-audio'
      doi: null
    tool-license: MIT
# Secrets, provided to the component as environment variables
secrets:
  - name: HF_TOKEN
    description: Hugging Face API token for accessing pyannote models
    type: str
# Build arguments: none declared
build-args: null
# Exposed ports: none declared
ports: null
# Runtime parameters accepted by the component.
# Each entry lists a default, a datatype, the allowed options, and whether
# values outside the options list are accepted (allow-custom-value).
parameters:
  # Whisper model selection; restricted to the listed options
  - name: MODEL
    default-value: large-v3
    datatype: str
    description: Whisper model to use for transcription/translation
    parameter-bounds: null
    options:
      - tiny
      - base
      - small
      - medium
      - large
      - large-v2
      - large-v3
    allow-custom-value: false
  # Operation to run; restricted to the listed options
  - name: TASK
    default-value: transcribe
    datatype: str
    description: Task to perform (transcribe or translate)
    parameter-bounds: null
    options:
      - transcribe
      - translate
    allow-custom-value: false
  # Source language; codes outside the list are accepted (allow-custom-value: true).
  # When adding codes here, quote any that YAML may read as a boolean (e.g. 'no').
  - name: LANGUAGE
    default-value: auto
    datatype: str
    description: Source language code (use 'auto' for auto-detection)
    parameter-bounds: null
    options:
      - auto
      - en
      - es
      - fr
      - de
      - it
      - pt
      - nl
      - ja
      - zh
      - ru
    allow-custom-value: true
# Input data expected by the component, located under /odtp/odtp-input
data-inputs:
  - name: INPUT_FILE
    type: '.wav'
    path: /odtp/odtp-input
    description: Input audio file in WAV format
    naming-convention: null
# Output data produced by the component; every entry is located under /odtp/odtp-output.
# Each entry's description should agree with its declared file type.
data-outputs:
  - name: OUTPUT_FILE
    type: '.srt'
    path: /odtp/odtp-output
    description: Transcription/translation output in SRT format with speaker diarization
    naming-convention: null
  - name: OUTPUT_JSON_FILE
    type: '.json'
    path: /odtp/odtp-output
    description: Transcription/translation output in JSON format with speaker diarization
    naming-convention: null
  - name: OUTPUT_AUDIO_FILE
    type: '.wav'
    path: /odtp/odtp-output
    description: Audio in wav format
    naming-convention: null
  # Declared as .json, so the description names JSON (it previously said Markdown)
  - name: OUTPUT_PARAGRAPHS_FILE
    type: '.json'
    path: /odtp/odtp-output
    description: JSON file with the paragraphs containing speaker diarization and transcription/translation
    naming-convention: null
  - name: OUTPUT_MD_FILE
    type: '.md'
    path: /odtp/odtp-output
    description: Markdown file with the speaker diarization and transcription/translation
    naming-convention: null
  - name: OUTPUT_PDF_FILE
    type: '.pdf'
    path: /odtp/odtp-output
    description: PDF file with the speaker diarization and transcription/translation
    naming-convention: null
# Validation schemas for inputs and outputs: not defined yet (future development)
schema-input: null
schema-output: null
# Device requirements: a GPU is declared as required.
# 'required' must be a bare true/false so it parses as a boolean, not a string.
devices:
  - type: gpu
    required: true