schema-version: "v0.5.0"
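
# Component identity, authorship, and repository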
component-name: odtp-pyannote-whisper
component-version: "v0.1.1"
component-license: Apache 2.0
component-type: ephemeral
component-description: Transcribe or translate audio files with Whisper, using pyannote for speaker diarization
component-authors:
  - name: Carlos Vivar Rios
    orcid: null
component-repository:
  url: "https://github.com/odtp-org/odtp-pyannote-whisper"
  doi: null
component-docker-image: null
tags:
  - audio
  - transcription
  - translation
  - whisper
  - pyannote
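
# Upstream tools wrapped by this component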
tools:
  - tool-name: whisper
    tool-authors:
      - name: OpenAI
        orcid: null
    tool-version: latest
    tool-repository:
      url: "https://github.com/openai/whisper"
      doi: null
    tool-license: MIT

  - tool-name: pyannote
    tool-authors:
      - name: Hervé Bredin
        orcid: null
    tool-version: latest
    tool-repository:
      url: "https://github.com/pyannote/pyannote-audio"
      doi: null
    tool-license: MIT
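
# Secrets injected at runtime. The pyannote diarization models on Hugging Face
# are gated, so the account behind HF_TOKEN must have accepted their user conditions.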
secrets:
  - name: HF_TOKEN
    description: Hugging Face API token for accessing pyannote models
    type: str

build-args: null

ports: null
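
# User-facing parameters. Larger Whisper models are more accurate but slower
# and need more GPU memory.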
parameters:
  - name: MODEL
    default-value: large-v3
    datatype: str
    description: Whisper model to use for transcription/translation
    parameter-bounds: null
    options:
      - tiny
      - base
      - small
      - medium
      - large
      - large-v2
      - large-v3
    allow-custom-value: false

  - name: TASK
    default-value: transcribe
    datatype: str
    description: Task to perform (transcribe or translate)
    parameter-bounds: null
    options:
      - transcribe
      - translate
    allow-custom-value: false
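
# allow-custom-value is true here, so any language code supported by Whisper
# can be passed in addition to the options listed below.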
  - name: LANGUAGE
    default-value: auto
    datatype: str
    description: Source language code (use 'auto' for auto-detection)
    parameter-bounds: null
    options:
      - auto
      - en
      - es
      - fr
      - de
      - it
      - pt
      - nl
      - ja
      - zh
      - ru
    allow-custom-value: true
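
# Input files read from the ODTP input folder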
data-inputs:
  - name: INPUT_FILE
    type: .wav
    path: /odtp/odtp-input
    description: Input audio file in WAV format
    naming-convention: null
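
# Files written to the ODTP output folder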
data-outputs:
  - name: OUTPUT_FILE
    type: .srt
    path: /odtp/odtp-output
    description: Transcription/translation output in SRT format with speaker diarization
    naming-convention: null

  - name: OUTPUT_JSON_FILE
    type: .json
    path: /odtp/odtp-output
    description: Transcription/translation output in JSON format with speaker diarization
    naming-convention: null

  - name: OUTPUT_AUDIO_FILE
    type: .wav
    path: /odtp/odtp-output
    description: Audio in WAV format
    naming-convention: null

  - name: OUTPUT_PARAGRAPHS_FILE
    type: .json
    path: /odtp/odtp-output
    description: JSON file with the paragraphs containing speaker diarization and transcription/translation
    naming-convention: null

  - name: OUTPUT_MD_FILE
    type: .md
    path: /odtp/odtp-output
    description: Markdown file with the speaker diarization and transcription/translation
    naming-convention: null

  - name: OUTPUT_PDF_FILE
    type: .pdf
    path: /odtp/odtp-output
    description: PDF file with the speaker diarization and transcription/translation
    naming-convention: null

schema-input: null
schema-output: null
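
# Hardware requirements: a GPU is required since diarization and the larger
# Whisper models are very slow on CPU.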
devices:
  - type: gpu
    required: true