# Schema version for tracking updates to the schema format
schema-version: "v0.5.0"
# Component Information
component-name: odtp-pyannote-whisper
component-version: "v0.1.1"
component-license: Apache 2.0
component-type: ephemeral
component-description: Transcribe or translate audio files with Whisper, using pyannote for speaker diarization
component-authors:
- name: Carlos Vivar Rios
orcid: null
component-repository:
url: "https://github.com/odtp-org/odtp-pyannote-whisper"
doi: null
component-docker-image: null
tags:
- audio
- transcription
- translation
- whisper
- pyannote
# Tool Information
tools:
- tool-name: whisper
tool-authors:
- name: OpenAI
orcid: null
tool-version: latest
tool-repository:
url: "https://github.com/openai/whisper"
doi: null
tool-license: MIT
- tool-name: pyannote
tool-authors:
- name: Hervé Bredin
orcid: null
tool-version: latest
tool-repository:
url: "https://github.com/pyannote/pyannote-audio"
doi: null
tool-license: MIT
# Secrets (ENV variables)
secrets:
- name: HF_TOKEN
description: Hugging Face access token required to download the gated pyannote models
type: str
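# Illustrative only (not part of the schema): a component like this typically reads
# HF_TOKEN from the environment when loading a gated pyannote pipeline, e.g.:
#   import os
#   from pyannote.audio import Pipeline
#   pipeline = Pipeline.from_pretrained(
#       "pyannote/speaker-diarization-3.1",      # assumed checkpoint; see the repo
#       use_auth_token=os.environ["HF_TOKEN"],
#   )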
# Build Arguments (if any)
build-args: null
# Exposed Ports
ports: null
# Parameters for the Component
parameters:
- name: MODEL
default-value: large-v3
datatype: str
description: Whisper model to use for transcription/translation
parameter-bounds: null
options:
- tiny
- base
- small
- medium
- large
- large-v2
- large-v3
allow-custom-value: false
- name: TASK
default-value: transcribe
datatype: str
description: Task to perform (transcribe in the source language or translate into English)
parameter-bounds: null
options:
- transcribe
- translate
allow-custom-value: false
- name: LANGUAGE
default-value: auto
datatype: str
description: Source language code (use 'auto' for auto-detection)
parameter-bounds: null
options:
- auto
- en
- es
- fr
- de
- it
- pt
- nl
- ja
- zh
- ru
allow-custom-value: true
# Data Inputs
data-inputs:
- name: INPUT_FILE
type: .wav
path: /odtp/odtp-input
description: Input audio file in WAV format
naming-convention: null
# Data Outputs
data-outputs:
- name: OUTPUT_FILE
type: .srt
path: /odtp/odtp-output
description: Transcription/translation output in SRT format with speaker diarization
naming-convention: null
- name: OUTPUT_JSON_FILE
type: .json
path: /odtp/odtp-output
description: Transcription/translation output in JSON format with speaker diarization
naming-convention: null
- name: OUTPUT_AUDIO_FILE
type: .wav
path: /odtp/odtp-output
description: Audio file in WAV format
naming-convention: null
- name: OUTPUT_PARAGRAPHS_FILE
type: .json
path: /odtp/odtp-output
description: Paragraph-level transcription/translation output in JSON format with speaker diarization
naming-convention: null
- name: OUTPUT_MD_FILE
type: .md
path: /odtp/odtp-output
description: Markdown file with the speaker diarization and transcription/translation
naming-convention: null
- name: OUTPUT_PDF_FILE
type: .pdf
path: /odtp/odtp-output
description: PDF file with the speaker diarization and transcription/translation
naming-convention: null
# Validation Schemas (Future Development)
schema-input: null
schema-output: null
# Device Requirements
devices:
- type: gpu
required: true
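# Illustrative only (not part of the schema): a typical invocation, assuming the usual
# ODTP convention of passing secrets and parameters as environment variables and
# mounting the input/output folders declared above:
#   docker run --gpus all \
#     -e HF_TOKEN=<your-token> \
#     -e MODEL=large-v3 -e TASK=transcribe -e LANGUAGE=auto \
#     -e INPUT_FILE=recording.wav \
#     -v $(pwd)/input:/odtp/odtp-input \
#     -v $(pwd)/output:/odtp/odtp-output \
#     <image-name>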