Spaces:

Omkar008
/

whisper_transcription_api

Sleeping

App Files Files Community

Omkar008 committed about 1 month ago

Commit

9f559c6

verified ·

1 Parent(s): f18ccfe

adding the code files

Browse files

Files changed (8) hide show

Dockerfile +50 -0
config.py +13 -0
controllers/transcription_controller.py +15 -0
main.py +13 -0
models/schema.py +11 -0
requirements.txt +51 -0
routers/transcription.py +15 -0
services/whisper_service.py +46 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,50 @@

+# Use Python base image
+FROM python:3.12.2
+# Prevent timezone prompt during package installation
+ENV DEBIAN_FRONTEND=noninteractive
+ENV TZ=UTC
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    ffmpeg \
+    git \
+    curl \
+    build-essential \
+    cargo \
+    pkg-config \
+    libssl-dev \
+    && rm -rf /var/lib/apt/lists/*
+# Install Rust for setuptools-rust
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+ENV PATH="/root/.cargo/bin:${PATH}"
+# Set working directory
+WORKDIR /app
+# Copy only the dependency manifest first so the slow pip layers below stay
+# cached when application code changes
+COPY requirements.txt /app/requirements.txt
+# Install Python dependencies
+RUN pip install --no-cache-dir -U pip setuptools wheel
+RUN pip install --no-cache-dir setuptools-rust
+# Install the CUDA 11.8 builds of the exact torch versions pinned in
+# requirements.txt, so the following "-r requirements.txt" step keeps them
+# instead of replacing a mismatched pre-install
+RUN pip install --no-cache-dir torch==2.6.0 torchaudio==2.6.0 torchvision==0.21.0 --index-url https://download.pytorch.org/whl/cu118
+RUN pip install --no-cache-dir -r requirements.txt
+# Create and switch to a non-root user
+RUN useradd -m -u 1000 user
+USER user
+# Copy application code, owned by the non-root user
+COPY --chown=user . /app/
+# Environment variables for PyTorch and Whisper
+ENV TORCH_DEVICE="cuda"
+ENV FORCE_FP32="false"
+# Expose port
+EXPOSE 7860
+# Start the FastAPI application
+# NOTE(review): each of the 5 workers imports the app and loads its own Whisper
+# model - confirm the target hardware has enough memory for that
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "5"]

config.py ADDED Viewed

	@@ -0,0 +1,13 @@

+from pydantic_settings import BaseSettings
+from dotenv import load_dotenv
+load_dotenv()
+import os
+class Settings(BaseSettings):
+    WHISPER_MODEL: str = "base"  # Options: tiny, base, small, medium, large, large-v2, turbo
+    TEMP_UPLOAD_DIR: str = "temp_uploads"
+    TORCH_DEVICE: str = os.getenv("TORCH_DEVICE", "cpu")
+    FORCE_FP32: bool = os.getenv("FORCE_FP32", "false").lower() == "true"
+settings = Settings()

controllers/transcription_controller.py ADDED Viewed

	@@ -0,0 +1,15 @@

+from fastapi import HTTPException
+from services.whisper_service import WhisperService
+from models.schema import TranscriptionResponse
+class TranscriptionController:
+    def __init__(self):
+        self.whisper_service = WhisperService()
+    async def transcribe_audio(self, audio_file: bytes, output_language: str = None) -> TranscriptionResponse:
+        try:
+            result = await self.whisper_service.transcribe(audio_file, output_language)
+            return TranscriptionResponse(**result)
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=str(e))

main.py ADDED Viewed

	@@ -0,0 +1,13 @@

+from fastapi import FastAPI
+from routers import transcription
+app = FastAPI(title="Whisper Transcription API",docs_url="/")
+app.include_router(transcription.router, prefix="/api/v1", tags=["transcription"])
+@app.get("/")
+async def root():
+    return {
+        "message": "Welcome to Whisper Transcription API",
+        "docs_url": "/docs"
+    }

models/schema.py ADDED Viewed

	@@ -0,0 +1,11 @@

+from pydantic import BaseModel
+from typing import Optional
+class TranscriptionRequest(BaseModel):
+    """Request options for a transcription.
+
+    NOTE(review): not referenced by the router or controller visible in this
+    commit, which take output_language as a form field instead - confirm
+    whether this model is still needed.
+    """
+    # Language code for the output; defaults to "en"
+    output_language: str = "en"
+class TranscriptionResponse(BaseModel):
+    text: str
+    language: Optional[str]
+    segments: Optional[list]

requirements.txt ADDED Viewed

	@@ -0,0 +1,51 @@

+accelerate==1.4.0
+audioread==3.0.1
+certifi==2025.1.31
+cffi==1.17.1
+charset-normalizer==3.4.1
+decorator==5.2.1
+filelock==3.13.1
+fsspec==2024.6.1
+huggingface-hub==0.29.1
+idna==3.10
+Jinja2==3.1.4
+joblib==1.4.2
+lazy_loader==0.4
+librosa==0.10.2.post1
+llvmlite==0.44.0
+MarkupSafe==2.1.5
+mpmath==1.3.0
+msgpack==1.1.0
+networkx==3.3
+numba==0.61.0
+numpy==2.1.2
+packaging==24.2
+pillow==11.0.0
+platformdirs==4.3.6
+pooch==1.8.2
+psutil==7.0.0
+pycparser==2.22
+PyYAML==6.0.2
+regex==2024.11.6
+requests==2.32.3
+safetensors==0.5.2
+scikit-learn==1.6.1
+scipy==1.15.2
+setuptools==70.2.0
+soundfile==0.13.1
+soxr==0.5.0.post1
+sympy==1.13.1
+threadpoolctl==3.5.0
+tokenizers==0.21.0
+torch==2.6.0
+torchaudio==2.6.0
+torchvision==0.21.0
+tqdm==4.67.1
+transformers==4.49.0
+typing_extensions==4.12.2
+urllib3==2.3.0
+pydantic~=2.10.6
+fastapi~=0.115.8
+pydantic-settings~=2.8.0
+# "~=" needs at least two release segments, so a date-style version must be pinned with "=="
+openai-whisper==20240930
+python-dotenv~=1.0.1
+# ASGI server launched by the Dockerfile CMD
+uvicorn~=0.34.0
+# Required by FastAPI to parse the File/Form upload parameters
+python-multipart~=0.0.20

routers/transcription.py ADDED Viewed

	@@ -0,0 +1,15 @@

+from fastapi import APIRouter, UploadFile, File, Form
+from typing import Optional
+from controllers.transcription_controller import TranscriptionController
+from models.schema import TranscriptionResponse
+router = APIRouter()
+controller = TranscriptionController()
+@router.post("/transcribe", response_model=TranscriptionResponse)
+async def transcribe_audio(
+    audio_file: UploadFile = File(...),
+    output_language: Optional[str] = Form(None)
+):
+    contents = await audio_file.read()
+    return await controller.transcribe_audio(contents, output_language)

services/whisper_service.py ADDED Viewed

	@@ -0,0 +1,46 @@

+import whisper
+import tempfile
+import os
+import torch
+from config import settings
+class WhisperService:
+    def __init__(self):
+        if settings.TORCH_DEVICE == "cuda" and not torch.cuda.is_available():
+            print("WARNING: CUDA requested but not available. Falling back to CPU.")
+            self.device = "cpu"
+        else:
+            self.device = settings.TORCH_DEVICE
+        self.model = whisper.load_model(settings.WHISPER_MODEL)
+        if settings.FORCE_FP32 or self.device == "cpu":
+            self.model = self.model.float()
+    async def transcribe(self, audio_file: bytes, output_language: str = None) -> dict:
+        try:
+            # Create a temporary file to store the uploaded audio
+            with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_audio:
+                temp_audio.write(audio_file)
+                temp_audio_path = temp_audio.name
+            try:
+                # Transcribe the audio
+                transcription_options = {"fp16": not settings.FORCE_FP32 and self.device == "cuda"}
+                if output_language:
+                    transcription_options["language"] = output_language
+                result = self.model.transcribe(temp_audio_path, **transcription_options)
+                return {
+                    "text": result["text"],
+                    "language": result.get("language"),
+                    "segments": result.get("segments")
+                }
+            finally:
+                # Clean up the temporary file
+                if os.path.exists(temp_audio_path):
+                    os.remove(temp_audio_path)
+        except Exception as e:
+            raise Exception(f"Transcription failed: {str(e)}")