AshDavid12 committed
Commit d1f9a9c · Parent(s): b7e558a

hf home change

Files changed (3):
  1. Dockerfile +3 -8
  2. infer.py +23 -80
  3. requirements.txt +1 -3
Dockerfile CHANGED
@@ -1,5 +1,5 @@
 # Use an official Python runtime as a base image
-from python:3.11.1-buster
+FROM python:3.11.1-buster
 
 # Set the working directory
 WORKDIR /app
@@ -10,7 +10,6 @@ RUN mkdir -p /app/hf_cache && chmod -R 777 /app/hf_cache
 # Set the environment variable for the Hugging Face cache
 ENV HF_HOME=/app/hf_cache
 
-
 # Copy the requirements.txt file and install the dependencies
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
@@ -18,9 +17,5 @@ RUN pip install --no-cache-dir -r requirements.txt
 # Copy the current directory contents into the container at /app
 COPY . .
 
-# Hugging Face Spaces will expose port 7860 by default for web applications
-EXPOSE 7860
-
-# Command to run the transcription script or API server on Hugging Face
-CMD ["uvicorn", "infer:app", "--host", "0.0.0.0", "--port", "7860"]
-
+# Command to run the Python transcription script directly
+CMD ["python", "whisper_test.py"]
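Net effect of the Dockerfile change: the container no longer serves FastAPI on port 7860, it just runs a script (note the new CMD points at whisper_test.py, while the file edited below is infer.py). Since the HF_HOME move is the headline change of this commit, here is a minimal Python sketch of how the cache redirect could be verified; the snapshot_download call and the expected path are illustrative assumptions, not part of the commit:

```python
import os

# Mirror the Dockerfile's ENV line. HF_HOME must be set before
# huggingface_hub/transformers are imported, since they read it at import time;
# outside the container it may be unset, so default it here for the sketch.
os.environ.setdefault("HF_HOME", "/app/hf_cache")

from huggingface_hub import snapshot_download  # installed as a transformers dependency

# Download (or reuse) the model snapshot and report where it was cached;
# with HF_HOME in effect the path should sit under /app/hf_cache.
local_path = snapshot_download("openai/whisper-base")
print("Model cached at:", local_path)
```

The world-writable chmod on /app/hf_cache fits the same goal: Hugging Face Spaces runs containers as a non-root user, so the default ~/.cache/huggingface location would not be writable.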
infer.py CHANGED
@@ -1,95 +1,38 @@
 import torch
 from transformers import WhisperProcessor, WhisperForConditionalGeneration
-import soundfile as sf
-from fastapi import FastAPI, File, UploadFile, Form
-import uvicorn
 import requests
-import os
-from datetime import datetime
-
-# Initialize FastAPI app
-app = FastAPI()
-
-# Print initialization of the application
-print("FastAPI application started.")
+import soundfile as sf
+import io
 
-# Load the Whisper model and processor
+# Load the Whisper model and processor from Hugging Face Model Hub
 model_name = "openai/whisper-base"
-print(f"Loading Whisper model: {model_name}")
-
-try:
-    processor = WhisperProcessor.from_pretrained(model_name)
-    model = WhisperForConditionalGeneration.from_pretrained(model_name)
-    print(f"Model {model_name} successfully loaded.")
-except Exception as e:
-    print(f"Error loading the model: {e}")
-    raise e
+processor = WhisperProcessor.from_pretrained(model_name)
+model = WhisperForConditionalGeneration.from_pretrained(model_name)
 
-# Move model to the appropriate device (GPU if available)
+# Use GPU if available, otherwise use CPU
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)
-print(f"Model is using device: {device}")
-
-
-@app.post("/transcribe/")
-def transcribe_audio_url(audio_url: str = Form(...)):
-    # Download the audio file from the provided URL
-    try:
-        response = requests.get(audio_url)
-        if response.status_code != 200:
-            return {"error": f"Failed to download audio from URL. Status code: {response.status_code}"}
-        print(f"Successfully downloaded audio from URL: {audio_url}")
-        audio_data = io.BytesIO(response.content)  # Store audio data in memory
-    except Exception as e:
-        print(f"Error downloading the audio file: {e}")
-        return {"error": f"Error downloading the audio file: {e}"}
 
-    # Process the audio
-    try:
-        audio_input, _ = sf.read(audio_data)  # Read the audio from the in-memory BytesIO
-        print(f"Audio file from URL successfully read.")
-    except Exception as e:
-        print(f"Error reading the audio file: {e}")
-        return {"error": f"Error reading the audio file: {e}"}
+# URL of the audio file
+audio_url = "https://www.signalogic.com/melp/EngSamples/Orig/male.wav"
 
-    # Preprocess the audio for Whisper
-    try:
-        inputs = processor(audio_input, return_tensors="pt", sampling_rate=16000)
-        print(f"Audio file preprocessed for transcription.")
-    except Exception as e:
-        print(f"Error processing the audio file: {e}")
-        return {"error": f"Error processing the audio file: {e}"}
+# Download the audio file
+response = requests.get(audio_url)
+audio_data = io.BytesIO(response.content)
 
-    # Move inputs to the appropriate device
-    inputs = {key: value.to(device) for key, value in inputs.items()}
-    print("Inputs moved to the appropriate device.")
+# Read the audio using soundfile
+audio_input, _ = sf.read(audio_data)
 
-    # Generate the transcription
-    try:
-        with torch.no_grad():
-            predicted_ids = model.generate(inputs["input_features"])
-        print("Transcription successfully generated.")
-    except Exception as e:
-        print(f"Error during transcription generation: {e}")
-        return {"error": f"Error during transcription generation: {e}"}
+# Preprocess the audio for Whisper
+inputs = processor(audio_input, return_tensors="pt", sampling_rate=16000)
+inputs = {key: value.to(device) for key, value in inputs.items()}
 
-    # Decode the transcription
-    try:
-        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
-        print("Transcription successfully decoded.")
-    except Exception as e:
-        print(f"Error decoding the transcription: {e}")
-        return {"error": f"Error decoding the transcription: {e}"}
+# Generate the transcription
+with torch.no_grad():
+    predicted_ids = model.generate(inputs["input_features"])
 
-    return {"transcription": transcription}
+# Decode the transcription
+transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
 
-@app.get("/")
-def read_root():
-    return {"message": "Welcome to the Whisper transcription API"}
-
-if __name__ == "__main__":
-    # Print when starting the FastAPI server
-    print("Starting FastAPI server with Uvicorn...")
-
-    # Run the FastAPI app on the default port (7860)
-    uvicorn.run(app, host="0.0.0.0", port=7860)
+# Print the transcription result
+print("Transcription:", transcription)
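One caveat in the rewritten script: sf.read returns the file's native sample rate, but the processor call hard-codes sampling_rate=16000, which is the rate Whisper's feature extractor expects. If the source WAV is not already 16 kHz mono, the model is fed mis-rated audio. A minimal sketch of guarding against that, assuming the same URL as infer.py; the mono downmix and linear-interpolation resample are illustrative stand-ins (librosa or torchaudio would be the usual choice), not part of the commit:

```python
import io

import numpy as np
import requests
import soundfile as sf

# Same source file as infer.py uses.
audio_url = "https://www.signalogic.com/melp/EngSamples/Orig/male.wav"
audio_input, sr = sf.read(io.BytesIO(requests.get(audio_url).content))

# Whisper's feature extractor assumes 16 kHz mono input.
if audio_input.ndim > 1:
    audio_input = audio_input.mean(axis=1)  # downmix stereo to mono

if sr != 16000:
    # Crude linear-interpolation resample, good enough for a sanity check.
    target_len = int(len(audio_input) * 16000 / sr)
    audio_input = np.interp(
        np.linspace(0, len(audio_input) - 1, num=target_len),
        np.arange(len(audio_input)),
        audio_input,
    )

# audio_input is now safe to pass to processor(..., sampling_rate=16000).
```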
requirements.txt CHANGED
@@ -1,8 +1,6 @@
-fastapi
-uvicorn
 torch
 whisper
-python-multipart
 requests
 transformers
 soundfile
+
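A side note on the trimmed list: the rewritten infer.py imports only torch, requests, transformers, and soundfile (plus the stdlib io), so the whisper entry may be a leftover rather than a real dependency — the script uses the transformers Whisper classes, not a standalone whisper package. A quick, hypothetical smoke test of the trimmed requirements:

```python
# Hypothetical check that the trimmed requirements cover everything
# the rewritten infer.py actually imports.
import io  # stdlib, needs no pin

import requests
import soundfile
import torch
import transformers

print("torch", torch.__version__, "| transformers", transformers.__version__)
```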