Update app.py
Browse files
app.py
CHANGED
@@ -18,14 +18,23 @@ device = 0 if torch.cuda.is_available() else -1 # Use GPU if available, otherwi
|
|
18 |
# Load model and processor
|
19 |
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
|
20 |
processor = WhisperProcessor.from_pretrained("openai/whisper-small")
|
|
|
21 |
|
22 |
# Set forced language to Portuguese (pt)
|
23 |
-
forced_language_token_id =
|
24 |
# Whisper's decoder prompt is <|startoftranscript|> <|lang|> <|task|> ...;
# forced_decoder_ids holds [position, token_id] pairs. Position 0 is the start
# token, so the language token must be forced at position 1 (position 2 is the
# task token slot, not the language slot).
model.config.forced_decoder_ids = [[1, forced_language_token_id]]
|
25 |
|
26 |
-
# Initialize the pipeline
|
27 |
-
asr_pipeline = pipeline(task="automatic-speech-recognition", model=model, processor=processor, device=device)
|
28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
# Basic GET endpoint
|
31 |
@app.get("/")
|
|
|
18 |
# Load model and processor
|
19 |
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
|
20 |
processor = WhisperProcessor.from_pretrained("openai/whisper-small")
|
21 |
+
tokenizer = processor.tokenizer # Explicitly extract the tokenizer from the processor
|
22 |
|
23 |
# Set forced language to Portuguese (pt)
|
24 |
+
forced_language_token_id = tokenizer.convert_tokens_to_ids("<|pt|>")
|
25 |
# Whisper's decoder prompt is <|startoftranscript|> <|lang|> <|task|> ...;
# forced_decoder_ids holds [position, token_id] pairs. Position 0 is the start
# token, so the language token must be forced at position 1 (position 2 is the
# task token slot, not the language slot).
model.config.forced_decoder_ids = [[1, forced_language_token_id]]
|
26 |
|
|
|
|
|
27 |
|
28 |
+
# Previous pipeline initialization (superseded by the explicit call below)
|
29 |
+
# asr_pipeline = pipeline(task="automatic-speech-recognition", model=model, processor=processor, device=device)
|
30 |
+
# Initialize the ASR pipeline, passing the tokenizer and feature extractor explicitly
|
31 |
+
asr_pipeline = pipeline(
|
32 |
+
task="automatic-speech-recognition",
|
33 |
+
model=model,
|
34 |
+
tokenizer=tokenizer, # Whisper tokenizer taken from processor.tokenizer
|
35 |
+
feature_extractor=processor.feature_extractor, # Feature extractor component of the same processor
|
36 |
+
device=device # Device index set earlier in the file: 0 when CUDA is available, otherwise -1
|
37 |
+
)
|
38 |
|
39 |
# Basic GET endpoint
|
40 |
@app.get("/")
|