|
--- |
|
license: mit |
|
datasets: |
|
- mozilla-foundation/common_voice_17_0 |
|
language: |
|
- ru |
|
base_model: |
|
- dvislobokov/whisper-large-v3-turbo-russian |
|
pipeline_tag: automatic-speech-recognition |
|
--- |
|
|
|
## Example of using this model with faster-whisper
|
|
|
```python |
|
import io
import logging
import sys
import time
from datetime import datetime

from faster_whisper import WhisperModel
from pydub import AudioSegment

# Log to both a file and stdout so progress is visible interactively
# and preserved for later inspection.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('faster-whisper.log'),
        logging.StreamHandler(sys.stdout)
    ]
)

# Load the converted CTranslate2 model from a local directory;
# the second argument selects the inference device ("cpu" here).
model = WhisperModel("/path/to/dvislobokov/faster-whisper-large-v3-turbo-russian", "cpu")

# Split the recording into 30-second pieces (pydub slices in milliseconds)
# so each piece can be exported and transcribed independently.
audio = AudioSegment.from_wav("ezyZip.wav")
chunk_length = 30 * 1000  # in milliseconds
chunks = [audio[i:i + chunk_length] for i in range(0, len(audio), chunk_length)]

logging.info('Start transcribe at %s', datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
# perf_counter is monotonic, so the elapsed-time measurement cannot be
# skewed by system clock adjustments.
start = time.perf_counter()

text = []
for chunk in chunks:
    # Export the chunk to an in-memory WAV buffer; pydub rewinds the
    # buffer before returning, so it can be passed straight to transcribe().
    buffer = io.BytesIO()
    chunk.export(buffer, format="wav")
    # transcribe() also returns a TranscriptionInfo object (detected
    # language, duration, ...) which this example does not use.
    segments, _info = model.transcribe(buffer, language="ru")
    # segments is a generator; joining consumes it and runs the actual
    # decoding. Whisper segment text carries its own leading spaces.
    text.append("".join(segment.text for segment in segments))

end = time.perf_counter()
logging.info('Finish transcribe at %s', datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
logging.info('Total time: %s', end - start)
logging.info('Text: %s', text)
|
``` |
|
|
|
|
|
|