Spaces:
Sleeping
Sleeping
added comments
Browse files- .gitignore +2 -1
- __pycache__/app.cpython-311.pyc +0 -0
- app.py +39 -13
- audio.mp3 +0 -0
- forms.py +0 -9
- main.py +0 -85
- models.py +0 -56
- server.log +0 -0
- test.py +0 -62
- test_audio.mp3 +0 -0
.gitignore
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
venv
|
2 |
ol.py
|
3 |
server.zip
|
4 |
-
whisper_cahce
|
|
|
|
1 |
venv
|
2 |
ol.py
|
3 |
server.zip
|
4 |
+
whisper_cahce
|
5 |
+
others
|
__pycache__/app.cpython-311.pyc
CHANGED
Binary files a/__pycache__/app.cpython-311.pyc and b/__pycache__/app.cpython-311.pyc differ
|
|
app.py
CHANGED
@@ -17,16 +17,20 @@ from utils import (
|
|
17 |
pad_or_trim,
|
18 |
)
|
19 |
|
|
|
20 |
logging.basicConfig(
|
21 |
level=logging.INFO,
|
22 |
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
|
23 |
handlers=[logging.StreamHandler()]
|
24 |
)
|
25 |
|
|
|
26 |
os.environ['NUMBA_CACHE_DIR'] = '/tmp'
|
27 |
|
|
|
28 |
app = FastAPI(port=8000)
|
29 |
|
|
|
30 |
app.add_middleware(
|
31 |
CORSMiddleware,
|
32 |
allow_origins=["*"],
|
@@ -35,18 +39,22 @@ app.add_middleware(
|
|
35 |
allow_headers=["*"],
|
36 |
)
|
37 |
|
38 |
-
|
39 |
-
if not os.path.exists(filepath):
|
40 |
-
raise FileNotFoundError(f"Model file not found at {filepath}")
|
41 |
-
|
42 |
cache_dir = "/tmp/whisper_cache"
|
43 |
os.makedirs(cache_dir, exist_ok=True)
|
44 |
whisper_model = whisper.load_model("tiny", download_root=cache_dir)
|
45 |
|
46 |
-
|
|
|
|
|
|
|
47 |
|
|
|
48 |
@contextmanager
|
49 |
def temporary_audio_file(audio_bytes):
|
|
|
|
|
|
|
50 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
|
51 |
tmp_file.write(audio_bytes)
|
52 |
tmp_file.flush()
|
@@ -57,20 +65,21 @@ def temporary_audio_file(audio_bytes):
|
|
57 |
if os.path.exists(tmp_filename):
|
58 |
os.remove(tmp_filename)
|
59 |
|
|
|
60 |
@app.get("/")
|
61 |
async def read_root():
|
62 |
return {"message": "Welcome to the Defects_model API"}
|
63 |
|
64 |
-
filepath = os.path.abspath("cnn_1_v6_final_model.h5")
|
65 |
-
if not os.path.exists(filepath):
|
66 |
-
raise FileNotFoundError(f"Model file not found at {filepath}")
|
67 |
|
68 |
model = keras.models.load_model(filepath, compile=False)
|
69 |
target_shape = (32, 200)
|
70 |
|
71 |
-
|
72 |
@app.post("/save-audio")
|
73 |
async def save_audio(file: UploadFile = File(...)):
|
|
|
|
|
|
|
74 |
if not file.content_type.startswith("audio/"):
|
75 |
raise HTTPException(status_code=400, detail="Invalid file type")
|
76 |
|
@@ -87,27 +96,36 @@ async def save_audio(file: UploadFile = File(...)):
|
|
87 |
except Exception as e:
|
88 |
return JSONResponse(content={"error": str(e)}, status_code=500)
|
89 |
|
|
|
90 |
log_file_path = os.path.join("/tmp", "server.log")
|
91 |
|
|
|
92 |
logging.basicConfig(
|
93 |
level=logging.INFO,
|
94 |
format="%(asctime)s - %(levelname)s - %(message)s",
|
95 |
handlers=[logging.StreamHandler()]
|
96 |
)
|
97 |
|
98 |
-
|
99 |
-
|
100 |
@app.post("/process-audio")
|
101 |
async def process_audio(
|
102 |
audio: UploadFile = File(...),
|
103 |
phrase: str = Form(...)
|
104 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
if audio.content_type != "audio/mpeg":
|
106 |
raise HTTPException(
|
107 |
status_code=400, detail="Invalid file type. Only MP3 files are supported."
|
108 |
)
|
109 |
|
110 |
try:
|
|
|
111 |
audio_bytes = await audio.read()
|
112 |
|
113 |
if not audio_bytes:
|
@@ -115,9 +133,11 @@ async def process_audio(
|
|
115 |
|
116 |
logging.info(f"Received audio bytes: {len(audio_bytes)} bytes")
|
117 |
|
|
|
118 |
with temporary_audio_file(audio_bytes) as tmp_filename:
|
119 |
logging.info(f"Temporary file created: {tmp_filename}")
|
120 |
|
|
|
121 |
audio_data, sample_rate = librosa.load(tmp_filename, sr=None)
|
122 |
logging.info(
|
123 |
f"Audio loaded: sample rate = {sample_rate}, data shape = {audio_data.shape}"
|
@@ -125,32 +145,38 @@ async def process_audio(
|
|
125 |
if not audio_data.any() or sample_rate == 0:
|
126 |
raise ValueError("Empty or invalid audio data.")
|
127 |
|
|
|
128 |
features = extract_features(audio_data, sample_rate)
|
129 |
logging.info(f"Features extracted: shape = {features.shape}")
|
130 |
|
|
|
131 |
target_shape = (1, model.input_shape[1])
|
132 |
features = pad_or_trim(features, target_shape[1])
|
133 |
features = np.expand_dims(features, axis=0)
|
134 |
|
|
|
135 |
prediction = model.predict(features)
|
136 |
logging.info(f"Prediction: {prediction}")
|
137 |
|
|
|
138 |
transcription_result = whisper_model.transcribe(tmp_filename, language="russian")
|
139 |
transcribed_text = transcription_result["text"].lower().strip()
|
140 |
|
141 |
-
#
|
142 |
transcribed_text_clean = re.sub(r'[^\w\s]', '', transcribed_text)
|
143 |
logging.info(f"Transcribed text (cleaned): {transcribed_text_clean}")
|
144 |
|
145 |
-
#
|
146 |
lev_distance = Levenshtein.distance(transcribed_text_clean, phrase.lower().strip())
|
147 |
phrase_length = max(len(transcribed_text_clean), len(phrase))
|
148 |
|
|
|
149 |
max_acceptable_distance = 0.5 * phrase_length
|
150 |
match_phrase = lev_distance <= max_acceptable_distance
|
151 |
|
152 |
logging.info(f"Expected phrase: {phrase}, Is correct: {match_phrase}, Transcribed text: {transcribed_text_clean}, Levenshtein distance: {lev_distance}")
|
153 |
|
|
|
154 |
return {
|
155 |
"prediction": prediction.tolist(),
|
156 |
"match_phrase": match_phrase
|
|
|
17 |
pad_or_trim,
|
18 |
)
|
19 |
|
20 |
+
# вывод в консоль для просмотра на Hugging Face
|
21 |
logging.basicConfig(
|
22 |
level=logging.INFO,
|
23 |
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
|
24 |
handlers=[logging.StreamHandler()]
|
25 |
)
|
26 |
|
27 |
+
# Установка временной директории для кэша Numba
|
28 |
os.environ['NUMBA_CACHE_DIR'] = '/tmp'
|
29 |
|
30 |
+
# Инициализация FastAPI приложения
|
31 |
app = FastAPI(port=8000)
|
32 |
|
33 |
+
# Настройка CORS (Cross-Origin Resource Sharing) для обработки запросов с разных доменов
|
34 |
app.add_middleware(
|
35 |
CORSMiddleware,
|
36 |
allow_origins=["*"],
|
|
|
39 |
allow_headers=["*"],
|
40 |
)
|
41 |
|
42 |
+
# Инициализация и загрузка модели Whisper для распознавания речи
|
|
|
|
|
|
|
43 |
cache_dir = "/tmp/whisper_cache"
|
44 |
os.makedirs(cache_dir, exist_ok=True)
|
45 |
whisper_model = whisper.load_model("tiny", download_root=cache_dir)
|
46 |
|
47 |
+
# загрузка параметров модели
|
48 |
+
filepath = os.path.abspath("cnn_1_v6_final_model.h5")
|
49 |
+
if not os.path.exists(filepath):
|
50 |
+
raise FileNotFoundError(f"Model file not found at {filepath}")
|
51 |
|
52 |
+
# Контекстный менеджер для временных аудио файлов
|
53 |
@contextmanager
|
54 |
def temporary_audio_file(audio_bytes):
|
55 |
+
"""
|
56 |
+
Создает временный файл для хранения аудио данных и автоматически удаляет его после использования
|
57 |
+
"""
|
58 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
|
59 |
tmp_file.write(audio_bytes)
|
60 |
tmp_file.flush()
|
|
|
65 |
if os.path.exists(tmp_filename):
|
66 |
os.remove(tmp_filename)
|
67 |
|
68 |
+
# Корневой endpoint
|
69 |
@app.get("/")
|
70 |
async def read_root():
|
71 |
return {"message": "Welcome to the Defects_model API"}
|
72 |
|
|
|
|
|
|
|
73 |
|
74 |
model = keras.models.load_model(filepath, compile=False)
|
75 |
target_shape = (32, 200)
|
76 |
|
77 |
+
# Endpoint для сохранения аудио файлов
|
78 |
@app.post("/save-audio")
|
79 |
async def save_audio(file: UploadFile = File(...)):
|
80 |
+
"""
|
81 |
+
Обработчик для сохранения загруженных аудио файлов
|
82 |
+
"""
|
83 |
if not file.content_type.startswith("audio/"):
|
84 |
raise HTTPException(status_code=400, detail="Invalid file type")
|
85 |
|
|
|
96 |
except Exception as e:
|
97 |
return JSONResponse(content={"error": str(e)}, status_code=500)
|
98 |
|
99 |
+
# Настройка пути для файла логов
|
100 |
log_file_path = os.path.join("/tmp", "server.log")
|
101 |
|
102 |
+
# Настройка логирования для отслеживания работы сервера
|
103 |
logging.basicConfig(
|
104 |
level=logging.INFO,
|
105 |
format="%(asctime)s - %(levelname)s - %(message)s",
|
106 |
handlers=[logging.StreamHandler()]
|
107 |
)
|
108 |
|
109 |
+
# Основной endpoint для обработки аудио
|
|
|
110 |
@app.post("/process-audio")
|
111 |
async def process_audio(
|
112 |
audio: UploadFile = File(...),
|
113 |
phrase: str = Form(...)
|
114 |
):
|
115 |
+
"""
|
116 |
+
Главный обработчик для анализа аудио файлов:
|
117 |
+
- Делает предсказание моделью
|
118 |
+
- Прогоняет аудио через openai-whisper для проверки фразы
|
119 |
+
- Сравнивает полученный текст с ожидаемой фразой
|
120 |
+
"""
|
121 |
+
# Проверка формата файла
|
122 |
if audio.content_type != "audio/mpeg":
|
123 |
raise HTTPException(
|
124 |
status_code=400, detail="Invalid file type. Only MP3 files are supported."
|
125 |
)
|
126 |
|
127 |
try:
|
128 |
+
# Чтение аудио файла
|
129 |
audio_bytes = await audio.read()
|
130 |
|
131 |
if not audio_bytes:
|
|
|
133 |
|
134 |
logging.info(f"Received audio bytes: {len(audio_bytes)} bytes")
|
135 |
|
136 |
+
# Обработка аудио во временном файле
|
137 |
with temporary_audio_file(audio_bytes) as tmp_filename:
|
138 |
logging.info(f"Temporary file created: {tmp_filename}")
|
139 |
|
140 |
+
# Загрузка аудио данных
|
141 |
audio_data, sample_rate = librosa.load(tmp_filename, sr=None)
|
142 |
logging.info(
|
143 |
f"Audio loaded: sample rate = {sample_rate}, data shape = {audio_data.shape}"
|
|
|
145 |
if not audio_data.any() or sample_rate == 0:
|
146 |
raise ValueError("Empty or invalid audio data.")
|
147 |
|
148 |
+
# Извлечение признаков из аудио
|
149 |
features = extract_features(audio_data, sample_rate)
|
150 |
logging.info(f"Features extracted: shape = {features.shape}")
|
151 |
|
152 |
+
# Подготовка данных для модели
|
153 |
target_shape = (1, model.input_shape[1])
|
154 |
features = pad_or_trim(features, target_shape[1])
|
155 |
features = np.expand_dims(features, axis=0)
|
156 |
|
157 |
+
# Получение предсказания от модели
|
158 |
prediction = model.predict(features)
|
159 |
logging.info(f"Prediction: {prediction}")
|
160 |
|
161 |
+
# Транскрибация аудио с помощью Whisper
|
162 |
transcription_result = whisper_model.transcribe(tmp_filename, language="russian")
|
163 |
transcribed_text = transcription_result["text"].lower().strip()
|
164 |
|
165 |
+
# Очистка транскрибированного текста
|
166 |
transcribed_text_clean = re.sub(r'[^\w\s]', '', transcribed_text)
|
167 |
logging.info(f"Transcribed text (cleaned): {transcribed_text_clean}")
|
168 |
|
169 |
+
# Сравнение с ожидаемой фразой
|
170 |
lev_distance = Levenshtein.distance(transcribed_text_clean, phrase.lower().strip())
|
171 |
phrase_length = max(len(transcribed_text_clean), len(phrase))
|
172 |
|
173 |
+
# Определение допустимого расстояния Левенштейна
|
174 |
max_acceptable_distance = 0.5 * phrase_length
|
175 |
match_phrase = lev_distance <= max_acceptable_distance
|
176 |
|
177 |
logging.info(f"Expected phrase: {phrase}, Is correct: {match_phrase}, Transcribed text: {transcribed_text_clean}, Levenshtein distance: {lev_distance}")
|
178 |
|
179 |
+
# Возврат результатов
|
180 |
return {
|
181 |
"prediction": prediction.tolist(),
|
182 |
"match_phrase": match_phrase
|
audio.mp3
DELETED
Binary file (190 kB)
|
|
forms.py
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
from pydantic import BaseModel
|
2 |
-
|
3 |
-
class UserRegistration(BaseModel):
|
4 |
-
login: str
|
5 |
-
password: str
|
6 |
-
|
7 |
-
class UserLoginForm(BaseModel):
|
8 |
-
login: str
|
9 |
-
password: str
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main.py
DELETED
@@ -1,85 +0,0 @@
|
|
1 |
-
from fastapi import FastAPI, File, UploadFile, HTTPException
|
2 |
-
from models import User, Course, connection
|
3 |
-
from forms import UserRegistration, UserLoginForm
|
4 |
-
from fastapi.responses import JSONResponse
|
5 |
-
from utils import create_cnn_model, get_features, extract_features, pad_or_trim, noise, stretch, pitch
|
6 |
-
from peewee import *
|
7 |
-
import numpy as np
|
8 |
-
import tensorflow as tf
|
9 |
-
import keras
|
10 |
-
import requests
|
11 |
-
import io
|
12 |
-
import os
|
13 |
-
|
14 |
-
from fastapi.middleware.cors import CORSMiddleware
|
15 |
-
|
16 |
-
app = FastAPI()
|
17 |
-
|
18 |
-
app.add_middleware(
|
19 |
-
CORSMiddleware,
|
20 |
-
allow_origins=["*"],
|
21 |
-
allow_credentials=True,
|
22 |
-
allow_methods=["*"],
|
23 |
-
allow_headers=["*"],
|
24 |
-
)
|
25 |
-
|
26 |
-
UPLOAD_DIR = 'audio'
|
27 |
-
os.makedirs(UPLOAD_DIR, exist_ok=True)
|
28 |
-
|
29 |
-
MODEL_SERVER_URL = "http://model-server-url/predict"
|
30 |
-
|
31 |
-
@app.post("/save-audio")
|
32 |
-
async def save_audio(file: UploadFile = File(...)):
|
33 |
-
if not file.content_type.startswith('audio/'):
|
34 |
-
raise HTTPException(status_code=400, detail="Invalid file type")
|
35 |
-
|
36 |
-
file_path = os.path.join(UPLOAD_DIR, file.filename)
|
37 |
-
|
38 |
-
try:
|
39 |
-
with open(file_path, "wb") as f:
|
40 |
-
content = await file.read()
|
41 |
-
f.write(content)
|
42 |
-
return JSONResponse(content={"message": "File saved successfully", "filePath": file_path}, status_code=200)
|
43 |
-
except Exception as e:
|
44 |
-
return JSONResponse(content={"error": str(e)}, status_code=500)
|
45 |
-
|
46 |
-
|
47 |
-
model = tf.keras.models.load_model("cnn_1_v6_final_model.keras", compile=False)
|
48 |
-
|
49 |
-
@app.post("/process-audio")
|
50 |
-
async def process_audio(audio: UploadFile = File(...)):
|
51 |
-
if audio.content_type != "audio/mpeg":
|
52 |
-
raise HTTPException(status_code=400, detail="Invalid file type. Please upload an MP3 file.")
|
53 |
-
|
54 |
-
audio_bytes = await audio.read()
|
55 |
-
|
56 |
-
features = get_features(audio_bytes)
|
57 |
-
|
58 |
-
if features is None:
|
59 |
-
raise HTTPException(status_code=400, detail="Invalid audio file. Please upload a valid MP3 file.")
|
60 |
-
|
61 |
-
prediction = model.predict(np.expand_dims(features, axis=0))
|
62 |
-
|
63 |
-
return {"prediction": prediction}
|
64 |
-
|
65 |
-
|
66 |
-
'''
|
67 |
-
@router.post("/login")
|
68 |
-
async def login(user_data: UserLoginForm):
|
69 |
-
user = User.get(User.login == user_data.login)
|
70 |
-
if not user or user_data.password != user.password:
|
71 |
-
return {"message": "Invalid login or password"}
|
72 |
-
token_content = {"user_id": user.user_id}
|
73 |
-
jwt_token = jwt.encode(token_content, SECRET_KEY, algorithm=ALGORITHM)
|
74 |
-
return {"token": jwt_token}
|
75 |
-
|
76 |
-
|
77 |
-
@router.post("/registration")
|
78 |
-
async def registration(user_data: UserRegistration):
|
79 |
-
try:
|
80 |
-
new_user = User.create(login=user_data.login, password=user_data.password)
|
81 |
-
new_user.save()
|
82 |
-
return {"message": "User registered successfully"}
|
83 |
-
except IntegrityError:
|
84 |
-
return {"message": "User with this login already exists"}
|
85 |
-
'''
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
models.py
DELETED
@@ -1,56 +0,0 @@
|
|
1 |
-
from peewee import *
|
2 |
-
|
3 |
-
connection = SqliteDatabase('database.db')
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
class BaseModel(Model):
|
8 |
-
class Meta:
|
9 |
-
database = connection
|
10 |
-
|
11 |
-
class User(BaseModel):
|
12 |
-
user_id = AutoField()
|
13 |
-
login = CharField(unique=True)
|
14 |
-
password = CharField()
|
15 |
-
|
16 |
-
class Meta:
|
17 |
-
db_table = 'Users'
|
18 |
-
order_by = ('user_id',)
|
19 |
-
|
20 |
-
|
21 |
-
class Course(BaseModel):
|
22 |
-
course_id = AutoField()
|
23 |
-
name = CharField()
|
24 |
-
progress = IntegerField()
|
25 |
-
|
26 |
-
class Meta:
|
27 |
-
db_table = 'Courses'
|
28 |
-
order_by = ('course_id',)
|
29 |
-
from peewee import *
|
30 |
-
|
31 |
-
connection = SqliteDatabase('database.db')
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
class BaseModel(Model):
|
36 |
-
class Meta:
|
37 |
-
database = connection
|
38 |
-
|
39 |
-
class User(BaseModel):
|
40 |
-
user_id = AutoField()
|
41 |
-
login = CharField(unique=True)
|
42 |
-
password = CharField()
|
43 |
-
|
44 |
-
class Meta:
|
45 |
-
db_table = 'Users'
|
46 |
-
order_by = ('user_id',)
|
47 |
-
|
48 |
-
|
49 |
-
class Course(BaseModel):
|
50 |
-
course_id = AutoField()
|
51 |
-
name = CharField()
|
52 |
-
progress = IntegerField()
|
53 |
-
|
54 |
-
class Meta:
|
55 |
-
db_table = 'Courses'
|
56 |
-
order_by = ('course_id',)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
server.log
DELETED
File without changes
|
test.py
DELETED
@@ -1,62 +0,0 @@
|
|
1 |
-
import os
|
2 |
-
import numpy as np
|
3 |
-
import keras
|
4 |
-
import httpx
|
5 |
-
import librosa
|
6 |
-
import whisper
|
7 |
-
|
8 |
-
from utils import (
|
9 |
-
extract_features,
|
10 |
-
pad_or_trim,
|
11 |
-
)
|
12 |
-
|
13 |
-
def test_get_answer(audio_file_path: str):
|
14 |
-
url = "http://127.0.0.1:8000/process-audio"
|
15 |
-
headers = {
|
16 |
-
"accept": "application/json",
|
17 |
-
}
|
18 |
-
|
19 |
-
with open(audio_file_path, "rb") as audio_file:
|
20 |
-
files = {
|
21 |
-
"audio": ("test.mp3", audio_file, "audio/mp3")
|
22 |
-
}
|
23 |
-
response = httpx.post(url, headers=headers, files=files)
|
24 |
-
print("Status Code:", response.status_code)
|
25 |
-
print("Response JSON:", response.json())
|
26 |
-
|
27 |
-
|
28 |
-
audio_file_path = "test_audio.mp3"
|
29 |
-
if not os.path.exists(audio_file_path):
|
30 |
-
raise FileNotFoundError(f"Audio file not found at {audio_file_path}")
|
31 |
-
|
32 |
-
audio_data, sample_rate = librosa.load(audio_file_path)
|
33 |
-
|
34 |
-
features = extract_features(audio_data, sample_rate)
|
35 |
-
|
36 |
-
target_shape = (32, 200)
|
37 |
-
features = pad_or_trim(features, target_shape[1])
|
38 |
-
|
39 |
-
|
40 |
-
features = np.expand_dims(features, axis=0)
|
41 |
-
|
42 |
-
filepath = os.path.abspath("cnn_1_v6_final_model.h5")
|
43 |
-
if not os.path.exists(filepath):
|
44 |
-
raise FileNotFoundError(f"Model file not found at {filepath}")
|
45 |
-
|
46 |
-
model = keras.models.load_model(filepath, compile=False)
|
47 |
-
|
48 |
-
|
49 |
-
prediction = model.predict(features)
|
50 |
-
print(f"Prediction: {prediction.tolist()}")
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
def transcribe_russian(audio_file, model_name="tiny"):
|
55 |
-
model = whisper.load_model(model_name)
|
56 |
-
result = model.transcribe(audio_file, language="russian")
|
57 |
-
return result["text"]
|
58 |
-
|
59 |
-
# Example usage:
|
60 |
-
audio_file = "audio.mp3"
|
61 |
-
text = transcribe_russian(audio_file)
|
62 |
-
print(text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
test_audio.mp3
DELETED
Binary file (2.71 kB)
|
|