yufii committed
Commit 476003d · Parent: d1dcb47

added comments

Files changed (10)
  1. .gitignore +2 -1
  2. __pycache__/app.cpython-311.pyc +0 -0
  3. app.py +39 -13
  4. audio.mp3 +0 -0
  5. forms.py +0 -9
  6. main.py +0 -85
  7. models.py +0 -56
  8. server.log +0 -0
  9. test.py +0 -62
  10. test_audio.mp3 +0 -0
.gitignore CHANGED
@@ -1,4 +1,5 @@
 venv
 ol.py
 server.zip
-whisper_cahce
+whisper_cahce
+others
__pycache__/app.cpython-311.pyc CHANGED
Binary files a/__pycache__/app.cpython-311.pyc and b/__pycache__/app.cpython-311.pyc differ
 
app.py CHANGED
@@ -17,16 +17,20 @@ from utils import (
     pad_or_trim,
 )

+# Console logging so the output is visible on Hugging Face
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
     handlers=[logging.StreamHandler()]
 )

+# Set a temporary directory for the Numba cache
 os.environ['NUMBA_CACHE_DIR'] = '/tmp'

+# Initialize the FastAPI application
 app = FastAPI(port=8000)

+# Configure CORS (Cross-Origin Resource Sharing) to handle requests from other domains
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -35,18 +39,22 @@ app.add_middleware(
     allow_headers=["*"],
 )

-filepath = os.path.abspath("cnn_1_v6_final_model.h5")
-if not os.path.exists(filepath):
-    raise FileNotFoundError(f"Model file not found at {filepath}")
-
+# Initialize and load the Whisper model for speech recognition
 cache_dir = "/tmp/whisper_cache"
 os.makedirs(cache_dir, exist_ok=True)
 whisper_model = whisper.load_model("tiny", download_root=cache_dir)

-model = keras.models.load_model(filepath, compile=False)
+# Load the model parameters
+filepath = os.path.abspath("cnn_1_v6_final_model.h5")
+if not os.path.exists(filepath):
+    raise FileNotFoundError(f"Model file not found at {filepath}")

+# Context manager for temporary audio files
 @contextmanager
 def temporary_audio_file(audio_bytes):
+    """
+    Creates a temporary file to hold the audio data and automatically removes it after use
+    """
     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
         tmp_file.write(audio_bytes)
         tmp_file.flush()
@@ -57,20 +65,21 @@ def temporary_audio_file(audio_bytes):
         if os.path.exists(tmp_filename):
             os.remove(tmp_filename)

+# Root endpoint
 @app.get("/")
 async def read_root():
     return {"message": "Welcome to the Defects_model API"}

-filepath = os.path.abspath("cnn_1_v6_final_model.h5")
-if not os.path.exists(filepath):
-    raise FileNotFoundError(f"Model file not found at {filepath}")

 model = keras.models.load_model(filepath, compile=False)
 target_shape = (32, 200)

-
+# Endpoint for saving audio files
 @app.post("/save-audio")
 async def save_audio(file: UploadFile = File(...)):
+    """
+    Handler for saving uploaded audio files
+    """
     if not file.content_type.startswith("audio/"):
         raise HTTPException(status_code=400, detail="Invalid file type")

@@ -87,27 +96,36 @@ async def save_audio(file: UploadFile = File(...)):
     except Exception as e:
         return JSONResponse(content={"error": str(e)}, status_code=500)

+# Path for the log file
 log_file_path = os.path.join("/tmp", "server.log")

+# Logging configuration for monitoring the server
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s - %(levelname)s - %(message)s",
     handlers=[logging.StreamHandler()]
 )

-
-
+# Main endpoint for audio processing
 @app.post("/process-audio")
 async def process_audio(
     audio: UploadFile = File(...),
     phrase: str = Form(...)
 ):
+    """
+    Main handler for analyzing audio files:
+    - Makes a prediction with the model
+    - Runs the audio through openai-whisper to check the phrase
+    - Compares the recognized text with the expected phrase
+    """
+    # Check the file format
     if audio.content_type != "audio/mpeg":
         raise HTTPException(
             status_code=400, detail="Invalid file type. Only MP3 files are supported."
         )

     try:
+        # Read the audio file
         audio_bytes = await audio.read()

         if not audio_bytes:
@@ -115,9 +133,11 @@ async def process_audio(

         logging.info(f"Received audio bytes: {len(audio_bytes)} bytes")

+        # Process the audio in a temporary file
         with temporary_audio_file(audio_bytes) as tmp_filename:
             logging.info(f"Temporary file created: {tmp_filename}")

+            # Load the audio data
             audio_data, sample_rate = librosa.load(tmp_filename, sr=None)
             logging.info(
                 f"Audio loaded: sample rate = {sample_rate}, data shape = {audio_data.shape}"
@@ -125,32 +145,38 @@ async def process_audio(
             if not audio_data.any() or sample_rate == 0:
                 raise ValueError("Empty or invalid audio data.")

+            # Extract features from the audio
             features = extract_features(audio_data, sample_rate)
             logging.info(f"Features extracted: shape = {features.shape}")

+            # Prepare the data for the model
             target_shape = (1, model.input_shape[1])
             features = pad_or_trim(features, target_shape[1])
             features = np.expand_dims(features, axis=0)

+            # Get the model's prediction
             prediction = model.predict(features)
             logging.info(f"Prediction: {prediction}")

+            # Transcribe the audio with Whisper
             transcription_result = whisper_model.transcribe(tmp_filename, language="russian")
             transcribed_text = transcription_result["text"].lower().strip()

-            # Remove punctuation from the transcribed text
+            # Clean up the transcribed text
             transcribed_text_clean = re.sub(r'[^\w\s]', '', transcribed_text)
             logging.info(f"Transcribed text (cleaned): {transcribed_text_clean}")

-            # Compute the edit distance
+            # Compare with the expected phrase
             lev_distance = Levenshtein.distance(transcribed_text_clean, phrase.lower().strip())
             phrase_length = max(len(transcribed_text_clean), len(phrase))

+            # Determine the acceptable Levenshtein distance
             max_acceptable_distance = 0.5 * phrase_length
             match_phrase = lev_distance <= max_acceptable_distance

             logging.info(f"Expected phrase: {phrase}, Is correct: {match_phrase}, Transcribed text: {transcribed_text_clean}, Levenshtein distance: {lev_distance}")

+            # Return the results
             return {
                 "prediction": prediction.tolist(),
                 "match_phrase": match_phrase
audio.mp3 DELETED
Binary file (190 kB)
 
forms.py DELETED
@@ -1,9 +0,0 @@
1
- from pydantic import BaseModel
2
-
3
- class UserRegistration(BaseModel):
4
- login: str
5
- password: str
6
-
7
- class UserLoginForm(BaseModel):
8
- login: str
9
- password: str
 
main.py DELETED
@@ -1,85 +0,0 @@
1
- from fastapi import FastAPI, File, UploadFile, HTTPException
2
- from models import User, Course, connection
3
- from forms import UserRegistration, UserLoginForm
4
- from fastapi.responses import JSONResponse
5
- from utils import create_cnn_model, get_features, extract_features, pad_or_trim, noise, stretch, pitch
6
- from peewee import *
7
- import numpy as np
8
- import tensorflow as tf
9
- import keras
10
- import requests
11
- import io
12
- import os
13
-
14
- from fastapi.middleware.cors import CORSMiddleware
15
-
16
- app = FastAPI()
17
-
18
- app.add_middleware(
19
- CORSMiddleware,
20
- allow_origins=["*"],
21
- allow_credentials=True,
22
- allow_methods=["*"],
23
- allow_headers=["*"],
24
- )
25
-
26
- UPLOAD_DIR = 'audio'
27
- os.makedirs(UPLOAD_DIR, exist_ok=True)
28
-
29
- MODEL_SERVER_URL = "http://model-server-url/predict"
30
-
31
- @app.post("/save-audio")
32
- async def save_audio(file: UploadFile = File(...)):
33
- if not file.content_type.startswith('audio/'):
34
- raise HTTPException(status_code=400, detail="Invalid file type")
35
-
36
- file_path = os.path.join(UPLOAD_DIR, file.filename)
37
-
38
- try:
39
- with open(file_path, "wb") as f:
40
- content = await file.read()
41
- f.write(content)
42
- return JSONResponse(content={"message": "File saved successfully", "filePath": file_path}, status_code=200)
43
- except Exception as e:
44
- return JSONResponse(content={"error": str(e)}, status_code=500)
45
-
46
-
47
- model = tf.keras.models.load_model("cnn_1_v6_final_model.keras", compile=False)
48
-
49
- @app.post("/process-audio")
50
- async def process_audio(audio: UploadFile = File(...)):
51
- if audio.content_type != "audio/mpeg":
52
- raise HTTPException(status_code=400, detail="Invalid file type. Please upload an MP3 file.")
53
-
54
- audio_bytes = await audio.read()
55
-
56
- features = get_features(audio_bytes)
57
-
58
- if features is None:
59
- raise HTTPException(status_code=400, detail="Invalid audio file. Please upload a valid MP3 file.")
60
-
61
- prediction = model.predict(np.expand_dims(features, axis=0))
62
-
63
- return {"prediction": prediction}
64
-
65
-
66
- '''
67
- @router.post("/login")
68
- async def login(user_data: UserLoginForm):
69
- user = User.get(User.login == user_data.login)
70
- if not user or user_data.password != user.password:
71
- return {"message": "Invalid login or password"}
72
- token_content = {"user_id": user.user_id}
73
- jwt_token = jwt.encode(token_content, SECRET_KEY, algorithm=ALGORITHM)
74
- return {"token": jwt_token}
75
-
76
-
77
- @router.post("/registration")
78
- async def registration(user_data: UserRegistration):
79
- try:
80
- new_user = User.create(login=user_data.login, password=user_data.password)
81
- new_user.save()
82
- return {"message": "User registered successfully"}
83
- except IntegrityError:
84
- return {"message": "User with this login already exists"}
85
- '''
 
models.py DELETED
@@ -1,56 +0,0 @@
1
- from peewee import *
2
-
3
- connection = SqliteDatabase('database.db')
4
-
5
-
6
-
7
- class BaseModel(Model):
8
- class Meta:
9
- database = connection
10
-
11
- class User(BaseModel):
12
- user_id = AutoField()
13
- login = CharField(unique=True)
14
- password = CharField()
15
-
16
- class Meta:
17
- db_table = 'Users'
18
- order_by = ('user_id',)
19
-
20
-
21
- class Course(BaseModel):
22
- course_id = AutoField()
23
- name = CharField()
24
- progress = IntegerField()
25
-
26
- class Meta:
27
- db_table = 'Courses'
28
- order_by = ('course_id',)
29
- from peewee import *
30
-
31
- connection = SqliteDatabase('database.db')
32
-
33
-
34
-
35
- class BaseModel(Model):
36
- class Meta:
37
- database = connection
38
-
39
- class User(BaseModel):
40
- user_id = AutoField()
41
- login = CharField(unique=True)
42
- password = CharField()
43
-
44
- class Meta:
45
- db_table = 'Users'
46
- order_by = ('user_id',)
47
-
48
-
49
- class Course(BaseModel):
50
- course_id = AutoField()
51
- name = CharField()
52
- progress = IntegerField()
53
-
54
- class Meta:
55
- db_table = 'Courses'
56
- order_by = ('course_id',)
 
server.log DELETED
File without changes
test.py DELETED
@@ -1,62 +0,0 @@
1
- import os
2
- import numpy as np
3
- import keras
4
- import httpx
5
- import librosa
6
- import whisper
7
-
8
- from utils import (
9
- extract_features,
10
- pad_or_trim,
11
- )
12
-
13
- def test_get_answer(audio_file_path: str):
14
- url = "http://127.0.0.1:8000/process-audio"
15
- headers = {
16
- "accept": "application/json",
17
- }
18
-
19
- with open(audio_file_path, "rb") as audio_file:
20
- files = {
21
- "audio": ("test.mp3", audio_file, "audio/mp3")
22
- }
23
- response = httpx.post(url, headers=headers, files=files)
24
- print("Status Code:", response.status_code)
25
- print("Response JSON:", response.json())
26
-
27
-
28
- audio_file_path = "test_audio.mp3"
29
- if not os.path.exists(audio_file_path):
30
- raise FileNotFoundError(f"Audio file not found at {audio_file_path}")
31
-
32
- audio_data, sample_rate = librosa.load(audio_file_path)
33
-
34
- features = extract_features(audio_data, sample_rate)
35
-
36
- target_shape = (32, 200)
37
- features = pad_or_trim(features, target_shape[1])
38
-
39
-
40
- features = np.expand_dims(features, axis=0)
41
-
42
- filepath = os.path.abspath("cnn_1_v6_final_model.h5")
43
- if not os.path.exists(filepath):
44
- raise FileNotFoundError(f"Model file not found at {filepath}")
45
-
46
- model = keras.models.load_model(filepath, compile=False)
47
-
48
-
49
- prediction = model.predict(features)
50
- print(f"Prediction: {prediction.tolist()}")
51
-
52
-
53
-
54
- def transcribe_russian(audio_file, model_name="tiny"):
55
- model = whisper.load_model(model_name)
56
- result = model.transcribe(audio_file, language="russian")
57
- return result["text"]
58
-
59
- # Example usage:
60
- audio_file = "audio.mp3"
61
- text = transcribe_russian(audio_file)
62
- print(text)
 
test_audio.mp3 DELETED
Binary file (2.71 kB)