Spaces:
Running
Running
alessandro trinca tornidor
committed on
Commit
•
0b0c1a6
1
Parent(s):
6468ecf
feat: use structlog to handle logging, add some logs
Browse files
aip_trainer/lambdas/lambdaSpeechToScore.py
CHANGED
@@ -60,8 +60,10 @@ def lambda_handler(event, context):
|
|
60 |
duration = time.time() - start
|
61 |
app_logger.info(f'Loaded .ogg file {random_file_name} in {duration}s.')
|
62 |
|
63 |
-
|
64 |
-
|
|
|
|
|
65 |
|
66 |
start = time.time()
|
67 |
os.remove(random_file_name)
|
|
|
60 |
duration = time.time() - start
|
61 |
app_logger.info(f'Loaded .ogg file {random_file_name} in {duration}s.')
|
62 |
|
63 |
+
language_trainer_sst_lambda = trainer_SST_lambda[language]
|
64 |
+
app_logger.info(f'language_trainer_sst_lambda: preparing...')
|
65 |
+
result = language_trainer_sst_lambda.processAudioForGivenText(signal, real_text)
|
66 |
+
app_logger.info(f'language_trainer_sst_lambda: result: {result}...')
|
67 |
|
68 |
start = time.time()
|
69 |
os.remove(random_file_name)
|
aip_trainer/models/RuleBasedModels.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import eng_to_ipa
|
2 |
|
3 |
from aip_trainer.models import ModelInterfaces
|
|
|
4 |
|
5 |
|
6 |
class EpitranPhonemConverter(ModelInterfaces.ITextToPhonemModel):
|
@@ -12,7 +13,10 @@ class EpitranPhonemConverter(ModelInterfaces.ITextToPhonemModel):
|
|
12 |
self.epitran_model = epitran_model
|
13 |
|
14 |
def convertToPhonem(self, sentence: str) -> str:
|
|
|
|
|
15 |
phonem_representation = self.epitran_model.transliterate(sentence)
|
|
|
16 |
return phonem_representation
|
17 |
|
18 |
|
|
|
1 |
import eng_to_ipa
|
2 |
|
3 |
from aip_trainer.models import ModelInterfaces
|
4 |
+
from aip_trainer import app_logger
|
5 |
|
6 |
|
7 |
class EpitranPhonemConverter(ModelInterfaces.ITextToPhonemModel):
|
|
|
13 |
self.epitran_model = epitran_model
|
14 |
|
15 |
def convertToPhonem(self, sentence: str) -> str:
|
16 |
+
|
17 |
+
app_logger.info(f'starting EpitranPhonemConverter.convertToPhonem...')
|
18 |
phonem_representation = self.epitran_model.transliterate(sentence)
|
19 |
+
app_logger.info(f'EpitranPhonemConverter: got phonem_representation!')
|
20 |
return phonem_representation
|
21 |
|
22 |
|
aip_trainer/pronunciationTrainer.py
CHANGED
@@ -21,15 +21,14 @@ def getTrainer(language: str):
|
|
21 |
asr_model = AIModels.NeuralASR(model, decoder)
|
22 |
|
23 |
if language == 'de':
|
24 |
-
|
25 |
-
|
26 |
elif language == 'en':
|
27 |
phonem_converter = RuleBasedModels.EngPhonemConverter()
|
28 |
else:
|
29 |
raise ValueError('Language not implemented')
|
30 |
|
31 |
-
trainer = PronunciationTrainer(
|
32 |
-
asr_model, phonem_converter)
|
33 |
|
34 |
return trainer
|
35 |
|
@@ -82,8 +81,8 @@ class PronunciationTrainer:
|
|
82 |
def processAudioForGivenText(self, recordedAudio: torch.Tensor = None, real_text=None):
|
83 |
|
84 |
start = time.time()
|
85 |
-
|
86 |
-
|
87 |
|
88 |
duration = time.time() - start
|
89 |
app_logger.info(f'Time for NN to transcript audio: {duration}.')
|
@@ -114,16 +113,19 @@ class PronunciationTrainer:
|
|
114 |
def getAudioTranscript(self, recordedAudio: torch.Tensor = None):
|
115 |
current_recorded_audio = recordedAudio
|
116 |
|
117 |
-
|
118 |
-
|
119 |
|
|
|
120 |
self.asr_model.processAudio(current_recorded_audio)
|
121 |
|
|
|
122 |
current_recorded_transcript, current_recorded_word_locations = self.getTranscriptAndWordsLocations(
|
123 |
current_recorded_audio.shape[1])
|
124 |
-
|
125 |
-
|
126 |
|
|
|
127 |
return current_recorded_transcript, current_recorded_ipa, current_recorded_word_locations
|
128 |
|
129 |
def getWordLocationsFromRecordInSeconds(self, word_locations, mapped_words_indices) -> list:
|
|
|
21 |
asr_model = AIModels.NeuralASR(model, decoder)
|
22 |
|
23 |
if language == 'de':
|
24 |
+
epitran_deu_latn = epitran.Epitran('deu-Latn')
|
25 |
+
phonem_converter = RuleBasedModels.EpitranPhonemConverter(epitran_deu_latn)
|
26 |
elif language == 'en':
|
27 |
phonem_converter = RuleBasedModels.EngPhonemConverter()
|
28 |
else:
|
29 |
raise ValueError('Language not implemented')
|
30 |
|
31 |
+
trainer = PronunciationTrainer(asr_model, phonem_converter)
|
|
|
32 |
|
33 |
return trainer
|
34 |
|
|
|
81 |
def processAudioForGivenText(self, recordedAudio: torch.Tensor = None, real_text=None):
|
82 |
|
83 |
start = time.time()
|
84 |
+
app_logger.info(f'starting getAudioTranscript...')
|
85 |
+
recording_transcript, recording_ipa, word_locations = self.getAudioTranscript(recordedAudio)
|
86 |
|
87 |
duration = time.time() - start
|
88 |
app_logger.info(f'Time for NN to transcript audio: {duration}.')
|
|
|
113 |
def getAudioTranscript(self, recordedAudio: torch.Tensor = None):
|
114 |
current_recorded_audio = recordedAudio
|
115 |
|
116 |
+
app_logger.info(f'starting preprocessAudio...')
|
117 |
+
current_recorded_audio = self.preprocessAudio(current_recorded_audio)
|
118 |
|
119 |
+
app_logger.info(f'starting processAudio...')
|
120 |
self.asr_model.processAudio(current_recorded_audio)
|
121 |
|
122 |
+
app_logger.info(f'starting getTranscriptAndWordsLocations...')
|
123 |
current_recorded_transcript, current_recorded_word_locations = self.getTranscriptAndWordsLocations(
|
124 |
current_recorded_audio.shape[1])
|
125 |
+
app_logger.info(f'starting convertToPhonem...')
|
126 |
+
current_recorded_ipa = self.ipa_converter.convertToPhonem(current_recorded_transcript)
|
127 |
|
128 |
+
app_logger.info(f'ok, return audio transcript!')
|
129 |
return current_recorded_transcript, current_recorded_ipa, current_recorded_word_locations
|
130 |
|
131 |
def getWordLocationsFromRecordInSeconds(self, word_locations, mapped_words_indices) -> list:
|
tests/__init__.py
CHANGED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
import structlog
|
4 |
+
from aip_trainer import PROJECT_ROOT_FOLDER, LOG_JSON_FORMAT
|
5 |
+
from aip_trainer.utils import session_logger
|
6 |
+
|
7 |
+
|
8 |
+
TEST_ROOT_FOLDER = PROJECT_ROOT_FOLDER / "tests"
|
9 |
+
EVENTS_FOLDER = TEST_ROOT_FOLDER / "events"
|
10 |
+
log_level = os.getenv("LOG_LEVEL", "INFO")
|
11 |
+
session_logger.setup_logging(json_logs=LOG_JSON_FORMAT, log_level=log_level)
|
12 |
+
test_logger = structlog.stdlib.get_logger(__name__)
|