Spaces:

aletrn
/

ai-pronunciation-trainer

Running

App Files Files Community

alessandro trinca tornidor committed on Nov 15, 2024

Commit

5abbb8c

1 Parent(s): bfec55c

refactor: remove use of utilsFileIO.py, update jquery@3.7.1 and bootstrap@5.3.3

Browse files

Files changed (4) hide show

aip_trainer/lambdas/lambdaSpeechToScore.py +24 -17
aip_trainer/utilsFileIO.py +0 -9
{templates → static}/main.html +14 -15
webApp.py +7 -6

aip_trainer/lambdas/lambdaSpeechToScore.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import base64
 import json
 import os
 import time
 import audioread
@@ -11,7 +11,6 @@ from torchaudio.transforms import Resample
 from aip_trainer import WordMatching as wm, app_logger
 from aip_trainer import pronunciationTrainer
-from aip_trainer import utilsFileIO
 trainer_SST_lambda = {
@@ -23,7 +22,6 @@ transform = Resample(orig_freq=48000, new_freq=16000)
 def lambda_handler(event, context):
     data = json.loads(event['body'])
     real_text = data['title']
@@ -43,21 +41,24 @@ def lambda_handler(event, context):
             'body': ''
         }
-    start = time.time()
-    random_file_name = './' + utilsFileIO.generateRandomString() + '.ogg'
-    f = open(random_file_name, 'wb')
-    f.write(file_bytes)
-    f.close()
-    duration = time.time() - start
-    app_logger.info(f'Time for saving binary in file: {duration}.')
     start = time.time()
     signal, fs = audioread_load(random_file_name)
     signal = transform(torch.Tensor(signal)).unsqueeze(0)
     duration = time.time() - start
-    app_logger.info(f'Time for loading .ogg file file: {duration}.')
     result = trainer_SST_lambda[language].processAudioForGivenText(
         signal, real_text)
@@ -65,7 +66,7 @@ def lambda_handler(event, context):
     start = time.time()
     os.remove(random_file_name)
     duration = time.time() - start
-    app_logger.info(f'Time for deleting file: {duration}')
     start = time.time()
     real_transcripts_ipa = ' '.join(
@@ -83,7 +84,6 @@ def lambda_handler(event, context):
     is_letter_correct_all_words = ''
     for idx, word_real in enumerate(words_real):
         mapped_letters, mapped_letters_indices = wm.get_best_mapped_words(
             mapped_words[idx], word_real)
@@ -96,7 +96,8 @@ def lambda_handler(event, context):
     pair_accuracy_category = ' '.join(
         [str(category) for category in result['pronunciation_categories']])
     duration = time.time() - start
-    app_logger.info(f'Time to post-process results: {duration}')
     res = {'real_transcript': result['recording_transcript'],
            'ipa_transcript': result['recording_ipa'],
@@ -110,8 +111,12 @@ def lambda_handler(event, context):
     return json.dumps(res)
 # From Librosa
 def audioread_load(path, offset=0.0, duration=None, dtype=np.float32):
     """Load an audio buffer using audioread.
@@ -120,17 +125,18 @@ def audioread_load(path, offset=0.0, duration=None, dtype=np.float32):
     """
     y = []
     with audioread.audio_open(path) as input_file:
         sr_native = input_file.samplerate
         n_channels = input_file.channels
-        s_start = int(np.round(sr_native * offset)) * n_channels
         if duration is None:
             s_end = np.inf
         else:
-            s_end = s_start + \
-                (int(np.round(sr_native * duration)) * n_channels)
         n = 0
@@ -168,6 +174,7 @@ def audioread_load(path, offset=0.0, duration=None, dtype=np.float32):
     return y, sr_native
 # From Librosa

 import base64
 import json
 import os
+import tempfile
 import time
 import audioread
 from aip_trainer import WordMatching as wm, app_logger
 from aip_trainer import pronunciationTrainer
 trainer_SST_lambda = {
 def lambda_handler(event, context):
     data = json.loads(event['body'])
     real_text = data['title']
             'body': ''
         }
+    start0 = time.time()
+    with tempfile.NamedTemporaryFile(prefix="temp_sound_speech_score_", suffix=".ogg", delete=False) as f1:
+        f1.write(file_bytes)
+        duration = time.time() - start0
+        app_logger.info(f'Saved binary in file in {duration}s.')
+        random_file_name = f1.name
     start = time.time()
+    app_logger.info(f'Loading .ogg file file {random_file_name} ...')
     signal, fs = audioread_load(random_file_name)
+    duration = time.time() - start
+    app_logger.info(f'Read .ogg file {random_file_name} in {duration}s.')
     signal = transform(torch.Tensor(signal)).unsqueeze(0)
     duration = time.time() - start
+    app_logger.info(f'Loaded .ogg file {random_file_name} in {duration}s.')
     result = trainer_SST_lambda[language].processAudioForGivenText(
         signal, real_text)
     start = time.time()
     os.remove(random_file_name)
     duration = time.time() - start
+    app_logger.info(f'Deleted file {random_file_name} in {duration}s.')
     start = time.time()
     real_transcripts_ipa = ' '.join(
     is_letter_correct_all_words = ''
     for idx, word_real in enumerate(words_real):
         mapped_letters, mapped_letters_indices = wm.get_best_mapped_words(
             mapped_words[idx], word_real)
     pair_accuracy_category = ' '.join(
         [str(category) for category in result['pronunciation_categories']])
     duration = time.time() - start
+    duration_tot = time.time() - start0
+    app_logger.info(f'Time to post-process results: {duration}, tot_duration:{duration_tot}.')
     res = {'real_transcript': result['recording_transcript'],
            'ipa_transcript': result['recording_ipa'],
     return json.dumps(res)
 # From Librosa
+def calc_start_end(sr_native, time_position, n_channels):
+    return int(np.round(sr_native * time_position)) * n_channels
 def audioread_load(path, offset=0.0, duration=None, dtype=np.float32):
     """Load an audio buffer using audioread.
     """
     y = []
+    app_logger.debug(f"reading audio file at path:{path} ...")
     with audioread.audio_open(path) as input_file:
         sr_native = input_file.samplerate
         n_channels = input_file.channels
+        s_start = calc_start_end(sr_native, offset, n_channels)
         if duration is None:
             s_end = np.inf
         else:
+            duration = calc_start_end(sr_native, duration, n_channels)
+            s_end = duration + s_start
         n = 0
     return y, sr_native
 # From Librosa

aip_trainer/utilsFileIO.py DELETED Viewed

@@ -1,9 +0,0 @@
-import string
-import random
-def generateRandomString(str_length: int = 20):
-    # printing lowercase
-    letters = string.ascii_lowercase
-    return ''.join(random.choice(letters) for i in range(str_length))

{templates → static}/main.html RENAMED Viewed

@@ -4,23 +4,22 @@
 <head>
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1">
-    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" rel="stylesheet"
-        integrity="sha384-1BmE4kWBq78iYhFldvKuhfTAU6auU8tT94WrHftjDbrCEXSU1oBoqyl2QvZ6jIW3" crossorigin="anonymous">
-    </script>
-    <link rel="stylesheet" href="../static/css/style-new.css">
-    <script src="../static/javascript/callbacks.js"></script>
     <title>AI pronunciation trainer</title>
-    <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.7.1/jquery.min.js" type="text/javascript"></script>
-    <link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet">
 </head>
 <body style="height: 100%; width: 100%; background-color: white; max-width: 90%;">

 <head>
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1">
     <title>AI pronunciation trainer</title>
+    <link href="https://cdnjs.cloudflare.com/ajax/libs/bootstrap/5.3.3/css/bootstrap.min.css"
+            crossorigin="anonymous"
+            integrity="sha384-QWTKZyjpPEjISv5WaRU9OFeRpok6YctnYmDr5pNlyT2bRjXh0JMhjY6hW+ALEwIH"
+            rel="stylesheet"
+    />
+    <script src="https://code.jquery.com/jquery-3.7.1.js"
+            crossorigin="anonymous"
+            integrity="sha384-wsqsSADZR1YRBEZ4/kKHNSmU+aX8ojbnKUMN4RyD3jDkxw5mHtoe2z/T/n4l56U/"
+            type="text/javascript"
+    ></script>
+    <script src="static/javascript/callbacks.js"></script>
+    <link rel="stylesheet" href="static/css/style-new.css" />
+    <link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet" />
 </head>
 <body style="height: 100%; width: 100%; background-color: white; max-width: 90%;">

webApp.py CHANGED Viewed

@@ -1,15 +1,16 @@
-from flask import Flask, render_template, request
-import webbrowser
 import os
 from flask_cors import CORS
-import json
-from aip_trainer.lambdas import lambdaTTS
-from aip_trainer.lambdas import lambdaSpeechToScore
 from aip_trainer.lambdas import lambdaGetSample
-app = Flask(__name__)
 cors = CORS(app)
 app.config['CORS_HEADERS'] = '*'

+import json
 import os
+import webbrowser
+from flask import Flask, render_template, request
 from flask_cors import CORS
 from aip_trainer.lambdas import lambdaGetSample
+from aip_trainer.lambdas import lambdaSpeechToScore
+from aip_trainer.lambdas import lambdaTTS
+app = Flask(__name__, template_folder="static")
 cors = CORS(app)
 app.config['CORS_HEADERS'] = '*'