alessandro trinca tornidor committed on
Commit
e8a1983
1 Parent(s): d46b8e0

refactor: improve using suggestions from pycharm

Browse files
AIModels.py CHANGED
@@ -14,13 +14,13 @@ class NeuralASR(ModelInterfaces.IASRModel):
14
 
15
  def getTranscript(self) -> str:
16
  """Get the transcripts of the process audio"""
17
- assert(self.audio_transcript != None,
18
  'Can get audio transcripts without having processed the audio')
19
  return self.audio_transcript
20
 
21
  def getWordLocations(self) -> list:
22
  """Get the pair of words location from audio"""
23
- assert(self.word_locations_in_samples != None,
24
  'Can get word locations without having processed the audio')
25
 
26
  return self.word_locations_in_samples
 
14
 
15
  def getTranscript(self) -> str:
16
  """Get the transcripts of the process audio"""
17
+ assert(self.audio_transcript is not None,
18
  'Can get audio transcripts without having processed the audio')
19
  return self.audio_transcript
20
 
21
  def getWordLocations(self) -> list:
22
  """Get the pair of words location from audio"""
23
+ assert(self.word_locations_in_samples is not None,
24
  'Can get word locations without having processed the audio')
25
 
26
  return self.word_locations_in_samples
WordMetrics.py CHANGED
@@ -11,9 +11,10 @@ def edit_distance_python2(a, b):
11
  if len(b) == 0: # Can deal with empty sequences faster
12
  return len(a)
13
  # Only two rows are really needed: the one currently filled in, and the previous
14
- distances = []
15
- distances.append([i for i in range(len(b)+1)])
16
- distances.append([0 for _ in range(len(b)+1)])
 
17
  # We can prefill the first row:
18
  costs = [0 for _ in range(3)]
19
  for i, a_token in enumerate(a, start=1):
@@ -27,7 +28,7 @@ def edit_distance_python2(a, b):
27
  distances[0][:] = distances[1][:]
28
  return distances[1][len(b)]
29
 
30
- #https://stackabuse.com/levenshtein-distance-and-text-similarity-in-python/
31
  def edit_distance_python(seq1, seq2):
32
  size_x = len(seq1) + 1
33
  size_y = len(seq2) + 1
@@ -52,4 +53,4 @@ def edit_distance_python(seq1, seq2):
52
  matrix[x,y-1] + 1
53
  )
54
  #print (matrix)
55
- return (matrix[size_x - 1, size_y - 1])
 
11
  if len(b) == 0: # Can deal with empty sequences faster
12
  return len(a)
13
  # Only two rows are really needed: the one currently filled in, and the previous
14
+ distances = [
15
+ [i for i in range(len(b) + 1)],
16
+ [0 for _ in range(len(b) + 1)]
17
+ ]
18
  # We can prefill the first row:
19
  costs = [0 for _ in range(3)]
20
  for i, a_token in enumerate(a, start=1):
 
28
  distances[0][:] = distances[1][:]
29
  return distances[1][len(b)]
30
 
31
+ # https://stackabuse.com/levenshtein-distance-and-text-similarity-in-python/
32
  def edit_distance_python(seq1, seq2):
33
  size_x = len(seq1) + 1
34
  size_y = len(seq2) + 1
 
53
  matrix[x,y-1] + 1
54
  )
55
  #print (matrix)
56
+ return matrix[size_x - 1, size_y - 1]
lambdaGetSample.py CHANGED
@@ -7,7 +7,7 @@ import random
7
  import pickle
8
 
9
 
10
- class TextDataset():
11
  def __init__(self, table, language='-'):
12
  self.table_dataframe = table
13
  self.number_of_samples = len(table)
@@ -52,7 +52,7 @@ def lambda_handler(event, context):
52
 
53
  sample_in_category = False
54
 
55
- while(not sample_in_category):
56
  valid_sequence = False
57
  while not valid_sequence:
58
  try:
@@ -85,5 +85,5 @@ def getSentenceCategory(sentence) -> int:
85
  number_of_words = len(sentence.split())
86
  categories_word_limits = [0, 8, 20, 100000]
87
  for category in range(len(categories_word_limits)-1):
88
- if number_of_words > categories_word_limits[category] and number_of_words <= categories_word_limits[category+1]:
89
  return category+1
 
7
  import pickle
8
 
9
 
10
+ class TextDataset:
11
  def __init__(self, table, language='-'):
12
  self.table_dataframe = table
13
  self.number_of_samples = len(table)
 
52
 
53
  sample_in_category = False
54
 
55
+ while not sample_in_category:
56
  valid_sequence = False
57
  while not valid_sequence:
58
  try:
 
85
  number_of_words = len(sentence.split())
86
  categories_word_limits = [0, 8, 20, 100000]
87
  for category in range(len(categories_word_limits)-1):
88
+ if categories_word_limits[category] < number_of_words <= categories_word_limits[category + 1]:
89
  return category+1
lambdaSpeechToScore.py CHANGED
@@ -12,9 +12,10 @@ import numpy as np
12
  from torchaudio.transforms import Resample
13
 
14
 
15
- trainer_SST_lambda = {}
16
- trainer_SST_lambda['de'] = pronunciationTrainer.getTrainer("de")
17
- trainer_SST_lambda['en'] = pronunciationTrainer.getTrainer("en")
 
18
 
19
  transform = Resample(orig_freq=48000, new_freq=16000)
20
 
 
12
  from torchaudio.transforms import Resample
13
 
14
 
15
+ trainer_SST_lambda = {
16
+ 'de': pronunciationTrainer.getTrainer("de"),
17
+ 'en': pronunciationTrainer.getTrainer("en")
18
+ }
19
 
20
  transform = Resample(orig_freq=48000, new_freq=16000)
21
 
unitTests.py CHANGED
@@ -15,7 +15,7 @@ def test_category(category: int, threshold_min: int, threshold_max: int):
15
  response_dict = json.loads(response)
16
  number_of_words = len(
17
  response_dict['real_transcript'][0].split())
18
- length_valid = number_of_words > threshold_min and number_of_words <= threshold_max
19
  if not length_valid:
20
  print('Category ', category,
21
  ' had a sentence with length ', number_of_words)
@@ -65,8 +65,7 @@ class TestPhonemConverter(unittest.TestCase):
65
  phonem_converter, 'Hallo, das ist ein Test', 'haloː, dɑːs ɪst ain tɛst'))
66
 
67
 
68
- trainer_SST_lambda = {}
69
- trainer_SST_lambda['de'] = pronunciationTrainer.getTrainer("de")
70
 
71
 
72
  class TestScore(unittest.TestCase):
 
15
  response_dict = json.loads(response)
16
  number_of_words = len(
17
  response_dict['real_transcript'][0].split())
18
+ length_valid = threshold_min < number_of_words <= threshold_max
19
  if not length_valid:
20
  print('Category ', category,
21
  ' had a sentence with length ', number_of_words)
 
65
  phonem_converter, 'Hallo, das ist ein Test', 'haloː, dɑːs ɪst ain tɛst'))
66
 
67
 
68
+ trainer_SST_lambda = {'de': pronunciationTrainer.getTrainer("de")}
 
69
 
70
 
71
  class TestScore(unittest.TestCase):