Spaces:

MothersTongue
/

voice-matcher-api

Running on T4

App Files Community

arnabg95 committed on Jun 20

Commit

cc08b68

•

1 Parent(s): 2c92e3b

v2 added

Browse files

Files changed (8) hide show

app/__pycache__/matcher.cpython-310.pyc +0 -0
app/__pycache__/passing.cpython-310.pyc +0 -0
app/__pycache__/transcriber.cpython-310.pyc +0 -0
app/matcher.py +2 -2
app/passing.py +5 -5
app/routers/V1/voice/__pycache__/voice_router.cpython-310.pyc +0 -0
app/routers/V1/voice/voice_router.py +1 -4
app/transcriber.py +2 -1

app/__pycache__/matcher.cpython-310.pyc CHANGED Viewed

Binary files a/app/__pycache__/matcher.cpython-310.pyc and b/app/__pycache__/matcher.cpython-310.pyc differ

app/__pycache__/passing.cpython-310.pyc CHANGED Viewed

Binary files a/app/__pycache__/passing.cpython-310.pyc and b/app/__pycache__/passing.cpython-310.pyc differ

app/__pycache__/transcriber.cpython-310.pyc CHANGED Viewed

Binary files a/app/__pycache__/transcriber.cpython-310.pyc and b/app/__pycache__/transcriber.cpython-310.pyc differ

app/matcher.py CHANGED Viewed

@@ -19,6 +19,6 @@ def sequence_match(a, b):
 def match(original, transcription):
-    sequence = sequence_match(original, transcription)
-    phonetic = phonetic_match(original, transcription)
     return sequence, phonetic

 def match(original, transcription):
+    sequence = sequence_match(original.lower(), transcription.lower())
+    phonetic = phonetic_match(original.lower(), transcription.lower())
     return sequence, phonetic

app/passing.py CHANGED Viewed

@@ -5,7 +5,7 @@ def normalize_euclidean(euclidean, max_value):
     """
     return max(0, 100 - (euclidean / max_value) * 100)
-def calculate_passing(sequence, phonetic, cosine, euclidean, passing_threshold=60, euclidean_max=200):
     # Normalize sequence and phonetic to 0-100 scale
     sequence_normalized = sequence * 100
     phonetic_normalized = phonetic * 100
@@ -17,10 +17,10 @@ def calculate_passing(sequence, phonetic, cosine, euclidean, passing_threshold=6
     # Calculate the weighted average
     weights = {
-        'sequence': 0.35,
-        'phonetic': 0.35,
-        'cosine': 0.10,
-        'euclidean': 0.20
     }
     weighted_score = (

     """
     return max(0, 100 - (euclidean / max_value) * 100)
+def calculate_passing(sequence, phonetic, cosine=0, euclidean=0, passing_threshold=50, euclidean_max=200):
     # Normalize sequence and phonetic to 0-100 scale
     sequence_normalized = sequence * 100
     phonetic_normalized = phonetic * 100
     # Calculate the weighted average
     weights = {
+        'sequence': 0.50,
+        'phonetic': 0.50,
+        'cosine': 0,
+        'euclidean': 0
     }
     weighted_score = (

app/routers/V1/voice/__pycache__/voice_router.cpython-310.pyc CHANGED Viewed

Binary files a/app/routers/V1/voice/__pycache__/voice_router.cpython-310.pyc and b/app/routers/V1/voice/__pycache__/voice_router.cpython-310.pyc differ

app/routers/V1/voice/voice_router.py CHANGED Viewed

@@ -55,10 +55,7 @@ async def transcribe_audio(
             text = get_transcription(filename_recorded)
             text = clean_transcription(text)
             sequence, phonetic = match(matcher_text, text)
-            Euclidean, Cosine = mfcc_similarty_check(
-                filename_original, filename_recorded
-            )
-            weighted_score, is_passing = calculate_passing(sequence, phonetic, Cosine, Euclidean)
             return JSONResponse(
                 {
                     "transcription": text,

             text = get_transcription(filename_recorded)
             text = clean_transcription(text)
             sequence, phonetic = match(matcher_text, text)
+            weighted_score, is_passing = calculate_passing(sequence, phonetic)
             return JSONResponse(
                 {
                     "transcription": text,

app/transcriber.py CHANGED Viewed

@@ -6,7 +6,8 @@ from datasets import load_dataset
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
-model_id = "MothersTongue/mother_tongue_model"
 model = AutoModelForSpeechSeq2Seq.from_pretrained(
     model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True

 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+model_id = "openai/whisper-large-v3"
+# model_id = "MothersTongue/mother_tongue_model"
 model = AutoModelForSpeechSeq2Seq.from_pretrained(
     model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True