Spaces:

YuAnthony
/

Voice-Recognition

Runtime error

App Files Community

YuAnthony committed on Jan 6, 2022

Commit

dd65803

1 Parent(s): 74a0954

update files

Browse files

Files changed (4) hide show

.ipynb_checkpoints/app-checkpoint.py +49 -3
.ipynb_checkpoints/infer_contrast-checkpoint.py +2 -6
app.py +49 -3
infer_contrast.py +2 -6

.ipynb_checkpoints/app-checkpoint.py CHANGED Viewed

@@ -10,9 +10,53 @@ from infer_contrast import run
 from utils.reader import load_audio
 from utils.utility import add_arguments, print_arguments
 def voiceRecognition(audio1,audio2):
-    text = run(audio1,audio2)
-    return text
 title = "Voice Recognition"
@@ -20,6 +64,8 @@ description = "This voice recognition demo(Chinese Format) is a simple implement
 inputs = [gr.inputs.Audio(source="upload",type="filepath", label="Speaker1"),
           gr.inputs.Audio(source="upload",type="filepath", label="Speaker2")]
 article = (
     "<p style='text-align: center'>"
@@ -39,7 +85,7 @@ examples = [
 interface = gr.Interface(
     fn=voiceRecognition,
     inputs=inputs,
-    outputs="text",
     title=title,
     description=description,
     examples=examples,

 from utils.reader import load_audio
 from utils.utility import add_arguments, print_arguments
+STYLE = """
+<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" integrity="sha256-YvdLHPgkqJ8DVUxjjnGVlMMJtNimJ6dYkowFFvp4kKs=" crossorigin="anonymous">
+"""
+OUTPUT_OK = (
+    STYLE
+    + """
+    <div class="container">
+        <div class="row"><h1 style="text-align: center">Speaker1 和 Speaker2</h1></div>
+        <div class="row"><h1 class="display-1 text-success" style="text-align: center">是</h1></div>
+        <div class="row"><h1 style="text-align: center">同一人</h1></div>
+        <div class="row"><h1 style="text-align: center">相似度为:</h1></div>
+        <div class="row"><h1 class="display-1 text-success" style="text-align: center">{:.1f}%</h1></div>
+        <div class="row"><small style="text-align: center">(相似度为70%以上才能被认为是同一人)</small><div class="row">
+    </div>
+"""
+)
+OUTPUT_FAIL = (
+    STYLE
+    + """
+    <div class="container">
+        <div class="row"><h1 style="text-align: center">Speaker1 和 Speaker2</h1></div>
+        <div class="row"><h1 class="display-1 text-danger" style="text-align: center">不是</h1></div>
+        <div class="row"><h1 style="text-align: center">同一人</h1></div>
+        <div class="row"><h1 style="text-align: center">相似度为:</h1></div>
+        <div class="row"><h1 class="text-danger" style="text-align: center">{:.1f}%</h1></div>
+        <div class="row"><small style="text-align: center">(相似度为70%以上才能被认为是同一人)</small><div class="row">
+    </div>
+"""
+)
+EFFECTS = [
+    ["remix", "-"],
+    ["channels", "1"],
+    ["rate", "16000"],
+    ["gain", "-1.0"],
+    ["silence", "1", "0.1", "0.1%", "-1", "0.1", "0.1%"],
+    ["trim", "0", "10"],
+]
+THRESHOLD = 0.70
 def voiceRecognition(audio1,audio2):
+    score = run(audio1,audio2)
+    if score >= THRESHOLD:
+        output = OUTPUT_OK.format(score * 100)
+    else:
+        output = OUTPUT_FAIL.format(score * 100)
+    return output
 title = "Voice Recognition"
 inputs = [gr.inputs.Audio(source="upload",type="filepath", label="Speaker1"),
           gr.inputs.Audio(source="upload",type="filepath", label="Speaker2")]
+output = gr.outputs.HTML(label="")
 article = (
     "<p style='text-align: center'>"
 interface = gr.Interface(
     fn=voiceRecognition,
     inputs=inputs,
+    outputs=output,
     title=title,
     description=description,
     examples=examples,

.ipynb_checkpoints/infer_contrast-checkpoint.py CHANGED Viewed

@@ -43,9 +43,5 @@ def run(audio1,audio2):
     feature2 = infer(audio2)[0]
     # 对角余弦值
     dist = np.dot(feature1, feature2) / (np.linalg.norm(feature1) * np.linalg.norm(feature2))
-    if dist > args.threshold:
-        result = "Speaker1 和 Speaker2 为同一个人，相似度为：%f" % (dist)
-    else:
-        result = "Speaker1 和 Speaker2 不是同一个人，相似度为：%f" % (dist)
-    return result

     feature2 = infer(audio2)[0]
     # 对角余弦值
     dist = np.dot(feature1, feature2) / (np.linalg.norm(feature1) * np.linalg.norm(feature2))
+    return dist

app.py CHANGED Viewed

@@ -10,9 +10,53 @@ from infer_contrast import run
 from utils.reader import load_audio
 from utils.utility import add_arguments, print_arguments
 def voiceRecognition(audio1,audio2):
-    text = run(audio1,audio2)
-    return text
 title = "Voice Recognition"
@@ -20,6 +64,8 @@ description = "This voice recognition demo(Chinese Format) is a simple implement
 inputs = [gr.inputs.Audio(source="upload",type="filepath", label="Speaker1"),
           gr.inputs.Audio(source="upload",type="filepath", label="Speaker2")]
 article = (
     "<p style='text-align: center'>"
@@ -39,7 +85,7 @@ examples = [
 interface = gr.Interface(
     fn=voiceRecognition,
     inputs=inputs,
-    outputs="text",
     title=title,
     description=description,
     examples=examples,

 from utils.reader import load_audio
 from utils.utility import add_arguments, print_arguments
+STYLE = """
+<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" integrity="sha256-YvdLHPgkqJ8DVUxjjnGVlMMJtNimJ6dYkowFFvp4kKs=" crossorigin="anonymous">
+"""
+OUTPUT_OK = (
+    STYLE
+    + """
+    <div class="container">
+        <div class="row"><h1 style="text-align: center">Speaker1 和 Speaker2</h1></div>
+        <div class="row"><h1 class="display-1 text-success" style="text-align: center">是</h1></div>
+        <div class="row"><h1 style="text-align: center">同一人</h1></div>
+        <div class="row"><h1 style="text-align: center">相似度为:</h1></div>
+        <div class="row"><h1 class="display-1 text-success" style="text-align: center">{:.1f}%</h1></div>
+        <div class="row"><small style="text-align: center">(相似度为70%以上才能被认为是同一人)</small><div class="row">
+    </div>
+"""
+)
+OUTPUT_FAIL = (
+    STYLE
+    + """
+    <div class="container">
+        <div class="row"><h1 style="text-align: center">Speaker1 和 Speaker2</h1></div>
+        <div class="row"><h1 class="display-1 text-danger" style="text-align: center">不是</h1></div>
+        <div class="row"><h1 style="text-align: center">同一人</h1></div>
+        <div class="row"><h1 style="text-align: center">相似度为:</h1></div>
+        <div class="row"><h1 class="text-danger" style="text-align: center">{:.1f}%</h1></div>
+        <div class="row"><small style="text-align: center">(相似度为70%以上才能被认为是同一人)</small><div class="row">
+    </div>
+"""
+)
+EFFECTS = [
+    ["remix", "-"],
+    ["channels", "1"],
+    ["rate", "16000"],
+    ["gain", "-1.0"],
+    ["silence", "1", "0.1", "0.1%", "-1", "0.1", "0.1%"],
+    ["trim", "0", "10"],
+]
+THRESHOLD = 0.70
 def voiceRecognition(audio1,audio2):
+    score = run(audio1,audio2)
+    if score >= THRESHOLD:
+        output = OUTPUT_OK.format(score * 100)
+    else:
+        output = OUTPUT_FAIL.format(score * 100)
+    return output
 title = "Voice Recognition"
 inputs = [gr.inputs.Audio(source="upload",type="filepath", label="Speaker1"),
           gr.inputs.Audio(source="upload",type="filepath", label="Speaker2")]
+output = gr.outputs.HTML(label="")
 article = (
     "<p style='text-align: center'>"
 interface = gr.Interface(
     fn=voiceRecognition,
     inputs=inputs,
+    outputs=output,
     title=title,
     description=description,
     examples=examples,

infer_contrast.py CHANGED Viewed

@@ -43,9 +43,5 @@ def run(audio1,audio2):
     feature2 = infer(audio2)[0]
     # 对角余弦值
     dist = np.dot(feature1, feature2) / (np.linalg.norm(feature1) * np.linalg.norm(feature2))
-    if dist > args.threshold:
-        result = "Speaker1 和 Speaker2 为同一个人，相似度为：%f" % (dist)
-    else:
-        result = "Speaker1 和 Speaker2 不是同一个人，相似度为：%f" % (dist)
-    return result

     feature2 = infer(audio2)[0]
     # 对角余弦值
     dist = np.dot(feature1, feature2) / (np.linalg.norm(feature1) * np.linalg.norm(feature2))
+    return dist