Spaces:
Runtime error
Runtime error
update files
Browse files
- .ipynb_checkpoints/app-checkpoint.py +49 -3
- .ipynb_checkpoints/infer_contrast-checkpoint.py +2 -6
- app.py +49 -3
- infer_contrast.py +2 -6
.ipynb_checkpoints/app-checkpoint.py
CHANGED
@@ -10,9 +10,53 @@ from infer_contrast import run
|
|
10 |
from utils.reader import load_audio
|
11 |
from utils.utility import add_arguments, print_arguments
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
def voiceRecognition(audio1,audio2):
|
14 |
-
|
15 |
-
|
|
|
|
|
|
|
|
|
16 |
|
17 |
|
18 |
title = "Voice Recognition"
|
@@ -20,6 +64,8 @@ description = "This voice recognition demo(Chinese Format) is a simple implement
|
|
20 |
|
21 |
inputs = [gr.inputs.Audio(source="upload",type="filepath", label="Speaker1"),
|
22 |
gr.inputs.Audio(source="upload",type="filepath", label="Speaker2")]
|
|
|
|
|
23 |
|
24 |
article = (
|
25 |
"<p style='text-align: center'>"
|
@@ -39,7 +85,7 @@ examples = [
|
|
39 |
interface = gr.Interface(
|
40 |
fn=voiceRecognition,
|
41 |
inputs=inputs,
|
42 |
-
outputs=
|
43 |
title=title,
|
44 |
description=description,
|
45 |
examples=examples,
|
|
|
10 |
from utils.reader import load_audio
|
11 |
from utils.utility import add_arguments, print_arguments
|
12 |
|
13 |
+
|
14 |
+
STYLE = """
|
15 |
+
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" integrity="sha256-YvdLHPgkqJ8DVUxjjnGVlMMJtNimJ6dYkowFFvp4kKs=" crossorigin="anonymous">
|
16 |
+
"""
|
17 |
+
OUTPUT_OK = (
|
18 |
+
STYLE
|
19 |
+
+ """
|
20 |
+
<div class="container">
|
21 |
+
<div class="row"><h1 style="text-align: center">Speaker1 和 Speaker2</h1></div>
|
22 |
+
<div class="row"><h1 class="display-1 text-success" style="text-align: center">是</h1></div>
|
23 |
+
<div class="row"><h1 style="text-align: center">同一人</h1></div>
|
24 |
+
<div class="row"><h1 style="text-align: center">相似度为:</h1></div>
|
25 |
+
<div class="row"><h1 class="display-1 text-success" style="text-align: center">{:.1f}%</h1></div>
|
26 |
+
<div class="row"><small style="text-align: center">(相似度为70%以上才能被认为是同一人)</small><div class="row">
|
27 |
+
</div>
|
28 |
+
"""
|
29 |
+
)
|
30 |
+
OUTPUT_FAIL = (
|
31 |
+
STYLE
|
32 |
+
+ """
|
33 |
+
<div class="container">
|
34 |
+
<div class="row"><h1 style="text-align: center">Speaker1 和 Speaker2</h1></div>
|
35 |
+
<div class="row"><h1 class="display-1 text-danger" style="text-align: center">不是</h1></div>
|
36 |
+
<div class="row"><h1 style="text-align: center">同一人</h1></div>
|
37 |
+
<div class="row"><h1 style="text-align: center">相似度为:</h1></div>
|
38 |
+
<div class="row"><h1 class="text-danger" style="text-align: center">{:.1f}%</h1></div>
|
39 |
+
<div class="row"><small style="text-align: center">(相似度为70%以上才能被认为是同一人)</small><div class="row">
|
40 |
+
</div>
|
41 |
+
"""
|
42 |
+
)
|
43 |
+
EFFECTS = [
|
44 |
+
["remix", "-"],
|
45 |
+
["channels", "1"],
|
46 |
+
["rate", "16000"],
|
47 |
+
["gain", "-1.0"],
|
48 |
+
["silence", "1", "0.1", "0.1%", "-1", "0.1", "0.1%"],
|
49 |
+
["trim", "0", "10"],
|
50 |
+
]
|
51 |
+
|
52 |
+
THRESHOLD = 0.70
|
53 |
def voiceRecognition(audio1,audio2):
|
54 |
+
score = run(audio1,audio2)
|
55 |
+
if score >= THRESHOLD:
|
56 |
+
output = OUTPUT_OK.format(score * 100)
|
57 |
+
else:
|
58 |
+
output = OUTPUT_FAIL.format(score * 100)
|
59 |
+
return output
|
60 |
|
61 |
|
62 |
title = "Voice Recognition"
|
|
|
64 |
|
65 |
inputs = [gr.inputs.Audio(source="upload",type="filepath", label="Speaker1"),
|
66 |
gr.inputs.Audio(source="upload",type="filepath", label="Speaker2")]
|
67 |
+
|
68 |
+
output = gr.outputs.HTML(label="")
|
69 |
|
70 |
article = (
|
71 |
"<p style='text-align: center'>"
|
|
|
85 |
interface = gr.Interface(
|
86 |
fn=voiceRecognition,
|
87 |
inputs=inputs,
|
88 |
+
outputs=output,
|
89 |
title=title,
|
90 |
description=description,
|
91 |
examples=examples,
|
.ipynb_checkpoints/infer_contrast-checkpoint.py
CHANGED
@@ -43,9 +43,5 @@ def run(audio1,audio2):
|
|
43 |
feature2 = infer(audio2)[0]
|
44 |
# 对角余弦值
|
45 |
dist = np.dot(feature1, feature2) / (np.linalg.norm(feature1) * np.linalg.norm(feature2))
|
46 |
-
|
47 |
-
|
48 |
-
else:
|
49 |
-
result = "Speaker1 和 Speaker2 不是同一个人,相似度为:%f" % (dist)
|
50 |
-
|
51 |
-
return result
|
|
|
43 |
feature2 = infer(audio2)[0]
|
44 |
# 对角余弦值
|
45 |
dist = np.dot(feature1, feature2) / (np.linalg.norm(feature1) * np.linalg.norm(feature2))
|
46 |
+
|
47 |
+
return dist
|
|
|
|
|
|
|
|
app.py
CHANGED
@@ -10,9 +10,53 @@ from infer_contrast import run
|
|
10 |
from utils.reader import load_audio
|
11 |
from utils.utility import add_arguments, print_arguments
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
def voiceRecognition(audio1,audio2):
|
14 |
-
|
15 |
-
|
|
|
|
|
|
|
|
|
16 |
|
17 |
|
18 |
title = "Voice Recognition"
|
@@ -20,6 +64,8 @@ description = "This voice recognition demo(Chinese Format) is a simple implement
|
|
20 |
|
21 |
inputs = [gr.inputs.Audio(source="upload",type="filepath", label="Speaker1"),
|
22 |
gr.inputs.Audio(source="upload",type="filepath", label="Speaker2")]
|
|
|
|
|
23 |
|
24 |
article = (
|
25 |
"<p style='text-align: center'>"
|
@@ -39,7 +85,7 @@ examples = [
|
|
39 |
interface = gr.Interface(
|
40 |
fn=voiceRecognition,
|
41 |
inputs=inputs,
|
42 |
-
outputs=
|
43 |
title=title,
|
44 |
description=description,
|
45 |
examples=examples,
|
|
|
10 |
from utils.reader import load_audio
|
11 |
from utils.utility import add_arguments, print_arguments
|
12 |
|
13 |
+
|
14 |
+
STYLE = """
|
15 |
+
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" integrity="sha256-YvdLHPgkqJ8DVUxjjnGVlMMJtNimJ6dYkowFFvp4kKs=" crossorigin="anonymous">
|
16 |
+
"""
|
17 |
+
OUTPUT_OK = (
|
18 |
+
STYLE
|
19 |
+
+ """
|
20 |
+
<div class="container">
|
21 |
+
<div class="row"><h1 style="text-align: center">Speaker1 和 Speaker2</h1></div>
|
22 |
+
<div class="row"><h1 class="display-1 text-success" style="text-align: center">是</h1></div>
|
23 |
+
<div class="row"><h1 style="text-align: center">同一人</h1></div>
|
24 |
+
<div class="row"><h1 style="text-align: center">相似度为:</h1></div>
|
25 |
+
<div class="row"><h1 class="display-1 text-success" style="text-align: center">{:.1f}%</h1></div>
|
26 |
+
<div class="row"><small style="text-align: center">(相似度为70%以上才能被认为是同一人)</small><div class="row">
|
27 |
+
</div>
|
28 |
+
"""
|
29 |
+
)
|
30 |
+
OUTPUT_FAIL = (
|
31 |
+
STYLE
|
32 |
+
+ """
|
33 |
+
<div class="container">
|
34 |
+
<div class="row"><h1 style="text-align: center">Speaker1 和 Speaker2</h1></div>
|
35 |
+
<div class="row"><h1 class="display-1 text-danger" style="text-align: center">不是</h1></div>
|
36 |
+
<div class="row"><h1 style="text-align: center">同一人</h1></div>
|
37 |
+
<div class="row"><h1 style="text-align: center">相似度为:</h1></div>
|
38 |
+
<div class="row"><h1 class="text-danger" style="text-align: center">{:.1f}%</h1></div>
|
39 |
+
<div class="row"><small style="text-align: center">(相似度为70%以上才能被认为是同一人)</small><div class="row">
|
40 |
+
</div>
|
41 |
+
"""
|
42 |
+
)
|
43 |
+
EFFECTS = [
|
44 |
+
["remix", "-"],
|
45 |
+
["channels", "1"],
|
46 |
+
["rate", "16000"],
|
47 |
+
["gain", "-1.0"],
|
48 |
+
["silence", "1", "0.1", "0.1%", "-1", "0.1", "0.1%"],
|
49 |
+
["trim", "0", "10"],
|
50 |
+
]
|
51 |
+
|
52 |
+
THRESHOLD = 0.70
|
53 |
def voiceRecognition(audio1,audio2):
|
54 |
+
score = run(audio1,audio2)
|
55 |
+
if score >= THRESHOLD:
|
56 |
+
output = OUTPUT_OK.format(score * 100)
|
57 |
+
else:
|
58 |
+
output = OUTPUT_FAIL.format(score * 100)
|
59 |
+
return output
|
60 |
|
61 |
|
62 |
title = "Voice Recognition"
|
|
|
64 |
|
65 |
inputs = [gr.inputs.Audio(source="upload",type="filepath", label="Speaker1"),
|
66 |
gr.inputs.Audio(source="upload",type="filepath", label="Speaker2")]
|
67 |
+
|
68 |
+
output = gr.outputs.HTML(label="")
|
69 |
|
70 |
article = (
|
71 |
"<p style='text-align: center'>"
|
|
|
85 |
interface = gr.Interface(
|
86 |
fn=voiceRecognition,
|
87 |
inputs=inputs,
|
88 |
+
outputs=output,
|
89 |
title=title,
|
90 |
description=description,
|
91 |
examples=examples,
|
infer_contrast.py
CHANGED
@@ -43,9 +43,5 @@ def run(audio1,audio2):
|
|
43 |
feature2 = infer(audio2)[0]
|
44 |
# 对角余弦值
|
45 |
dist = np.dot(feature1, feature2) / (np.linalg.norm(feature1) * np.linalg.norm(feature2))
|
46 |
-
|
47 |
-
|
48 |
-
else:
|
49 |
-
result = "Speaker1 和 Speaker2 不是同一个人,相似度为:%f" % (dist)
|
50 |
-
|
51 |
-
return result
|
|
|
43 |
feature2 = infer(audio2)[0]
|
44 |
# 对角余弦值
|
45 |
dist = np.dot(feature1, feature2) / (np.linalg.norm(feature1) * np.linalg.norm(feature2))
|
46 |
+
|
47 |
+
return dist
|
|
|
|
|
|
|
|