YuAnthony committed on
Commit
dd65803
·
1 Parent(s): 74a0954

update files

Browse files
.ipynb_checkpoints/app-checkpoint.py CHANGED
@@ -10,9 +10,53 @@ from infer_contrast import run
10
  from utils.reader import load_audio
11
  from utils.utility import add_arguments, print_arguments
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  def voiceRecognition(audio1,audio2):
14
- text = run(audio1,audio2)
15
- return text
 
 
 
 
16
 
17
 
18
  title = "Voice Recognition"
@@ -20,6 +64,8 @@ description = "This voice recognition demo(Chinese Format) is a simple implement
20
 
21
  inputs = [gr.inputs.Audio(source="upload",type="filepath", label="Speaker1"),
22
  gr.inputs.Audio(source="upload",type="filepath", label="Speaker2")]
 
 
23
 
24
  article = (
25
  "<p style='text-align: center'>"
@@ -39,7 +85,7 @@ examples = [
39
  interface = gr.Interface(
40
  fn=voiceRecognition,
41
  inputs=inputs,
42
- outputs="text",
43
  title=title,
44
  description=description,
45
  examples=examples,
 
10
  from utils.reader import load_audio
11
  from utils.utility import add_arguments, print_arguments
12
 
13
+
14
+ STYLE = """
15
+ <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" integrity="sha256-YvdLHPgkqJ8DVUxjjnGVlMMJtNimJ6dYkowFFvp4kKs=" crossorigin="anonymous">
16
+ """
17
+ OUTPUT_OK = (
18
+ STYLE
19
+ + """
20
+ <div class="container">
21
+ <div class="row"><h1 style="text-align: center">Speaker1 和 Speaker2</h1></div>
22
+ <div class="row"><h1 class="display-1 text-success" style="text-align: center">是</h1></div>
23
+ <div class="row"><h1 style="text-align: center">同一人</h1></div>
24
+ <div class="row"><h1 style="text-align: center">相似度为:</h1></div>
25
+ <div class="row"><h1 class="display-1 text-success" style="text-align: center">{:.1f}%</h1></div>
26
+ <div class="row"><small style="text-align: center">(相似度为70%以上才能被认为是同一人)</small><div class="row">
27
+ </div>
28
+ """
29
+ )
30
+ OUTPUT_FAIL = (
31
+ STYLE
32
+ + """
33
+ <div class="container">
34
+ <div class="row"><h1 style="text-align: center">Speaker1 和 Speaker2</h1></div>
35
+ <div class="row"><h1 class="display-1 text-danger" style="text-align: center">不是</h1></div>
36
+ <div class="row"><h1 style="text-align: center">同一人</h1></div>
37
+ <div class="row"><h1 style="text-align: center">相似度为:</h1></div>
38
+ <div class="row"><h1 class="text-danger" style="text-align: center">{:.1f}%</h1></div>
39
+ <div class="row"><small style="text-align: center">(相似度为70%以上才能被认为是同一人)</small><div class="row">
40
+ </div>
41
+ """
42
+ )
43
+ EFFECTS = [
44
+ ["remix", "-"],
45
+ ["channels", "1"],
46
+ ["rate", "16000"],
47
+ ["gain", "-1.0"],
48
+ ["silence", "1", "0.1", "0.1%", "-1", "0.1", "0.1%"],
49
+ ["trim", "0", "10"],
50
+ ]
51
+
52
+ THRESHOLD = 0.70
53
  def voiceRecognition(audio1,audio2):
54
+ score = run(audio1,audio2)
55
+ if score >= THRESHOLD:
56
+ output = OUTPUT_OK.format(score * 100)
57
+ else:
58
+ output = OUTPUT_FAIL.format(score * 100)
59
+ return output
60
 
61
 
62
  title = "Voice Recognition"
 
64
 
65
  inputs = [gr.inputs.Audio(source="upload",type="filepath", label="Speaker1"),
66
  gr.inputs.Audio(source="upload",type="filepath", label="Speaker2")]
67
+
68
+ output = gr.outputs.HTML(label="")
69
 
70
  article = (
71
  "<p style='text-align: center'>"
 
85
  interface = gr.Interface(
86
  fn=voiceRecognition,
87
  inputs=inputs,
88
+ outputs=output,
89
  title=title,
90
  description=description,
91
  examples=examples,
.ipynb_checkpoints/infer_contrast-checkpoint.py CHANGED
@@ -43,9 +43,5 @@ def run(audio1,audio2):
43
  feature2 = infer(audio2)[0]
44
  # 对角余弦值
45
  dist = np.dot(feature1, feature2) / (np.linalg.norm(feature1) * np.linalg.norm(feature2))
46
- if dist > args.threshold:
47
- result = "Speaker1 和 Speaker2 为同一个人,相似度为:%f" % (dist)
48
- else:
49
- result = "Speaker1 和 Speaker2 不是同一个人,相似度为:%f" % (dist)
50
-
51
- return result
 
43
  feature2 = infer(audio2)[0]
44
  # 对角余弦值
45
  dist = np.dot(feature1, feature2) / (np.linalg.norm(feature1) * np.linalg.norm(feature2))
46
+
47
+ return dist
 
 
 
 
app.py CHANGED
@@ -10,9 +10,53 @@ from infer_contrast import run
10
  from utils.reader import load_audio
11
  from utils.utility import add_arguments, print_arguments
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  def voiceRecognition(audio1,audio2):
14
- text = run(audio1,audio2)
15
- return text
 
 
 
 
16
 
17
 
18
  title = "Voice Recognition"
@@ -20,6 +64,8 @@ description = "This voice recognition demo(Chinese Format) is a simple implement
20
 
21
  inputs = [gr.inputs.Audio(source="upload",type="filepath", label="Speaker1"),
22
  gr.inputs.Audio(source="upload",type="filepath", label="Speaker2")]
 
 
23
 
24
  article = (
25
  "<p style='text-align: center'>"
@@ -39,7 +85,7 @@ examples = [
39
  interface = gr.Interface(
40
  fn=voiceRecognition,
41
  inputs=inputs,
42
- outputs="text",
43
  title=title,
44
  description=description,
45
  examples=examples,
 
10
  from utils.reader import load_audio
11
  from utils.utility import add_arguments, print_arguments
12
 
13
+
14
+ STYLE = """
15
+ <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" integrity="sha256-YvdLHPgkqJ8DVUxjjnGVlMMJtNimJ6dYkowFFvp4kKs=" crossorigin="anonymous">
16
+ """
17
+ OUTPUT_OK = (
18
+ STYLE
19
+ + """
20
+ <div class="container">
21
+ <div class="row"><h1 style="text-align: center">Speaker1 和 Speaker2</h1></div>
22
+ <div class="row"><h1 class="display-1 text-success" style="text-align: center">是</h1></div>
23
+ <div class="row"><h1 style="text-align: center">同一人</h1></div>
24
+ <div class="row"><h1 style="text-align: center">相似度为:</h1></div>
25
+ <div class="row"><h1 class="display-1 text-success" style="text-align: center">{:.1f}%</h1></div>
26
+ <div class="row"><small style="text-align: center">(相似度为70%以上才能被认为是同一人)</small><div class="row">
27
+ </div>
28
+ """
29
+ )
30
+ OUTPUT_FAIL = (
31
+ STYLE
32
+ + """
33
+ <div class="container">
34
+ <div class="row"><h1 style="text-align: center">Speaker1 和 Speaker2</h1></div>
35
+ <div class="row"><h1 class="display-1 text-danger" style="text-align: center">不是</h1></div>
36
+ <div class="row"><h1 style="text-align: center">同一人</h1></div>
37
+ <div class="row"><h1 style="text-align: center">相似度为:</h1></div>
38
+ <div class="row"><h1 class="text-danger" style="text-align: center">{:.1f}%</h1></div>
39
+ <div class="row"><small style="text-align: center">(相似度为70%以上才能被认为是同一人)</small><div class="row">
40
+ </div>
41
+ """
42
+ )
43
+ EFFECTS = [
44
+ ["remix", "-"],
45
+ ["channels", "1"],
46
+ ["rate", "16000"],
47
+ ["gain", "-1.0"],
48
+ ["silence", "1", "0.1", "0.1%", "-1", "0.1", "0.1%"],
49
+ ["trim", "0", "10"],
50
+ ]
51
+
52
+ THRESHOLD = 0.70
53
  def voiceRecognition(audio1,audio2):
54
+ score = run(audio1,audio2)
55
+ if score >= THRESHOLD:
56
+ output = OUTPUT_OK.format(score * 100)
57
+ else:
58
+ output = OUTPUT_FAIL.format(score * 100)
59
+ return output
60
 
61
 
62
  title = "Voice Recognition"
 
64
 
65
  inputs = [gr.inputs.Audio(source="upload",type="filepath", label="Speaker1"),
66
  gr.inputs.Audio(source="upload",type="filepath", label="Speaker2")]
67
+
68
+ output = gr.outputs.HTML(label="")
69
 
70
  article = (
71
  "<p style='text-align: center'>"
 
85
  interface = gr.Interface(
86
  fn=voiceRecognition,
87
  inputs=inputs,
88
+ outputs=output,
89
  title=title,
90
  description=description,
91
  examples=examples,
infer_contrast.py CHANGED
@@ -43,9 +43,5 @@ def run(audio1,audio2):
43
  feature2 = infer(audio2)[0]
44
  # 对角余弦值
45
  dist = np.dot(feature1, feature2) / (np.linalg.norm(feature1) * np.linalg.norm(feature2))
46
- if dist > args.threshold:
47
- result = "Speaker1 和 Speaker2 为同一个人,相似度为:%f" % (dist)
48
- else:
49
- result = "Speaker1 和 Speaker2 不是同一个人,相似度为:%f" % (dist)
50
-
51
- return result
 
43
  feature2 = infer(audio2)[0]
44
  # 对角余弦值
45
  dist = np.dot(feature1, feature2) / (np.linalg.norm(feature1) * np.linalg.norm(feature2))
46
+
47
+ return dist