Spaces:
abhi99555
/
Runtime error

abhi99555 peb-peb committed on
Commit
d532a4d
•
0 Parent(s):

Duplicate from peb-peb/shravan

Browse files

Co-authored-by: Harsh <peb-peb@users.noreply.huggingface.co>

Files changed (14) hide show
  1. .gitattributes +36 -0
  2. .gitignore +162 -0
  3. CODE_OF_CONDUCT.md +66 -0
  4. CONTRIBUTING.md +15 -0
  5. LICENSE +21 -0
  6. README.md +14 -0
  7. app.py +61 -0
  8. data.py +24 -0
  9. examples/sample4.wav +3 -0
  10. requirements.txt +4 -0
  11. sentiment_analysis.py +5 -0
  12. summary.py +5 -0
  13. topic.py +5 -0
  14. transcribe.py +96 -0
.gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.wav filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea/
161
+
162
+ gradio*
CODE_OF_CONDUCT.md ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Code of Conduct
2
+
3
+ ### Our Pledge
4
+
5
+ In the interest of fostering an open and welcoming environment, we as
6
+ contributors and maintainers pledge to making participation in our project and
7
+ our community a harassment-free experience for everyone, regardless of age, body
8
+ size, disability, ethnicity, gender identity and expression, level of experience,
9
+ nationality, personal appearance, race, religion, or sexual identity and
10
+ orientation.
11
+
12
+ ### Our Standards
13
+
14
+ Examples of behavior that contributes to creating a positive environment
15
+ include:
16
+
17
+ * Using welcoming and inclusive language
18
+ * Being respectful of differing viewpoints and experiences
19
+ * Gracefully accepting constructive criticism
20
+ * Focusing on what is best for the community
21
+ * Showing empathy towards other community members
22
+
23
+ Examples of unacceptable behavior by participants include:
24
+
25
+ * The use of sexualized language or imagery and unwelcome sexual attention or
26
+ advances
27
+ * Trolling, insulting/derogatory comments, and personal or political attacks
28
+ * Public or private harassment
29
+ * Publishing others' private information, such as a physical or electronic
30
+ address, without explicit permission
31
+ * Other conduct which could reasonably be considered inappropriate in a
32
+ professional setting
33
+
34
+ ### Our Responsibilities
35
+
36
+ Project maintainers are responsible for clarifying the standards of acceptable
37
+ behavior and are expected to take appropriate and fair corrective action in
38
+ response to any instances of unacceptable behavior.
39
+
40
+ Project maintainers have the right and responsibility to remove, edit, or
41
+ reject comments, commits, code, wiki edits, issues, and other contributions
42
+ that are not aligned to this Code of Conduct, or to ban temporarily or
43
+ permanently any contributor for other behaviors that they deem inappropriate,
44
+ threatening, offensive, or harmful.
45
+
46
+ ### Scope
47
+
48
+ This Code of Conduct applies both within project spaces and in public spaces
49
+ when an individual is representing the project or its community. Examples of
50
+ representing a project or community include using an official project e-mail
51
+ address, posting via an official social media account, or acting as an appointed
52
+ representative at an online or offline event. Representation of a project may be
53
+ further defined and clarified by project maintainers.
54
+
55
+ ### Enforcement
56
+
57
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be
58
+ reported by contacting the project team at [INSERT EMAIL ADDRESS]. All
59
+ complaints will be reviewed and investigated and will result in a response that
60
+ is deemed necessary and appropriate to the circumstances. The project team is
61
+ obligated to maintain confidentiality with regard to the reporter of an incident.
62
+ Further details of specific enforcement policies may be posted separately.
63
+
64
+ Project maintainers who do not follow or enforce the Code of Conduct in good
65
+ faith may face temporary or permanent repercussions as determined by other
66
+ members of the project's leadership.
CONTRIBUTING.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Contributing
2
+
3
+ When contributing to this repository, please first discuss the change you wish to make via issue,
4
+ email, or any other method with the owners of this repository before making a change.
5
+
6
+ Please note that we have a code of conduct; please follow it in all your interactions with the project.
7
+
8
+ ## Pull Request Process
9
+
10
+ 1. Ensure any install or build dependencies are removed before the end of the layer when doing a
11
+ build.
12
+ 2. Update the README.md with details of changes to the interface, this includes new environment
13
+ variables, exposed ports, useful file locations and container parameters.
14
+ 3. You may merge the Pull Request in once you have the sign-off of two other developers, or if you
15
+ do not have permission to do that, you may request the second reviewer to merge it for you.
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Harsh
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Shravan
3
+ emoji: 👀
4
+ colorFrom: indigo
5
+ colorTo: yellow
6
+ sdk: gradio
7
+ sdk_version: 3.35.2
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ duplicated_from: peb-peb/shravan
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ # from transcribe import transcribe
4
+ from sentiment_analysis import sentiment_analyser
5
+ from summary import summarizer
6
+ from topic import topic_gen
7
+ from data import data
8
+
9
def transcribe2():
    """Call the hosted Whisper speaker-recognition Space and return its output.

    Posts a fixed, embedded base64 WAV payload together with a speaker count
    of 2 to the remote ``/run/predict`` endpoint.

    Returns:
        The value stored under the ``"data"`` key of the JSON response.

    Raises:
        requests.RequestException: on network failure, timeout, or a non-2xx
            HTTP status.
        KeyError: if the JSON response has no ``"data"`` key.
    """
    response = requests.post(
        "https://dwarkesh-whisper-speaker-recognition.hf.space/run/predict",
        json={
            "data": [
                {"name":"audio.wav","data":"data:audio/wav;base64,UklGRiQAAABXQVZFZm10IBAAAAABAAEARKwAAIhYAQACABAAZGF0YQAAAAA="},
                2,
            ]
        },
        # Without a timeout a stalled remote Space would block this call forever.
        timeout=600,
    )
    # Surface HTTP errors explicitly instead of failing later on a missing key.
    response.raise_for_status()
    # The original stored this in a local and returned nothing, so the result
    # of the request was unreachable by callers.
    return response.json()["data"]
17
+
18
def main(audio_file, number_of_speakers):
    """Produce a title, a summary and a sentiment label for a conversation.

    The transcription step is currently disabled, so ``audio_file`` and
    ``number_of_speakers`` are accepted but not used; the bundled sample
    transcript ``data`` is analysed instead.

    Returns:
        A ``(topic, summary, sentiment)`` tuple of strings, where the
        sentiment is formatted as ``"<label> (<score * 100>%)"``.
    """
    transcript = data

    title = topic_gen(transcript)[0]["generated_text"]
    digest = summarizer(transcript)[0]["summary_text"]

    # Only the first result returned by the sentiment model is reported.
    top_sentiment = sentiment_analyser(transcript)[0]
    percentage = float(top_sentiment["score"]) * 100
    sentiment_text = "{} ({}%)".format(top_sentiment["label"], percentage)

    return title, digest, sentiment_text
28
+
29
# UI Interface on the Hugging Face Page
# NOTE(review): the nesting of the layout blocks below was reconstructed from a
# flattened diff view that had lost its indentation -- confirm it against the
# repository before relying on the exact layout.
with gr.Blocks() as demo:
    gr.Markdown("# Shravan - Unlocking Value from Call Data")
    with gr.Box():
        with gr.Row():
            # Input side: audio upload, speaker count and the action buttons.
            with gr.Column():
                audio_file = gr.Audio(label="Upload an Audio file (.wav)", source="upload", type="filepath")
                number_of_speakers = gr.Number(label="Number of Speakers", value=2)
                with gr.Row():
                    btn_clear = gr.ClearButton(value="Clear", components=[audio_file, number_of_speakers])
                    btn_submit = gr.Button(value="Submit")
            # Output side: the three text boxes filled from main()'s return tuple.
            with gr.Column():
                topic = gr.Textbox(label="Title", placeholder="Title for Conversation")
                summary = gr.Textbox(label="Short Summary", placeholder="Short Summary for Conversation")
                sentiment_analysis = gr.Textbox(label="Sentiment Analysis", placeholder="Sentiment Analysis for Conversation")
        # Submit runs main() on the two inputs and writes to the three outputs.
        btn_submit.click(fn=main, inputs=[audio_file, number_of_speakers], outputs=[topic, summary, sentiment_analysis])
    gr.Markdown("## Examples")
    # Example row wired to the same inputs/outputs and the same handler.
    gr.Examples(
        examples=[
            ["./examples/sample4.wav", 2],
        ],
        inputs=[audio_file, number_of_speakers],
        outputs=[topic, summary, sentiment_analysis],
        fn=main,
    )
    gr.Markdown(
        """
        NOTE: The Tool takes around 5mins to run. So be patient! ;)
        See [https://github.com/peb-peb/shravan](https://github.com/peb-peb/shravan) for more details.
        """
    )

# Start the Gradio app as soon as this module is executed.
demo.launch()
data.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Hard-coded sample call transcript. Each turn is a "SPEAKER <n> <h:mm:ss>"
# header line followed by the spoken text. app.py imports this constant and
# analyses it in place of a live transcription.
data = """
SPEAKER 1 0:00:00
Thank you for calling Martha's Flowers. How may I assist you?
SPEAKER 2 0:00:04
Hello, I'd like to order flowers and I think you have what I'm looking for.
SPEAKER 1 0:00:08
I'd be happy to take care of your order. May I have your name please?
SPEAKER 2 0:00:10
Randall Thomas.
SPEAKER 1 0:00:12
Randall Thomas. Can you spell that for me?
SPEAKER 2 0:00:16
Randall, R-A-N-D-A-L-L, Thomas, T-H-O-M-A-N.
SPEAKER 1 0:00:22
Thank you for that information, Randall. May I have your home or office number and area code first?
SPEAKER 2 0:00:28
Area code 409, then 866-5088.
SPEAKER 1 0:00:34
That's 409-866-5088. Do you have a fax number or email address?
SPEAKER 2 0:00:42
My email is randall.thomas at gmail.com.
SPEAKER 1 0:00:46
randall.thomas at gmail.com.
"""
examples/sample4.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:839bd656f8baf785086c86ef835c7b9ad7118c6adf14bb6bdf7607a327c6129b
3
+ size 8820078
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ git+https://github.com/pyannote/pyannote-audio
2
+ git+https://github.com/openai/whisper.git
3
+ git+https://github.com/huggingface/transformers
4
+ gradio
sentiment_analysis.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from transformers import pipeline
2
+
3
# Lazily created sentiment pipeline, shared by every call.
_sentiment_pipeline = None


def sentiment_analyser(text):
    """Classify the sentiment of ``text``.

    Uses the ``siebert/sentiment-roberta-large-english`` model through the
    transformers ``sentiment-analysis`` pipeline.

    The pipeline is built on first use and then reused; the original rebuilt
    it (and so reloaded the model) on every call.

    Args:
        text: The text to classify.

    Returns:
        Whatever the pipeline returns for ``text``; the caller in app.py
        reads ``[0]["label"]`` and ``[0]["score"]`` from it.
    """
    global _sentiment_pipeline
    if _sentiment_pipeline is None:
        _sentiment_pipeline = pipeline(
            "sentiment-analysis",
            model="siebert/sentiment-roberta-large-english",
        )
    return _sentiment_pipeline(text)
summary.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from transformers import pipeline
2
+
3
# Lazily created summarization pipeline, shared by every call.
_summary_pipeline = None


def summarizer(text):
    """Summarize the conversation in ``text``.

    Uses the ``knkarthick/MEETING-SUMMARY-BART-LARGE-XSUM-SAMSUM-DIALOGSUM``
    model through the transformers ``summarization`` pipeline.

    The pipeline is built on first use and then reused; the original rebuilt
    it (and so reloaded the model) on every call.

    Args:
        text: The text to summarize.

    Returns:
        Whatever the pipeline returns for ``text``; the caller in app.py
        reads ``[0]["summary_text"]`` from it.
    """
    global _summary_pipeline
    if _summary_pipeline is None:
        _summary_pipeline = pipeline(
            "summarization",
            model="knkarthick/MEETING-SUMMARY-BART-LARGE-XSUM-SAMSUM-DIALOGSUM",
        )
    return _summary_pipeline(text)
topic.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from transformers import pipeline
2
+
3
# Lazily created topic-generation pipeline, shared by every call.
_topic_pipeline = None


def topic_gen(text):
    """Generate a short topic/title for the conversation in ``text``.

    Uses the ``knkarthick/TOPIC-DIALOGSUM`` model through the transformers
    ``text2text-generation`` pipeline.

    The pipeline is built on first use and then reused; the original rebuilt
    it (and so reloaded the model) on every call.

    Args:
        text: The text to derive a topic from.

    Returns:
        Whatever the pipeline returns for ``text``; the caller in app.py
        reads ``[0]["generated_text"]`` from it.
    """
    global _topic_pipeline
    if _topic_pipeline is None:
        _topic_pipeline = pipeline(
            "text2text-generation",
            model="knkarthick/TOPIC-DIALOGSUM",
        )
    return _topic_pipeline(text)
transcribe.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import whisper
2
+ import datetime
3
+ import subprocess
4
+ import wave
5
+ import contextlib
6
+
7
+
8
+ import torch
9
+ import pyannote.audio
10
+ from pyannote.audio.pipelines.speaker_verification import PretrainedSpeakerEmbedding
11
+ from pyannote.audio import Audio
12
+ from pyannote.core import Segment
13
+ from sklearn.cluster import AgglomerativeClustering
14
+ import numpy as np
15
+
16
# Speech-to-text model, loaded once when the module is imported.
model = whisper.load_model("large-v2")

# Speaker-embedding model; placed on the GPU when one is available.
_device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
embedding_model = PretrainedSpeakerEmbedding(
    "speechbrain/spkrec-ecapa-voxceleb",
    device=torch.device(_device_name),
)
21
+
22
def transcribe(audio, num_speakers):
    """Transcribe an audio file and label each segment with a speaker.

    Args:
        audio: Path of the audio file; it is converted to ``.wav`` first when
            needed.
        num_speakers: Requested number of speakers. It is rounded and clamped
            to the range ``1 .. number of transcribed segments``.

    Returns:
        The formatted transcript string, or an error-message string when the
        conversion fails or the audio is longer than four hours. An empty
        string is returned when no speech segments were produced.
    """
    path, error = convert_to_wav(audio)
    if error is not None:
        return error

    duration = get_duration(path)
    if duration > 4 * 60 * 60:
        return "Audio duration too long"

    result = model.transcribe(path)
    segments = result["segments"]
    if not segments:
        # With no segments the speaker count would clamp to 0 and the
        # clustering branch would be entered with nothing to cluster.
        return ''

    num_speakers = min(max(round(num_speakers), 1), len(segments))
    if len(segments) == 1:
        # A single segment cannot be clustered; give it the only label.
        segments[0]['speaker'] = 'SPEAKER 1'
    else:
        embeddings = make_embeddings(path, segments, duration)
        add_speaker_labels(segments, embeddings, num_speakers)
    return get_output(segments)
43
+
44
def convert_to_wav(path):
    """Ensure the audio at ``path`` is available as a ``.wav`` file.

    A path whose extension is ``.wav`` (in any letter case) is returned
    unchanged. Any other file is converted with ``ffmpeg`` into a sibling
    file that has the same name and a ``.wav`` extension.

    Args:
        path: Path of the input audio file.

    Returns:
        ``(wav_path, None)`` on success, or ``(path, error_message)`` when
        ffmpeg cannot be started or exits with a non-zero status.
    """
    import os  # local import: keeps this block independent of the file's import list

    root, extension = os.path.splitext(path)
    if extension.lower() == '.wav':
        return path, None

    # splitext also handles paths without an extension, where splitting on
    # '.' would produce a broken output name.
    new_path = root + '.wav'
    try:
        # '-y' (overwrite) is placed before the files so that it is not a
        # trailing option after the output.
        completed = subprocess.run(['ffmpeg', '-y', '-i', path, new_path])
    except OSError:
        # ffmpeg is missing or could not be executed.
        return path, 'Error: Could not convert file to .wav'
    if completed.returncode != 0:
        # A failed conversion does not raise; it is only visible in the
        # exit status.
        return path, 'Error: Could not convert file to .wav'
    return new_path, None
53
+
54
def get_duration(path):
    """Return the duration in seconds of the WAV file at ``path``.

    The duration is the number of frames divided by the frame rate.
    """
    with wave.open(path, 'rb') as wav_file:
        frame_count = wav_file.getnframes()
        frame_rate = wav_file.getframerate()
    return frame_count / float(frame_rate)
59
+
60
def make_embeddings(path, segments, duration):
    """Build one speaker-embedding row per transcribed segment.

    Args:
        path: Path of the audio file.
        segments: The transcribed segments.
        duration: Total audio duration in seconds, passed on to
            ``segment_embedding``.

    Returns:
        An array of shape ``(len(segments), 192)`` with NaNs replaced via
        ``np.nan_to_num``.
    """
    # 192 columns per row -- presumably the embedding model's output size;
    # TODO confirm.
    vectors = np.zeros(shape=(len(segments), 192))
    for row, current in enumerate(segments):
        vectors[row] = segment_embedding(path, current, duration)
    cleaned = np.nan_to_num(vectors)
    return cleaned
65
+
66
# Shared audio reader used to crop segments out of the file.
audio = Audio()


def segment_embedding(path, segment, duration):
    """Return the speaker embedding for one transcribed segment.

    Args:
        path: Path of the audio file.
        segment: Mapping with ``"start"`` and ``"end"`` times.
        duration: Total audio duration, used as an upper bound for the
            segment end.

    Returns:
        The result of running the embedding model on the cropped waveform.
    """
    # Whisper overshoots the end timestamp in the last segment, so the end
    # is capped at the audio duration.
    window = Segment(segment["start"], min(duration, segment["end"]))
    waveform, _sample_rate = audio.crop(path, window)
    # A leading axis is added before the waveform is passed to the model.
    batched = waveform[None]
    return embedding_model(batched)
75
+
76
def add_speaker_labels(segments, embeddings, num_speakers):
    """Cluster the embeddings and store a speaker label on each segment.

    Each segment gets a ``"speaker"`` entry of the form ``"SPEAKER <k>"``,
    where ``k`` is its cluster label plus one. ``segments`` is modified in
    place; nothing is returned.
    """
    fitted = AgglomerativeClustering(num_speakers).fit(embeddings)
    cluster_ids = fitted.labels_
    for position, segment in enumerate(segments):
        # Cluster ids start at 0; the displayed speaker numbers start at 1.
        segment["speaker"] = 'SPEAKER ' + str(cluster_ids[position] + 1)
82
+
83
def time(secs):
    """Return ``secs`` rounded to whole seconds as a ``datetime.timedelta``."""
    return datetime.timedelta(seconds=round(secs))


def get_output(segments):
    """Format labelled segments into a speaker-by-speaker transcript.

    A header line ``"<speaker> <h:mm:ss>"`` is written whenever the speaker
    differs from the previous segment's speaker (and for the first segment).
    Speaker turns are separated by a blank line, and each segment's text is
    followed by a single space.

    Args:
        segments: Sequence of mappings with ``"speaker"``, ``"start"`` and
            ``"text"`` keys.

    Returns:
        The formatted transcript; an empty string for no segments.
    """
    parts = []
    previous_speaker = None
    for index, segment in enumerate(segments):
        speaker = segment["speaker"]
        if index == 0 or speaker != previous_speaker:
            if index != 0:
                parts.append('\n\n')
            parts.append(speaker + ' ' + str(time(segment["start"])) + '\n')
        text = segment["text"]
        # Drop one leading space when present. Slicing off the first
        # character unconditionally loses a letter when the text does not
        # start with a space.
        if text.startswith(' '):
            text = text[1:]
        parts.append(text + ' ')
        previous_speaker = speaker
    return ''.join(parts)