Commit
·
5b00b76
verified
·
0
Parent(s):
Initial release
Browse files- .github/workflows/ci-cd.yaml +33 -0
- .gitignore +4 -0
- .streamlit/config.toml +8 -0
- LICENSE +201 -0
- README.md +80 -0
- app.py +236 -0
- languages.py +101 -0
- packages.txt +1 -0
- requirements.txt +10 -0
- utils.py +96 -0
.github/workflows/ci-cd.yaml
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright Jiaqi Liu
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
---
|
15 |
+
name: CI/CD
|
16 |
+
|
17 |
+
on:
|
18 |
+
pull_request:
|
19 |
+
push:
|
20 |
+
branches: [master]
|
21 |
+
|
22 |
+
jobs:
|
23 |
+
sync-to-huggingface-space:
|
24 |
+
runs-on: ubuntu-latest
|
25 |
+
steps:
|
26 |
+
- uses: actions/checkout@v3
|
27 |
+
with:
|
28 |
+
fetch-depth: 0
|
29 |
+
lfs: true
|
30 |
+
- name: Push to hub
|
31 |
+
run: git push https://QubitPi:$HF_TOKEN@huggingface.co/spaces/QubitPi/tiger master:main -f
|
32 |
+
env:
|
33 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
.gitignore
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.venv
|
2 |
+
.idea/
|
3 |
+
.DS_Store
|
4 |
+
__pycache__
|
.streamlit/config.toml
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[theme]
|
2 |
+
primaryColor="#F63366"
|
3 |
+
backgroundColor="#FFFFFF"
|
4 |
+
secondaryBackgroundColor="#F0F2F6"
|
5 |
+
textColor="#262730"
|
6 |
+
font="sans serif"
|
7 |
+
[server]
|
8 |
+
maxUploadSize=1028
|
LICENSE
ADDED
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Apache License
|
2 |
+
Version 2.0, January 2004
|
3 |
+
http://www.apache.org/licenses/
|
4 |
+
|
5 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
6 |
+
|
7 |
+
1. Definitions.
|
8 |
+
|
9 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
10 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
11 |
+
|
12 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
13 |
+
the copyright owner that is granting the License.
|
14 |
+
|
15 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
16 |
+
other entities that control, are controlled by, or are under common
|
17 |
+
control with that entity. For the purposes of this definition,
|
18 |
+
"control" means (i) the power, direct or indirect, to cause the
|
19 |
+
direction or management of such entity, whether by contract or
|
20 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
21 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
22 |
+
|
23 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
24 |
+
exercising permissions granted by this License.
|
25 |
+
|
26 |
+
"Source" form shall mean the preferred form for making modifications,
|
27 |
+
including but not limited to software source code, documentation
|
28 |
+
source, and configuration files.
|
29 |
+
|
30 |
+
"Object" form shall mean any form resulting from mechanical
|
31 |
+
transformation or translation of a Source form, including but
|
32 |
+
not limited to compiled object code, generated documentation,
|
33 |
+
and conversions to other media types.
|
34 |
+
|
35 |
+
"Work" shall mean the work of authorship, whether in Source or
|
36 |
+
Object form, made available under the License, as indicated by a
|
37 |
+
copyright notice that is included in or attached to the work
|
38 |
+
(an example is provided in the Appendix below).
|
39 |
+
|
40 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
41 |
+
form, that is based on (or derived from) the Work and for which the
|
42 |
+
editorial revisions, annotations, elaborations, or other modifications
|
43 |
+
represent, as a whole, an original work of authorship. For the purposes
|
44 |
+
of this License, Derivative Works shall not include works that remain
|
45 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
46 |
+
the Work and Derivative Works thereof.
|
47 |
+
|
48 |
+
"Contribution" shall mean any work of authorship, including
|
49 |
+
the original version of the Work and any modifications or additions
|
50 |
+
to that Work or Derivative Works thereof, that is intentionally
|
51 |
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
52 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
53 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
54 |
+
means any form of electronic, verbal, or written communication sent
|
55 |
+
to the Licensor or its representatives, including but not limited to
|
56 |
+
communication on electronic mailing lists, source code control systems,
|
57 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
58 |
+
Licensor for the purpose of discussing and improving the Work, but
|
59 |
+
excluding communication that is conspicuously marked or otherwise
|
60 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
61 |
+
|
62 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
63 |
+
on behalf of whom a Contribution has been received by Licensor and
|
64 |
+
subsequently incorporated within the Work.
|
65 |
+
|
66 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
67 |
+
this License, each Contributor hereby grants to You a perpetual,
|
68 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
69 |
+
copyright license to reproduce, prepare Derivative Works of,
|
70 |
+
publicly display, publicly perform, sublicense, and distribute the
|
71 |
+
Work and such Derivative Works in Source or Object form.
|
72 |
+
|
73 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
74 |
+
this License, each Contributor hereby grants to You a perpetual,
|
75 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
76 |
+
(except as stated in this section) patent license to make, have made,
|
77 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
78 |
+
where such license applies only to those patent claims licensable
|
79 |
+
by such Contributor that are necessarily infringed by their
|
80 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
81 |
+
with the Work to which such Contribution(s) was submitted. If You
|
82 |
+
institute patent litigation against any entity (including a
|
83 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
84 |
+
or a Contribution incorporated within the Work constitutes direct
|
85 |
+
or contributory patent infringement, then any patent licenses
|
86 |
+
granted to You under this License for that Work shall terminate
|
87 |
+
as of the date such litigation is filed.
|
88 |
+
|
89 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
90 |
+
Work or Derivative Works thereof in any medium, with or without
|
91 |
+
modifications, and in Source or Object form, provided that You
|
92 |
+
meet the following conditions:
|
93 |
+
|
94 |
+
(a) You must give any other recipients of the Work or
|
95 |
+
Derivative Works a copy of this License; and
|
96 |
+
|
97 |
+
(b) You must cause any modified files to carry prominent notices
|
98 |
+
stating that You changed the files; and
|
99 |
+
|
100 |
+
(c) You must retain, in the Source form of any Derivative Works
|
101 |
+
that You distribute, all copyright, patent, trademark, and
|
102 |
+
attribution notices from the Source form of the Work,
|
103 |
+
excluding those notices that do not pertain to any part of
|
104 |
+
the Derivative Works; and
|
105 |
+
|
106 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
107 |
+
distribution, then any Derivative Works that You distribute must
|
108 |
+
include a readable copy of the attribution notices contained
|
109 |
+
within such NOTICE file, excluding those notices that do not
|
110 |
+
pertain to any part of the Derivative Works, in at least one
|
111 |
+
of the following places: within a NOTICE text file distributed
|
112 |
+
as part of the Derivative Works; within the Source form or
|
113 |
+
documentation, if provided along with the Derivative Works; or,
|
114 |
+
within a display generated by the Derivative Works, if and
|
115 |
+
wherever such third-party notices normally appear. The contents
|
116 |
+
of the NOTICE file are for informational purposes only and
|
117 |
+
do not modify the License. You may add Your own attribution
|
118 |
+
notices within Derivative Works that You distribute, alongside
|
119 |
+
or as an addendum to the NOTICE text from the Work, provided
|
120 |
+
that such additional attribution notices cannot be construed
|
121 |
+
as modifying the License.
|
122 |
+
|
123 |
+
You may add Your own copyright statement to Your modifications and
|
124 |
+
may provide additional or different license terms and conditions
|
125 |
+
for use, reproduction, or distribution of Your modifications, or
|
126 |
+
for any such Derivative Works as a whole, provided Your use,
|
127 |
+
reproduction, and distribution of the Work otherwise complies with
|
128 |
+
the conditions stated in this License.
|
129 |
+
|
130 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
131 |
+
any Contribution intentionally submitted for inclusion in the Work
|
132 |
+
by You to the Licensor shall be under the terms and conditions of
|
133 |
+
this License, without any additional terms or conditions.
|
134 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
135 |
+
the terms of any separate license agreement you may have executed
|
136 |
+
with Licensor regarding such Contributions.
|
137 |
+
|
138 |
+
6. Trademarks. This License does not grant permission to use the trade
|
139 |
+
names, trademarks, service marks, or product names of the Licensor,
|
140 |
+
except as required for reasonable and customary use in describing the
|
141 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
142 |
+
|
143 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
144 |
+
agreed to in writing, Licensor provides the Work (and each
|
145 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
146 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
147 |
+
implied, including, without limitation, any warranties or conditions
|
148 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
149 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
150 |
+
appropriateness of using or redistributing the Work and assume any
|
151 |
+
risks associated with Your exercise of permissions under this License.
|
152 |
+
|
153 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
154 |
+
whether in tort (including negligence), contract, or otherwise,
|
155 |
+
unless required by applicable law (such as deliberate and grossly
|
156 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
157 |
+
liable to You for damages, including any direct, indirect, special,
|
158 |
+
incidental, or consequential damages of any character arising as a
|
159 |
+
result of this License or out of the use or inability to use the
|
160 |
+
Work (including but not limited to damages for loss of goodwill,
|
161 |
+
work stoppage, computer failure or malfunction, or any and all
|
162 |
+
other commercial damages or losses), even if such Contributor
|
163 |
+
has been advised of the possibility of such damages.
|
164 |
+
|
165 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
166 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
167 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
168 |
+
or other liability obligations and/or rights consistent with this
|
169 |
+
License. However, in accepting such obligations, You may act only
|
170 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
171 |
+
of any other Contributor, and only if You agree to indemnify,
|
172 |
+
defend, and hold each Contributor harmless for any liability
|
173 |
+
incurred by, or claims asserted against, such Contributor by reason
|
174 |
+
of your accepting any such warranty or additional liability.
|
175 |
+
|
176 |
+
END OF TERMS AND CONDITIONS
|
177 |
+
|
178 |
+
APPENDIX: How to apply the Apache License to your work.
|
179 |
+
|
180 |
+
To apply the Apache License to your work, attach the following
|
181 |
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
182 |
+
replaced with your own identifying information. (Don't include
|
183 |
+
the brackets!) The text should be enclosed in the appropriate
|
184 |
+
comment syntax for the file format. We also recommend that a
|
185 |
+
file or class name and description of purpose be included on the
|
186 |
+
same "printed page" as the copyright notice for easier
|
187 |
+
identification within third-party archives.
|
188 |
+
|
189 |
+
Copyright 2024 Jiaqi Liu
|
190 |
+
|
191 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
192 |
+
you may not use this file except in compliance with the License.
|
193 |
+
You may obtain a copy of the License at
|
194 |
+
|
195 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
196 |
+
|
197 |
+
Unless required by applicable law or agreed to in writing, software
|
198 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
199 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
200 |
+
See the License for the specific language governing permissions and
|
201 |
+
limitations under the License.
|
README.md
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Tiger
|
3 |
+
emoji: 🎥
|
4 |
+
colorFrom: blue
|
5 |
+
colorTo: indigo
|
6 |
+
sdk: streamlit
|
7 |
+
sdk_version: 1.39.0
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
license: apache-2.0
|
11 |
+
short_description: Movie transcription for language learners
|
12 |
+
---
|
13 |
+
|
14 |
+
[![Hugging Face space badge]][Hugging Face space URL]
|
15 |
+
[![Hugging Face sync status badge]][Hugging Face sync status URL]
|
16 |
+
[![Apache License Badge]][Apache License, Version 2.0]
|
17 |
+
|
18 |
+
Tiger
|
19 |
+
=====
|
20 |
+
|
21 |
+
__Tiger__ is an automatic speech recognition application that takes a video file as input and generates a video with
|
22 |
+
subtitles as well as downloadable .txt, .vtt, .srt transcription files. Tiger generates transcriptions using the
|
23 |
+
[OpenAI Whisper](https://openai.com/blog/whisper) models.
|
24 |
+
|
25 |
+
The app is available on [Hugging Face space][Project Tiger on Hugging Face]. Please check it out.
|
26 |
+
|
27 |
+
How Project Tiger Started
|
28 |
+
-------------------------
|
29 |
+
|
30 |
+
I don't believe NLP is inherently AI. [Project Tiger](), however, enlightened me on how AI should make NLP more
|
31 |
+
powerful for people.
|
32 |
+
|
33 |
+
Paraphrasing movie lines has been my favorite approach to learning a foreign language. This is why I was able to speak
|
34 |
+
near-native English early on. I use this same tactic for German today as well.
|
35 |
+
|
36 |
+
One morning I was studying German with the famous German WW2 movie
|
37 |
+
[_Generation War_](https://youtu.be/TmyGPX23px4?si=wk3V62vLUe9zFzfo). Finding its subtitles[^1][^2][^3] was
|
38 |
+
easy for me. A problem, however, soon arose as I proceeded with them: _the subtitles had errors - missing a couple of
|
39 |
+
phrases every few lines_. This was a big NO for language learners like me. In the old days, 10 years ago, I would've
|
40 |
+
reached a dead end...
|
41 |
+
|
42 |
+
[^1]: https://www.opensubtitles.org/en/subtitles/6243166/generation-war-eine-andere-zeit-de
|
43 |
+
[^2]: https://www.opensubtitles.org/en/subtitles/6243167/generation-war-ein-anderer-krieg-de
|
44 |
+
[^3]: https://www.opensubtitles.org/en/subtitles/6243168/generation-war-ein-anderes-land-de
|
45 |
+
|
46 |
+
Almost immediately, however, I had this idea of having AI figure out those missing words for me; so I tried
|
47 |
+
[OpenAI's Whisper](https://huggingface.co/spaces/openai/whisper) which, quite surprisingly, yielded 100% accurate results.
|
48 |
+
That was the first time AI produced significant, solid value in my life as a language learner.
|
49 |
+
|
50 |
+
Being so excited, I decided to industrialize this process and hereby present it as [Project Tiger on Hugging Face], an
|
51 |
+
__AI movie transcription service that specifically targets language learners__.
|
52 |
+
|
53 |
+
> [!NOTE]
|
54 |
+
>
|
55 |
+
> On the name "__Tiger__"
|
56 |
+
>
|
57 |
+
> The project name _tiger_ was taken from the _German heavy tank of World War II -
|
58 |
+
> [Tiger I](https://tanks-encyclopedia.com/ww2/germany/panzer-vi_tiger.php#index17)_.
|
59 |
+
>
|
60 |
+
> I chose this name because the project idea was spawned from the study of German language. More importantly, _Tiger I_
|
61 |
+
> has been widely recognized as a revolutionary tank and a crown jewel in the history of warfare technology:
|
62 |
+
>
|
63 |
+
> > _"It is tempting to view the appearance of the Tiger tank as revolutionary, ... the end result was a machine that
|
64 |
+
> > represented a quantum leap forward in tank design and changed everything - forever"_[^4]
|
65 |
+
>
|
66 |
+
> Just as _Tiger_ steered the direction of next-generation heavy tanks, this project has also revolutionized my view
|
67 |
+
> toward AI subconsciously and serves as an empirical guide on how I should make NLP more powerful for people with AI.
|
68 |
+
|
69 |
+
[^4]: _[Tiger tank : Panzerkampfwagen VI Tiger I Ausf. E (SdKfz 181): owner's workshop manual](https://a.co/d/28OzPsK)_, The Tank Museum, 2011. p.13
|
70 |
+
|
71 |
+
[Apache License Badge]: https://img.shields.io/badge/Apache%202.0-F25910.svg?style=for-the-badge&logo=Apache&logoColor=white
|
72 |
+
[Apache License, Version 2.0]: https://www.apache.org/licenses/LICENSE-2.0
|
73 |
+
|
74 |
+
[Hugging Face space badge]: https://img.shields.io/badge/Hugging%20Face%20Space-tiger-FFD21E?style=for-the-badge&logo=huggingface&logoColor=white
|
75 |
+
[Hugging Face space URL]: https://huggingface.co/spaces/QubitPi/tiger
|
76 |
+
|
77 |
+
[Hugging Face sync status badge]: https://img.shields.io/github/actions/workflow/status/QubitPi/tiger/ci-cd.yaml?branch=master&style=for-the-badge&logo=github&logoColor=white&label=Hugging%20Face%20Sync%20Up
|
78 |
+
[Hugging Face sync status URL]: https://github.com/QubitPi/tiger/actions/workflows/ci-cd.yaml
|
79 |
+
|
80 |
+
[Project Tiger on Hugging Face]: https://huggingface.co/spaces/QubitPi/tiger
|
app.py
ADDED
@@ -0,0 +1,236 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import whisper
|
2 |
+
import streamlit as st
|
3 |
+
from streamlit_lottie import st_lottie
|
4 |
+
from utils import write_vtt, write_srt
|
5 |
+
import ffmpeg
|
6 |
+
import requests
|
7 |
+
from typing import Iterator
|
8 |
+
from io import StringIO
|
9 |
+
import numpy as np
|
10 |
+
import pathlib
|
11 |
+
import os
|
12 |
+
|
13 |
+
st.set_page_config(page_title="Auto Subtitled Video Generator", page_icon=":movie_camera:", layout="wide")
|
14 |
+
|
15 |
+
|
16 |
+
# Define a function that we can use to load lottie files from a link.
|
17 |
+
def load_lottieurl(url: str):
    """Fetch a Lottie animation JSON document from ``url``.

    Args:
        url: Address of the Lottie JSON file to download.

    Returns:
        The decoded JSON payload, or ``None`` when the request fails, times
        out, answers with a status other than 200, or the body is not JSON.
    """
    try:
        # A timeout keeps a slow or unreachable asset host from hanging the page.
        r = requests.get(url, timeout=10)
    except requests.RequestException:
        return None
    if r.status_code != 200:
        return None
    try:
        return r.json()
    except ValueError:
        # The JSON decode errors raised by ``Response.json`` derive from ValueError.
        return None
|
22 |
+
|
23 |
+
|
24 |
+
# Directory that contains this script; generated files are kept beneath it.
APP_DIR = pathlib.Path(__file__).parent.absolute()

LOCAL_DIR = APP_DIR / "local"
LOCAL_DIR.mkdir(exist_ok=True)
# inferecence() writes the uploaded video and the extracted audio here.
save_dir = LOCAL_DIR / "output"
save_dir.mkdir(exist_ok=True)

# NOTE(review): main() binds its own local ``loaded_model`` via change_model(),
# so this module-level "base" model looks unused there — confirm before removing.
loaded_model = whisper.load_model("base")
# The string "None" (not the None object) is what main() passes to change_model().
current_size = "None"

col1, col2 = st.columns([1, 3])
with col1:
    lottie = load_lottieurl("https://assets1.lottiefiles.com/packages/lf20_HjK9Ol.json")
    # load_lottieurl returns None when the download fails; skip the animation then.
    if lottie is not None:
        st_lottie(lottie)
|
38 |
+
|
39 |
+
with col2:
|
40 |
+
st.write("""
|
41 |
+
## Auto Subtitled Video Generator
|
42 |
+
##### Upload a video file and get a video with subtitles.
|
43 |
+
###### ➠ If you want to transcribe the video in its original language, select the task as "Transcribe"
|
44 |
+
###### ➠ If you want to translate the subtitles to English, select the task as "Translate"
|
45 |
+
###### I recommend starting with the base model and then experimenting with the larger models, the small and medium models often work well. """)
|
46 |
+
|
47 |
+
|
48 |
+
def change_model(current_size, size):
    """Load the Whisper model for ``size`` when it differs from ``current_size``.

    Args:
        current_size: Name of the model size treated as already loaded.
        size: Requested model size, passed to ``whisper.load_model``.

    Returns:
        The model object returned by ``whisper.load_model(size)``.

    Raises:
        ValueError: If ``size`` equals ``current_size``.
    """
    if current_size == size:
        # ValueError is a subclass of Exception, so callers that catch
        # Exception keep working.
        raise ValueError("Model size is the same as the current size.")
    return whisper.load_model(size)
|
54 |
+
|
55 |
+
|
56 |
+
def inferecence(_loaded_model, uploaded_file, task):
    """Run Whisper on an uploaded video and build subtitle text from the result.

    The upload is saved as ``input.mp4`` under ``save_dir``, its audio track is
    extracted with ffmpeg to ``output.wav`` (16-bit PCM, one channel, 16 kHz),
    and that audio is passed to ``_loaded_model.transcribe``.

    Args:
        _loaded_model: Object exposing ``transcribe(path, **options)``.
        uploaded_file: Readable binary object holding the video bytes.
        task: ``"Transcribe"`` or ``"Translate"``.

    Returns:
        A tuple ``(text, vtt, srt, language)`` taken from the transcription
        result, with ``vtt`` and ``srt`` rendered by ``getSubs``.

    Raises:
        ValueError: If ``task`` is not one of the supported values.
    """
    whisper_tasks = {"Transcribe": "transcribe", "Translate": "translate"}
    # Validate before touching the disk or running ffmpeg.
    if task not in whisper_tasks:
        raise ValueError("Task not supported")

    video_path = f"{save_dir}/input.mp4"
    audio_path = f"{save_dir}/output.wav"
    with open(video_path, "wb") as f:
        f.write(uploaded_file.read())
    audio = ffmpeg.input(video_path)
    audio = ffmpeg.output(audio, audio_path, acodec="pcm_s16le", ac=1, ar="16k")
    ffmpeg.run(audio, overwrite_output=True)

    # Both tasks share the same pipeline; only the Whisper task name differs.
    options = dict(task=whisper_tasks[task], best_of=5)
    results = _loaded_model.transcribe(audio_path, **options)
    segments = results["segments"]
    vtt = getSubs(segments, "vtt", 80)
    srt = getSubs(segments, "srt", 80)
    lang = results["language"]
    return results["text"], vtt, srt, lang
|
78 |
+
|
79 |
+
|
80 |
+
def getSubs(segments: Iterator[dict], format: str, maxLineWidth: int) -> str:
    """Render transcription segments as subtitle text.

    Args:
        segments: Segment dicts forwarded unchanged to the subtitle writer.
        format: ``"vtt"`` or ``"srt"``.
        maxLineWidth: Forwarded to the writer as ``maxLineWidth``.

    Returns:
        Everything the selected writer wrote, as a single string.

    Raises:
        ValueError: If ``format`` is neither ``"vtt"`` nor ``"srt"``.
    """
    if format not in ("vtt", "srt"):
        # ValueError is a subclass of Exception, so callers that catch
        # Exception keep working.
        raise ValueError("Unknown format " + format)
    write_subs = write_vtt if format == "vtt" else write_srt

    with StringIO() as segmentStream:
        write_subs(segments, file=segmentStream, maxLineWidth=maxLineWidth)
        # getvalue() returns the whole buffer without rewinding first.
        return segmentStream.getvalue()
|
92 |
+
|
93 |
+
|
94 |
+
def generate_subtitled_video(video, audio, transcript):
    """Render ``video`` with ``transcript`` burned in and ``audio`` as its sound track.

    The result is written to ``final.mp4`` in the current working directory
    (overwriting any existing file) and then read back.

    Args:
        video: Path of the input video, passed to ``ffmpeg.input``.
        audio: Path of the audio track, passed to ``ffmpeg.input``.
        transcript: Subtitle file path handed to ffmpeg's ``subtitles`` filter.

    Returns:
        A binary file-like object holding the bytes of ``final.mp4``,
        positioned at the start.
    """
    # Local import: the module-level import only brings in StringIO.
    from io import BytesIO

    video_file = ffmpeg.input(video)
    audio_file = ffmpeg.input(audio)
    subtitled = video_file.filter("subtitles", transcript)
    ffmpeg.concat(subtitled, audio_file, v=1, a=1).output("final.mp4").run(quiet=True,
                                                                           overwrite_output=True)
    # Read through a context manager so no OS file handle is left open; the
    # BytesIO wrapper keeps the return value file-like for existing callers.
    with open("final.mp4", "rb") as rendered:
        return BytesIO(rendered.read())
|
101 |
+
|
102 |
+
|
103 |
+
def _save_transcript(file_name, text):
    """Write ``text`` to ``file_name`` as UTF-8 and return the file's raw bytes."""
    with open(file_name, "w", encoding='utf8') as f:
        f.write(text)
    with open(os.path.join(os.getcwd(), file_name), "rb") as f:
        return f.read()


def main():
    """Render the model, file and task inputs and run the selected task.

    When the run button for the selected task is pressed, the upload is passed
    to ``inferecence``; the text, VTT and SRT transcripts are written to the
    current working directory and offered as downloads, and a subtitled video
    produced by ``generate_subtitled_video`` is shown and offered as a download.
    Nothing is processed if no file has been uploaded.
    """
    size = st.selectbox(
        "Select Model Size (The larger the model, the more accurate the transcription will be, but it will take longer)",
        ["tiny", "base", "small", "medium", "large"], index=1)
    loaded_model = change_model(current_size, size)
    st.write(f"Model is {'multilingual' if loaded_model.is_multilingual else 'English-only'} "
             f"and has {sum(np.prod(p.shape) for p in loaded_model.parameters()):,} parameters.")
    input_file = st.file_uploader("File", type=["mp4", "avi", "mov", "mkv"])
    # splitext drops the extension whatever its length; slicing off four
    # characters is only right for three-letter extensions.
    filename = os.path.splitext(input_file.name)[0] if input_file is not None else None
    task = st.selectbox("Select Task", ["Transcribe", "Translate"], index=0)

    # (run button label, video download label) for each task. The trailing
    # space in the Translate download label is kept exactly as it was.
    labels = {
        "Transcribe": ("Transcribe", "Download Video with Subtitles"),
        "Translate": ("Translate to English", "Download Video with Subtitles "),
    }
    if task not in labels:
        st.error("Please select a task.")
        return
    run_label, video_label = labels[task]
    if not st.button(run_label):
        return
    if input_file is None:
        # Without this guard inferecence() would fail on ``None.read()``.
        st.error("Please upload a video file first.")
        return

    results = inferecence(loaded_model, input_file, task)
    col3, col4 = st.columns(2)
    col5, col6, col7, col8 = st.columns(4)
    col9, col10 = st.columns(2)
    with col3:
        st.video(input_file)

    # transcript.srt must exist on disk: it is handed to ffmpeg further down.
    datatxt = _save_transcript("transcript.txt", results[0])
    datavtt = _save_transcript("transcript.vtt", results[1])
    datasrt = _save_transcript("transcript.srt", results[2])

    with col5:
        st.download_button(label="Download Transcript (.txt)",
                           data=datatxt,
                           file_name="transcript.txt")
    with col6:
        st.download_button(label="Download Transcript (.vtt)",
                           data=datavtt,
                           file_name="transcript.vtt")
    with col7:
        st.download_button(label="Download Transcript (.srt)",
                           data=datasrt,
                           file_name="transcript.srt")
    with col9:
        st.success(
            "You can download the transcript in .srt format, edit it (if you need to) and upload it to YouTube to create subtitles for your video.")
    with col10:
        st.info(
            "Streamlit refreshes after the download button is clicked. The data is cached so you can download the transcript again without having to transcribe the video again.")

    with col4:
        with st.spinner("Generating Subtitled Video"):
            video_with_subs = generate_subtitled_video(f"{save_dir}/input.mp4", f"{save_dir}/output.wav",
                                                       "transcript.srt")
        st.video(video_with_subs)
        st.snow()
    with col8:
        st.download_button(label=video_label,
                           data=video_with_subs,
                           file_name=f"{filename}_with_subs.mp4")
|
231 |
+
|
232 |
+
|
233 |
+
if __name__ == "__main__":
|
234 |
+
main()
|
235 |
+
st.markdown(
|
236 |
+
"###### Made with :heart: by [@QubitPi](https://github.com/QubitPi) [](https://buymeacoffee.com/qubitpi)")
|
languages.py
ADDED
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Lookup table from short language codes (mostly two-letter) to three-letter
# language codes.
# NOTE(review): the keys look like the codes used by the transcription model
# (e.g. "iw" for Hebrew, "jw" for Javanese) and the values like ISO 639-3
# codes -- confirm against the callers before relying on either assumption.
LANGUAGES = {
    "en": "eng",
    "zh": "zho",
    "de": "deu",
    "es": "spa",
    "ru": "rus",
    "ko": "kor",
    "fr": "fra",
    "ja": "jpn",
    "pt": "por",
    "tr": "tur",
    "pl": "pol",
    "ca": "cat",
    "nl": "nld",
    "ar": "ara",
    "sv": "swe",
    "it": "ita",
    "id": "ind",
    "hi": "hin",
    "fi": "fin",
    "vi": "vie",
    "iw": "heb",
    "uk": "ukr",
    "el": "ell",
    "ms": "msa",
    "cs": "ces",
    "ro": "ron",
    "da": "dan",
    "hu": "hun",
    "ta": "tam",
    "no": "nor",
    "th": "tha",
    "ur": "urd",
    "hr": "hrv",
    "bg": "bul",
    "lt": "lit",
    "la": "lat",
    "mi": "mri",
    "ml": "mal",
    "cy": "cym",
    "sk": "slk",
    "te": "tel",
    "fa": "fas",
    "lv": "lav",
    "bn": "ben",
    "sr": "srp",
    "az": "aze",
    "sl": "slv",
    "kn": "kan",
    "et": "est",
    "mk": "mkd",
    "br": "bre",
    "eu": "eus",
    "is": "isl",
    "hy": "hye",
    "ne": "nep",
    "mn": "mon",
    "bs": "bos",
    "kk": "kaz",
    "sq": "sqi",
    "sw": "swa",
    "gl": "glg",
    "mr": "mar",
    "pa": "pan",
    "si": "sin",
    "km": "khm",
    "sn": "sna",
    "yo": "yor",
    "so": "som",
    "af": "afr",
    "oc": "oci",
    "ka": "kat",
    "be": "bel",
    "tg": "tgk",
    "sd": "snd",
    "gu": "guj",
    "am": "amh",
    "yi": "yid",
    "lo": "lao",
    "uz": "uzb",
    "fo": "fao",
    "ht": "hat",
    "ps": "pus",
    "tk": "tuk",
    "nn": "nno",
    "mt": "mlt",
    "sa": "san",
    "lb": "ltz",
    "my": "mya",
    "bo": "bod",
    "tl": "tgl",
    "mg": "mlg",
    "as": "asm",
    "tt": "tat",
    "haw": "haw",
    "ln": "lin",
    "ha": "hau",
    "ba": "bak",
    "jw": "jav",
    "su": "sun",
}
|
packages.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
ffmpeg
|
requirements.txt
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ffmpeg
|
2 |
+
ffmpeg_python
|
3 |
+
numpy
|
4 |
+
pytubefix
|
5 |
+
requests
|
6 |
+
streamlit
|
7 |
+
streamlit_lottie
|
8 |
+
torch
|
9 |
+
transformers
|
10 |
+
openai-whisper
|
utils.py
ADDED
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import textwrap
|
2 |
+
import zlib
|
3 |
+
from typing import Iterator, TextIO
|
4 |
+
|
5 |
+
|
6 |
+
def exact_div(x, y):
    """Divide ``x`` by ``y``, requiring that the division leaves no remainder.

    Returns the floor quotient ``x // y``. When assertions are enabled, an
    ``AssertionError`` is raised if ``y`` does not divide ``x`` evenly.
    """
    quotient, remainder = divmod(x, y)
    assert remainder == 0
    return quotient
|
9 |
+
|
10 |
+
|
11 |
+
def str2bool(string):
    """Translate the literal text ``"True"`` or ``"False"`` into a bool.

    Raises:
        ValueError: if ``string`` is anything other than those two literals.
    """
    mapping = {"True": True, "False": False}
    if string not in mapping:
        raise ValueError(f"Expected one of {set(mapping.keys())}, got {string}")
    return mapping[string]
|
17 |
+
|
18 |
+
|
19 |
+
def optional_int(string):
    """Parse ``string`` as an int, mapping the literal ``"None"`` to ``None``."""
    if string == "None":
        return None
    return int(string)
|
21 |
+
|
22 |
+
|
23 |
+
def optional_float(string):
    """Parse ``string`` as a float, mapping the literal ``"None"`` to ``None``."""
    if string == "None":
        return None
    return float(string)
|
25 |
+
|
26 |
+
|
27 |
+
def compression_ratio(text) -> float:
    """Return how much ``text`` shrinks under zlib compression.

    The ratio is the UTF-8 encoded size divided by the zlib-compressed size,
    so both sides are measured in bytes. Using the character count as the
    numerator (as before) understated the ratio for non-ASCII text, where one
    character occupies several bytes.

    Args:
        text: the string to measure.

    Returns:
        Encoded byte length over compressed byte length; ``0.0`` for an
        empty string.
    """
    text_bytes = text.encode("utf-8")
    return len(text_bytes) / len(zlib.compress(text_bytes))
|
29 |
+
|
30 |
+
|
31 |
+
def format_timestamp(seconds: float, always_include_hours: bool = False, fractionalSeperator: str = '.'):
    """Render a duration in seconds as ``[HH:]MM:SS<sep>mmm``.

    Args:
        seconds: non-negative duration; rounded to the nearest millisecond.
        always_include_hours: emit the ``HH:`` prefix even when it is zero.
        fractionalSeperator: text placed between seconds and milliseconds.

    Returns:
        The formatted timestamp string.
    """
    assert seconds >= 0, "non-negative timestamp expected"
    total_ms = round(seconds * 1000.0)

    # Peel off each unit in turn, carrying the remainder down.
    hours, total_ms = divmod(total_ms, 3_600_000)
    minutes, total_ms = divmod(total_ms, 60_000)
    whole_seconds, millis = divmod(total_ms, 1_000)

    if always_include_hours or hours > 0:
        prefix = f"{hours:02d}:"
    else:
        prefix = ""
    return f"{prefix}{minutes:02d}:{whole_seconds:02d}{fractionalSeperator}{millis:03d}"
|
46 |
+
|
47 |
+
|
48 |
+
def write_txt(transcript: Iterator[dict], file: TextIO):
    """Write each segment's text to ``file``, one whitespace-trimmed line each.

    Args:
        transcript: iterable of segment dicts; only the ``'text'`` key is read.
        file: writable text stream; flushed after every line.
    """
    for entry in transcript:
        line = entry['text'].strip()
        print(line, file=file, flush=True)
|
51 |
+
|
52 |
+
|
53 |
+
def write_vtt(transcript: Iterator[dict], file: TextIO, maxLineWidth=None):
    """Write ``transcript`` to ``file`` as WebVTT cues.

    Args:
        transcript: iterable of segment dicts with ``'start'``, ``'end'``
            (passed to ``format_timestamp``) and ``'text'`` keys.
        file: writable text stream; flushed after every cue.
        maxLineWidth: forwarded to ``processText`` to wrap the cue text.
    """
    print("WEBVTT\n", file=file)
    for segment in transcript:
        # Strip surrounding whitespace before wrapping, matching write_srt, so
        # cue text does not carry a stray leading space. Any literal "-->" is
        # rewritten because that token separates the cue timings.
        text = processText(segment['text'].strip(), maxLineWidth).replace('-->', '->')

        print(
            f"{format_timestamp(segment['start'])} --> {format_timestamp(segment['end'])}\n"
            f"{text}\n",
            file=file,
            flush=True,
        )
|
64 |
+
|
65 |
+
|
66 |
+
def write_srt(transcript: Iterator[dict], file: TextIO, maxLineWidth=None):
    """
    Emit ``transcript`` to ``file`` as numbered SRT cues.

    Each segment becomes a 1-based index line, a ``start --> end`` timing line
    (hours always shown, comma before the milliseconds) and the trimmed,
    optionally wrapped text, followed by a blank line.

    Args:
        transcript: iterable of segment dicts with ``'start'``, ``'end'`` and
            ``'text'`` keys.
        file: writable text stream; flushed after every cue.
        maxLineWidth: forwarded to ``processText`` to wrap the cue text.
    """
    for index, entry in enumerate(transcript, start=1):
        # A literal "-->" inside the text is rewritten so it cannot be
        # mistaken for the timing separator.
        body = processText(entry['text'].strip(), maxLineWidth).replace('-->', '->')
        begin = format_timestamp(entry['start'], always_include_hours=True, fractionalSeperator=',')
        finish = format_timestamp(entry['end'], always_include_hours=True, fractionalSeperator=',')

        print(f"{index}\n{begin} --> {finish}\n{body}\n", file=file, flush=True)

|
90 |
+
|
91 |
+
def processText(text: str, maxLineWidth=None):
    """Wrap ``text`` to at most ``maxLineWidth`` columns.

    Args:
        text: the text to wrap.
        maxLineWidth: maximum line width; ``None``, zero or a negative value
            disables wrapping.

    Returns:
        ``text`` unchanged when wrapping is disabled, otherwise the wrapped
        lines joined with newlines.
    """
    # textwrap.wrap raises ValueError for width <= 0, so zero is treated as
    # "no wrapping" just like negative widths.
    if maxLineWidth is None or maxLineWidth <= 0:
        return text

    lines = textwrap.wrap(text, width=maxLineWidth, tabsize=4)
    return '\n'.join(lines)
|