Spaces:
Running
Running
Update metric, fix youtube.py
Browse files
- extract.py +0 -1
- main.py +0 -1
- static/404.html +15 -8
- static/analytics.html +15 -8
- static/emotion_detection.html +15 -8
- static/grammar_checker.html +15 -8
- static/index.html +140 -8
- static/text_summarizer.html +15 -8
- youtube.py +4 -4
extract.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1 |
import os
|
2 |
import nltk
|
3 |
nltk.data.path.append(os.path.join(os.getcwd(), 'nltk-data'))
|
4 |
-
print(nltk.data.path)
|
5 |
|
6 |
from nltk.cluster.util import cosine_distance
|
7 |
from nltk.tokenize import sent_tokenize, word_tokenize
|
|
|
1 |
import os
|
2 |
import nltk
|
3 |
nltk.data.path.append(os.path.join(os.getcwd(), 'nltk-data'))
|
|
|
4 |
|
5 |
from nltk.cluster.util import cosine_distance
|
6 |
from nltk.tokenize import sent_tokenize, word_tokenize
|
main.py
CHANGED
@@ -8,7 +8,6 @@ from fastapi.templating import Jinja2Templates
|
|
8 |
from youtube import get_youtube_caption
|
9 |
from inference import predict_emotions, predict_summarization, predict_acceptance
|
10 |
|
11 |
-
MAX_ITER_SIZE = 3000
|
12 |
|
13 |
app = FastAPI(docs_url=None, redoc_url=None)
|
14 |
app.mount("/files", StaticFiles(directory="files"), name="files")
|
|
|
8 |
from youtube import get_youtube_caption
|
9 |
from inference import predict_emotions, predict_summarization, predict_acceptance
|
10 |
|
|
|
11 |
|
12 |
app = FastAPI(docs_url=None, redoc_url=None)
|
13 |
app.mount("/files", StaticFiles(directory="files"), name="files")
|
static/404.html
CHANGED
@@ -12,15 +12,22 @@
|
|
12 |
</head>
|
13 |
|
14 |
<body class="overflow-hidden">
|
15 |
-
<!--
|
16 |
-
<script
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
23 |
</script>
|
|
|
|
|
24 |
|
25 |
<div x-data="{ sidebarOpen: false }" class="relative flex h-screen text-gray-800 bg-white font-roboto">
|
26 |
<div x-cloak :class="sidebarOpen ? 'block' : 'hidden'" @click="sidebarOpen = false"
|
|
|
12 |
</head>
|
13 |
|
14 |
<body class="overflow-hidden">
|
15 |
+
<!-- Yandex.Metrika counter -->
|
16 |
+
<script type="text/javascript" >
|
17 |
+
(function(m,e,t,r,i,k,a){m[i]=m[i]||function(){(m[i].a=m[i].a||[]).push(arguments)};
|
18 |
+
m[i].l=1*new Date();
|
19 |
+
for (var j = 0; j < document.scripts.length; j++) {if (document.scripts[j].src === r) { return; }}
|
20 |
+
k=e.createElement(t),a=e.getElementsByTagName(t)[0],k.async=1,k.src=r,a.parentNode.insertBefore(k,a)})
|
21 |
+
(window, document, "script", "https://mc.yandex.ru/metrika/tag.js", "ym");
|
22 |
+
|
23 |
+
ym(95744490, "init", {
|
24 |
+
clickmap:true,
|
25 |
+
trackLinks:true,
|
26 |
+
accurateTrackBounce:true
|
27 |
+
});
|
28 |
</script>
|
29 |
+
<noscript><div><img src="https://mc.yandex.ru/watch/95744490" style="position:absolute; left:-9999px;" alt="" /></div></noscript>
|
30 |
+
<!-- /Yandex.Metrika counter -->
|
31 |
|
32 |
<div x-data="{ sidebarOpen: false }" class="relative flex h-screen text-gray-800 bg-white font-roboto">
|
33 |
<div x-cloak :class="sidebarOpen ? 'block' : 'hidden'" @click="sidebarOpen = false"
|
static/analytics.html
CHANGED
@@ -15,15 +15,22 @@
|
|
15 |
</head>
|
16 |
|
17 |
<body class="overflow-hidden">
|
18 |
-
<!--
|
19 |
-
<script
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
|
|
|
|
|
|
|
|
|
|
26 |
</script>
|
|
|
|
|
27 |
|
28 |
<div x-data="{ sidebarOpen: false }" class="relative flex h-screen text-gray-800 bg-white font-roboto">
|
29 |
<div x-cloak :class="sidebarOpen ? 'block' : 'hidden'" @click="sidebarOpen = false"
|
|
|
15 |
</head>
|
16 |
|
17 |
<body class="overflow-hidden">
|
18 |
+
<!-- Yandex.Metrika counter -->
|
19 |
+
<script type="text/javascript" >
|
20 |
+
(function(m,e,t,r,i,k,a){m[i]=m[i]||function(){(m[i].a=m[i].a||[]).push(arguments)};
|
21 |
+
m[i].l=1*new Date();
|
22 |
+
for (var j = 0; j < document.scripts.length; j++) {if (document.scripts[j].src === r) { return; }}
|
23 |
+
k=e.createElement(t),a=e.getElementsByTagName(t)[0],k.async=1,k.src=r,a.parentNode.insertBefore(k,a)})
|
24 |
+
(window, document, "script", "https://mc.yandex.ru/metrika/tag.js", "ym");
|
25 |
+
|
26 |
+
ym(95744490, "init", {
|
27 |
+
clickmap:true,
|
28 |
+
trackLinks:true,
|
29 |
+
accurateTrackBounce:true
|
30 |
+
});
|
31 |
</script>
|
32 |
+
<noscript><div><img src="https://mc.yandex.ru/watch/95744490" style="position:absolute; left:-9999px;" alt="" /></div></noscript>
|
33 |
+
<!-- /Yandex.Metrika counter -->
|
34 |
|
35 |
<div x-data="{ sidebarOpen: false }" class="relative flex h-screen text-gray-800 bg-white font-roboto">
|
36 |
<div x-cloak :class="sidebarOpen ? 'block' : 'hidden'" @click="sidebarOpen = false"
|
static/emotion_detection.html
CHANGED
@@ -12,15 +12,22 @@
|
|
12 |
</head>
|
13 |
|
14 |
<body class="overflow-hidden">
|
15 |
-
<!--
|
16 |
-
<script
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
23 |
</script>
|
|
|
|
|
24 |
|
25 |
<div x-data="{ sidebarOpen: false }" class="relative flex h-screen text-gray-800 bg-white font-roboto">
|
26 |
<div x-cloak :class="sidebarOpen ? 'block' : 'hidden'" @click="sidebarOpen = false"
|
|
|
12 |
</head>
|
13 |
|
14 |
<body class="overflow-hidden">
|
15 |
+
<!-- Yandex.Metrika counter -->
|
16 |
+
<script type="text/javascript" >
|
17 |
+
(function(m,e,t,r,i,k,a){m[i]=m[i]||function(){(m[i].a=m[i].a||[]).push(arguments)};
|
18 |
+
m[i].l=1*new Date();
|
19 |
+
for (var j = 0; j < document.scripts.length; j++) {if (document.scripts[j].src === r) { return; }}
|
20 |
+
k=e.createElement(t),a=e.getElementsByTagName(t)[0],k.async=1,k.src=r,a.parentNode.insertBefore(k,a)})
|
21 |
+
(window, document, "script", "https://mc.yandex.ru/metrika/tag.js", "ym");
|
22 |
+
|
23 |
+
ym(95744490, "init", {
|
24 |
+
clickmap:true,
|
25 |
+
trackLinks:true,
|
26 |
+
accurateTrackBounce:true
|
27 |
+
});
|
28 |
</script>
|
29 |
+
<noscript><div><img src="https://mc.yandex.ru/watch/95744490" style="position:absolute; left:-9999px;" alt="" /></div></noscript>
|
30 |
+
<!-- /Yandex.Metrika counter -->
|
31 |
|
32 |
<div x-data="{ sidebarOpen: false }" class="relative flex h-screen text-gray-800 bg-white font-roboto">
|
33 |
<div x-cloak :class="sidebarOpen ? 'block' : 'hidden'" @click="sidebarOpen = false"
|
static/grammar_checker.html
CHANGED
@@ -12,15 +12,22 @@
|
|
12 |
</head>
|
13 |
|
14 |
<body class="overflow-hidden">
|
15 |
-
<!--
|
16 |
-
<script
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
23 |
</script>
|
|
|
|
|
24 |
|
25 |
<div x-data="{ sidebarOpen: false }" class="relative flex h-screen text-gray-800 bg-white font-roboto">
|
26 |
<div x-cloak :class="sidebarOpen ? 'block' : 'hidden'" @click="sidebarOpen = false"
|
|
|
12 |
</head>
|
13 |
|
14 |
<body class="overflow-hidden">
|
15 |
+
<!-- Yandex.Metrika counter -->
|
16 |
+
<script type="text/javascript" >
|
17 |
+
(function(m,e,t,r,i,k,a){m[i]=m[i]||function(){(m[i].a=m[i].a||[]).push(arguments)};
|
18 |
+
m[i].l=1*new Date();
|
19 |
+
for (var j = 0; j < document.scripts.length; j++) {if (document.scripts[j].src === r) { return; }}
|
20 |
+
k=e.createElement(t),a=e.getElementsByTagName(t)[0],k.async=1,k.src=r,a.parentNode.insertBefore(k,a)})
|
21 |
+
(window, document, "script", "https://mc.yandex.ru/metrika/tag.js", "ym");
|
22 |
+
|
23 |
+
ym(95744490, "init", {
|
24 |
+
clickmap:true,
|
25 |
+
trackLinks:true,
|
26 |
+
accurateTrackBounce:true
|
27 |
+
});
|
28 |
</script>
|
29 |
+
<noscript><div><img src="https://mc.yandex.ru/watch/95744490" style="position:absolute; left:-9999px;" alt="" /></div></noscript>
|
30 |
+
<!-- /Yandex.Metrika counter -->
|
31 |
|
32 |
<div x-data="{ sidebarOpen: false }" class="relative flex h-screen text-gray-800 bg-white font-roboto">
|
33 |
<div x-cloak :class="sidebarOpen ? 'block' : 'hidden'" @click="sidebarOpen = false"
|
static/index.html
CHANGED
@@ -12,15 +12,22 @@
|
|
12 |
</head>
|
13 |
|
14 |
<body class="overflow-hidden">
|
15 |
-
<!--
|
16 |
-
<script
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
23 |
</script>
|
|
|
|
|
24 |
|
25 |
<div x-data="{ sidebarOpen: false }" class="relative flex h-screen text-gray-800 bg-white font-roboto">
|
26 |
<div x-cloak :class="sidebarOpen ? 'block' : 'hidden'" @click="sidebarOpen = false"
|
@@ -335,6 +342,131 @@
|
|
335 |
</p>
|
336 |
</div>
|
337 |
</section>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
338 |
</div>
|
339 |
</div>
|
340 |
</main>
|
|
|
12 |
</head>
|
13 |
|
14 |
<body class="overflow-hidden">
|
15 |
+
<!-- Yandex.Metrika counter -->
|
16 |
+
<script type="text/javascript" >
|
17 |
+
(function(m,e,t,r,i,k,a){m[i]=m[i]||function(){(m[i].a=m[i].a||[]).push(arguments)};
|
18 |
+
m[i].l=1*new Date();
|
19 |
+
for (var j = 0; j < document.scripts.length; j++) {if (document.scripts[j].src === r) { return; }}
|
20 |
+
k=e.createElement(t),a=e.getElementsByTagName(t)[0],k.async=1,k.src=r,a.parentNode.insertBefore(k,a)})
|
21 |
+
(window, document, "script", "https://mc.yandex.ru/metrika/tag.js", "ym");
|
22 |
+
|
23 |
+
ym(95744490, "init", {
|
24 |
+
clickmap:true,
|
25 |
+
trackLinks:true,
|
26 |
+
accurateTrackBounce:true
|
27 |
+
});
|
28 |
</script>
|
29 |
+
<noscript><div><img src="https://mc.yandex.ru/watch/95744490" style="position:absolute; left:-9999px;" alt="" /></div></noscript>
|
30 |
+
<!-- /Yandex.Metrika counter -->
|
31 |
|
32 |
<div x-data="{ sidebarOpen: false }" class="relative flex h-screen text-gray-800 bg-white font-roboto">
|
33 |
<div x-cloak :class="sidebarOpen ? 'block' : 'hidden'" @click="sidebarOpen = false"
|
|
|
342 |
</p>
|
343 |
</div>
|
344 |
</section>
|
345 |
+
|
346 |
+
<!-- <section class="dark-block p-4 mt-8 bg-white rounded-lg shadow-sm xl:p-8">
|
347 |
+
<h2 class="text-lg font-medium text-gray-700 capitalize sm:text-xl md:text-2xl">Projects</h2>
|
348 |
+
|
349 |
+
<p class="flex items-center mt-2 text-gray-500">
|
350 |
+
<svg xmlns="http://www.w3.org/2000/svg" class="no-invert w-5 h-5 text-green-500" viewBox="0 0 20 20" fill="currentColor">
|
351 |
+
<path fill-rule="evenodd" d="M6.267 3.455a3.066 3.066 0 001.745-.723 3.066 3.066 0 013.976 0 3.066 3.066 0 001.745.723 3.066 3.066 0 012.812 2.812c.051.643.304 1.254.723 1.745a3.066 3.066 0 010 3.976 3.066 3.066 0 00-.723 1.745 3.066 3.066 0 01-2.812 2.812 3.066 3.066 0 00-1.745.723 3.066 3.066 0 01-3.976 0 3.066 3.066 0 00-1.745-.723 3.066 3.066 0 01-2.812-2.812 3.066 3.066 0 00-.723-1.745 3.066 3.066 0 010-3.976 3.066 3.066 0 00.723-1.745 3.066 3.066 0 012.812-2.812zm7.44 5.252a1 1 0 00-1.414-1.414L9 10.586 7.707 9.293a1 1 0 00-1.414 1.414l2 2a1 1 0 001.414 0l4-4z" clip-rule="evenodd"></path>
|
352 |
+
</svg>
|
353 |
+
|
354 |
+
<span class="mx-2">30 done this month</span>
|
355 |
+
</p>
|
356 |
+
|
357 |
+
<div class="dark-block flex flex-col mt-8">
|
358 |
+
<div class="overflow-x-auto">
|
359 |
+
<div class="inline-block min-w-full align-middle">
|
360 |
+
<div class="overflow-hidden">
|
361 |
+
<table class="min-w-full divide-y divide-gray-200">
|
362 |
+
<thead>
|
363 |
+
<tr>
|
364 |
+
<th scope="col" class="px-6 py-3 text-sm font-medium tracking-wider text-left text-gray-500 uppercase rtl:text-right">
|
365 |
+
Project Name
|
366 |
+
</th>
|
367 |
+
<th scope="col" class="px-6 py-3 text-sm font-medium tracking-wider text-left text-gray-500 uppercase rtl:text-right">
|
368 |
+
Project Stack
|
369 |
+
</th>
|
370 |
+
<th scope="col" class="px-6 py-3 text-sm font-medium tracking-wider text-left text-gray-500 uppercase rtl:text-right">
|
371 |
+
Date
|
372 |
+
</th>
|
373 |
+
<th scope="col" class="px-6 py-3 text-sm font-medium tracking-wider text-left text-gray-500 uppercase rtl:text-right">
|
374 |
+
COMPLETION
|
375 |
+
</th>
|
376 |
+
</tr>
|
377 |
+
</thead>
|
378 |
+
|
379 |
+
<tbody class="bg-white divide-y divide-gray-200">
|
380 |
+
<tr>
|
381 |
+
<td class="px-6 py-4 font-medium text-gray-800 whitespace-nowrap">
|
382 |
+
Text Summarizer
|
383 |
+
</td>
|
384 |
+
|
385 |
+
<td class="px-6 py-4 font-medium text-gray-700 whitespace-nowrap">
|
386 |
+
NLTK, Numpy, Pandas
|
387 |
+
</td>
|
388 |
+
|
389 |
+
<td class="px-6 py-4 font-medium text-gray-700 whitespace-nowrap">
|
390 |
+
29.09.23 - 06.10.23
|
391 |
+
</td>
|
392 |
+
|
393 |
+
<td class="no-invert px-6 py-4 text-gray-700 capitalize whitespace-nowrap">
|
394 |
+
<span class="font-medium text-indigo-500">66%</span>
|
395 |
+
<div class="w-full h-2 overflow-hidden bg-gray-200 rounded-full">
|
396 |
+
<div class="w-8/12 h-full bg-indigo-500"></div>
|
397 |
+
</div>
|
398 |
+
</td>
|
399 |
+
</tr>
|
400 |
+
|
401 |
+
<tr>
|
402 |
+
<td class="px-6 py-4 font-medium text-gray-800 whitespace-nowrap">
|
403 |
+
Emotion Detection
|
404 |
+
</td>
|
405 |
+
|
406 |
+
<td class="px-6 py-4 font-medium text-gray-700 whitespace-nowrap">
|
407 |
+
Transformers, Torch
|
408 |
+
</td>
|
409 |
+
|
410 |
+
<td class="px-6 py-4 font-medium text-gray-700 whitespace-nowrap">
|
411 |
+
29.09.23 - 03.10.23
|
412 |
+
</td>
|
413 |
+
|
414 |
+
<td class="no-invert px-6 py-4 text-gray-700 capitalize whitespace-nowrap">
|
415 |
+
<span class="font-medium text-indigo-500">25%</span>
|
416 |
+
<div class="w-full h-2 overflow-hidden bg-gray-200 rounded-full">
|
417 |
+
<div class="w-3/12 h-full bg-indigo-500 "></div>
|
418 |
+
</div>
|
419 |
+
</td>
|
420 |
+
</tr>
|
421 |
+
|
422 |
+
<tr>
|
423 |
+
<td class="px-6 py-4 font-medium text-gray-800 whitespace-nowrap">
|
424 |
+
Analytics
|
425 |
+
</td>
|
426 |
+
|
427 |
+
<td class="px-6 py-4 font-medium text-gray-700 whitespace-nowrap">
|
428 |
+
JS, Numpy, Pandas
|
429 |
+
</td>
|
430 |
+
|
431 |
+
<td class="px-6 py-4 font-medium text-gray-700 whitespace-nowrap">
|
432 |
+
29.09.23 - 03.10.23
|
433 |
+
</td>
|
434 |
+
|
435 |
+
<td class="no-invert px-6 py-4 text-gray-700 capitalize whitespace-nowrap">
|
436 |
+
<span class="font-medium text-indigo-500">100%</span>
|
437 |
+
<div class="w-full h-2 overflow-hidden bg-gray-200 rounded-full">
|
438 |
+
<div class="w-full h-full bg-indigo-500"></div>
|
439 |
+
</div>
|
440 |
+
</td>
|
441 |
+
</tr>
|
442 |
+
|
443 |
+
<tr>
|
444 |
+
<td class="px-6 py-4 font-medium text-gray-800 whitespace-nowrap">
|
445 |
+
Grammar Checker
|
446 |
+
</td>
|
447 |
+
|
448 |
+
<td class="px-6 py-4 font-medium text-gray-700 whitespace-nowrap">
|
449 |
+
Transformers, Torch
|
450 |
+
</td>
|
451 |
+
|
452 |
+
<td class="px-6 py-4 font-medium text-gray-700 whitespace-nowrap">
|
453 |
+
29.09.23 - 06.10.23
|
454 |
+
</td>
|
455 |
+
|
456 |
+
<td class="no-invert px-6 py-4 text-gray-700 capitalize whitespace-nowrap">
|
457 |
+
<span class="font-medium text-indigo-500">75%</span>
|
458 |
+
<div class="w-full h-2 overflow-hidden bg-gray-200 rounded-full">
|
459 |
+
<div class="w-9/12 h-full bg-indigo-500 "></div>
|
460 |
+
</div>
|
461 |
+
</td>
|
462 |
+
</tr>
|
463 |
+
</tbody>
|
464 |
+
</table>
|
465 |
+
</div>
|
466 |
+
</div>
|
467 |
+
</div>
|
468 |
+
</div>
|
469 |
+
</section> -->
|
470 |
</div>
|
471 |
</div>
|
472 |
</main>
|
static/text_summarizer.html
CHANGED
@@ -12,15 +12,22 @@
|
|
12 |
</head>
|
13 |
|
14 |
<body class="overflow-hidden">
|
15 |
-
<!--
|
16 |
-
<script
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
23 |
</script>
|
|
|
|
|
24 |
|
25 |
<div x-data="{ sidebarOpen: false }" class="relative flex h-screen text-gray-800 bg-white font-roboto">
|
26 |
<div x-cloak :class="sidebarOpen ? 'block' : 'hidden'" @click="sidebarOpen = false"
|
|
|
12 |
</head>
|
13 |
|
14 |
<body class="overflow-hidden">
|
15 |
+
<!-- Yandex.Metrika counter -->
|
16 |
+
<script type="text/javascript" >
|
17 |
+
(function(m,e,t,r,i,k,a){m[i]=m[i]||function(){(m[i].a=m[i].a||[]).push(arguments)};
|
18 |
+
m[i].l=1*new Date();
|
19 |
+
for (var j = 0; j < document.scripts.length; j++) {if (document.scripts[j].src === r) { return; }}
|
20 |
+
k=e.createElement(t),a=e.getElementsByTagName(t)[0],k.async=1,k.src=r,a.parentNode.insertBefore(k,a)})
|
21 |
+
(window, document, "script", "https://mc.yandex.ru/metrika/tag.js", "ym");
|
22 |
+
|
23 |
+
ym(95744490, "init", {
|
24 |
+
clickmap:true,
|
25 |
+
trackLinks:true,
|
26 |
+
accurateTrackBounce:true
|
27 |
+
});
|
28 |
</script>
|
29 |
+
<noscript><div><img src="https://mc.yandex.ru/watch/95744490" style="position:absolute; left:-9999px;" alt="" /></div></noscript>
|
30 |
+
<!-- /Yandex.Metrika counter -->
|
31 |
|
32 |
<div x-data="{ sidebarOpen: false }" class="relative flex h-screen text-gray-800 bg-white font-roboto">
|
33 |
<div x-cloak :class="sidebarOpen ? 'block' : 'hidden'" @click="sidebarOpen = false"
|
youtube.py
CHANGED
@@ -2,7 +2,7 @@ import re
|
|
2 |
from youtube_transcript_api import YouTubeTranscriptApi
|
3 |
from youtube_transcript_api._errors import TranscriptsDisabled
|
4 |
|
5 |
-
MAX_SIZE =
|
6 |
YT_REGEX = r'^((http)s?:\/\/)?((www\.)|(m\.))?youtube.com\/watch\?([^\?]*&)?v=.+$' # noqa
|
7 |
YT_REGEX_SHORT = r'^((http)s?:\/\/)?youtu.be\/([^\?=]+)(\?[^?]+)?$'
|
8 |
|
@@ -30,11 +30,11 @@ def get_youtube_caption(url: str) -> str:
|
|
30 |
|
31 |
res, size = [], 0
|
32 |
for transcript in YouTubeTranscriptApi.get_transcript(video_id):
|
33 |
-
res.append(transcript['text'])
|
34 |
size += len(transcript['text'])
|
35 |
if size >= MAX_SIZE:
|
36 |
-
return '
|
37 |
-
return '
|
38 |
except TranscriptsDisabled:
|
39 |
return 'no-cap'
|
40 |
except Exception:
|
|
|
2 |
from youtube_transcript_api import YouTubeTranscriptApi
|
3 |
from youtube_transcript_api._errors import TranscriptsDisabled
|
4 |
|
5 |
+
MAX_SIZE = 50_000
|
6 |
YT_REGEX = r'^((http)s?:\/\/)?((www\.)|(m\.))?youtube.com\/watch\?([^\?]*&)?v=.+$' # noqa
|
7 |
YT_REGEX_SHORT = r'^((http)s?:\/\/)?youtu.be\/([^\?=]+)(\?[^?]+)?$'
|
8 |
|
|
|
30 |
|
31 |
res, size = [], 0
|
32 |
for transcript in YouTubeTranscriptApi.get_transcript(video_id):
|
33 |
+
res.append(transcript['text'].replace('\n', ' '))
|
34 |
size += len(transcript['text'])
|
35 |
if size >= MAX_SIZE:
|
36 |
+
return ' '.join(res)[:MAX_SIZE]
|
37 |
+
return ' '.join(res)[:MAX_SIZE]
|
38 |
except TranscriptsDisabled:
|
39 |
return 'no-cap'
|
40 |
except Exception:
|