juan-op committed on
Commit
29eb72e
·
1 Parent(s): 9bd8643

App initializer

Browse files
Files changed (2) hide show
  1. app.py +64 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from tempfile import TemporaryDirectory
2
+
3
+ from pytube import YouTube
4
+ import whisper
5
+ from transformers import pipeline
6
+ import gradio as gr
7
+
8
+
9
def get_title(url: str) -> str:
    """Fetch a YouTube video's title and format it as bold Markdown.

    Args:
        url: Address of the YouTube video.

    Returns:
        The video title wrapped in ``**`` markers.
    """
    video_title = YouTube(url).title
    return "**{}**".format(video_title)
13
+
14
+
15
def download_audio(url: str, path: str) -> None:
    """Download the audio track of a YouTube video into a directory.

    The first audio-only stream reported for the video is saved as
    ``a.mp4`` inside ``path``.

    Args:
        url: Address of the YouTube video.
        path: Directory the audio file is written to.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    yt = YouTube(url)
    audio = yt.streams.filter(only_audio=True).first()
    # first() yields None when the filtered query is empty; fail with a clear
    # message instead of an AttributeError on ``None.download``.
    if audio is None:
        raise ValueError(f"No audio-only stream available for {url!r}")
    audio.download(output_path=path, filename="a.mp4")
20
+
21
+
22
def transcribe(path: str, *, chunk_size: int = 1000) -> list[str]:
    """Transcribe an audio file and split the text into fixed-size chunks.

    Args:
        path: Location of the audio file to transcribe.
        chunk_size: Maximum number of characters per chunk. Defaults to
            1000, the value that was previously hard-coded.

    Returns:
        Consecutive slices of the transcription, each at most ``chunk_size``
        characters long. The list is empty when the transcription is empty.

    Raises:
        ValueError: If ``chunk_size`` is not a positive number.
    """
    # Validate before loading the model so a bad argument fails immediately;
    # a negative step would otherwise silently produce an empty list.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size!r}")

    model = whisper.load_model("base")
    text = model.transcribe(path)["text"]
    return [
        text[start : start + chunk_size]
        for start in range(0, len(text), chunk_size)
    ]
28
+
29
+
30
def summarize(transcription: list[str]) -> str:
    """Summarize each transcription chunk and join the partial summaries.

    Args:
        transcription: Text chunks to summarize, one summary per chunk.

    Returns:
        The chunk summaries joined by single spaces, stripped, with every
        `` . `` sequence rewritten as ``. ``. An empty string is returned
        when there is no non-blank chunk to summarize.
    """
    # Drop empty / whitespace-only chunks: there is nothing to summarize in
    # them, and an empty transcription produces an empty chunk list.
    chunks = [chunk for chunk in transcription if chunk.strip()]
    if not chunks:
        # Skip building the summarization pipeline when there is no input.
        return ""

    model = pipeline("summarization")
    summary_chunks = model(chunks, max_length=80, min_length=30)
    joined = " ".join(part["summary_text"] for part in summary_chunks)
    return joined.strip().replace(" . ", ". ")
36
+
37
+
38
def execute_pipeline(url: str) -> str:
    """Run the download, transcription and summarization steps for a video.

    A temporary directory is created under the current working directory to
    hold the downloaded audio; it is removed when the ``with`` block exits.

    Args:
        url: Address of the YouTube video.

    Returns:
        The summary text produced by ``summarize``.
    """
    with TemporaryDirectory(dir=".") as workdir:
        audio_file = f"{workdir}/a.mp4"
        download_audio(url, workdir)
        summary = summarize(transcribe(audio_file))
        print("Done!")
    return summary
46
+
47
+
48
def main() -> None:
    """Build the Gradio page and launch it."""
    heading_html = '<h2 style="text-align:center"><span style="font-size:36px">Resume un video de <strong>Youtube</strong></span></h2>'

    with gr.Blocks(analytics_enabled=True, title="Resume un video") as app:
        gr.HTML(heading_html)
        url_box = gr.Textbox(label="Introduce el link del video:")
        title_md = gr.Markdown()
        summary_box = gr.Textbox(label="Resumen")
        run_button = gr.Button("Dale").style(full_width=False)
        # Two handlers are registered on the same click: one fills the
        # summary box, the other fills the title.
        run_button.click(fn=execute_pipeline, inputs=url_box, outputs=summary_box)
        run_button.click(fn=get_title, inputs=url_box, outputs=title_md)
        gr.Markdown("*Funciona mejor con vídeos en inglés y de menos de 10 minutos.*")

    app.launch()


# Start the app only when this file is run as a script.
if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio==3.16.0
2
+ pytube==12.1.2
3
+ transformers==4.25.1
4
+ whisper @ git+https://github.com/openai/whisper.git@28769fcfe50755a817ab922a7bc83483159600a9
5
+