IndranilB committed on
Commit
079312c
1 Parent(s): bb26788

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +137 -0
app.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import ssl
3
# SECURITY: replacing the default HTTPS context with the unverified one turns
# off TLS certificate checking for every urllib-based request made by this
# process, which exposes downloads to man-in-the-middle tampering.  Keep
# verification on by default and only disable it when the operator explicitly
# opts in (e.g. on a machine whose CA bundle is broken).
import os

if os.environ.get("YT_SUMMARIZER_INSECURE_SSL") == "1":
    ssl._create_default_https_context = ssl._create_unverified_context
4
+ import glob
5
+ import os
6
+
7
def vid_to_audio(url=None):
    """Download the audio track of a YouTube video into the current directory.

    The first audio-only stream reported by pytube is downloaded and the
    resulting file is given an ``.mp3`` extension.  Note that the file is only
    renamed, not transcoded; its contents stay in whatever container the
    stream was served in.

    Args:
        url: Address of the YouTube video to fetch.

    Returns:
        The string ``"OK"`` once the file has been saved.

    Raises:
        ValueError: If ``url`` is missing/blank, or the video exposes no
            audio-only stream.
    """
    # Validate before touching pytube so that bad input fails fast and with a
    # clear message instead of an error from deep inside the library.
    if not url or not str(url).strip():
        raise ValueError("A YouTube URL is required.")

    # importing packages
    from pytube import YouTube
    import os

    yt = YouTube(url)

    # extract only audio; ``first()`` yields None when nothing matches
    video = yt.streams.filter(only_audio=True).first()
    if video is None:
        raise ValueError("No audio-only stream is available for this video.")

    # download the file into the current directory
    destination = '.'
    out_file = video.download(output_path=destination)

    # save the file under an .mp3 name; os.replace overwrites a leftover file
    # from a previous run on every platform (os.rename fails on Windows then)
    base, _ext = os.path.splitext(out_file)
    new_file = base + '.mp3'
    os.replace(out_file, new_file)

    # result of success
    print(yt.title + " has been successfully downloaded.")

    return "OK"
33
+
34
+ #vid_to_text(url='https://youtu.be/FE5tva_o7ew?si=ztkKeO7qwcpC36AS')
35
+
36
def _newest_mp3(directory):
    """Return the most recently modified ``.mp3`` file in *directory*, or None."""
    candidates = [
        os.path.join(directory, name)
        for name in os.listdir(directory)
        if name.endswith(".mp3")
    ]
    return max(candidates, key=os.path.getmtime, default=None)


def audio_to_text():
    """Transcribe the newest MP3 file in the current working directory.

    The file is located first so that a missing download is reported before
    the speech-recognition model is loaded.  Transcription uses the
    ``openai/whisper-tiny`` checkpoint through a transformers
    ``automatic-speech-recognition`` pipeline, on CUDA when available and on
    the CPU otherwise.

    Returns:
        The transcript text produced by the pipeline.

    Raises:
        FileNotFoundError: If the current directory contains no ``.mp3`` file.
    """
    working_dir = os.getcwd()

    # Pick the newest file rather than whatever os.listdir happens to return
    # first, so a leftover MP3 from an earlier run is not transcribed again.
    file_path = _newest_mp3(working_dir)
    if file_path is None:
        raise FileNotFoundError("No .mp3 file found in " + working_dir)

    import torch
    from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

    has_cuda = torch.cuda.is_available()
    device = "cuda:0" if has_cuda else "cpu"
    # Half precision is only used on the GPU.
    torch_dtype = torch.float16 if has_cuda else torch.float32

    model_id = "openai/whisper-tiny"

    model = AutoModelForSpeechSeq2Seq.from_pretrained(
        model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
    )
    model.to(device)

    processor = AutoProcessor.from_pretrained(model_id)

    pipe = pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        max_new_tokens=128,
        chunk_length_s=30,
        batch_size=16,
        torch_dtype=torch_dtype,
        device=device,
    )

    # Display information (optional)
    st.write("Current Directory:", working_dir)
    st.write("File Path:", file_path)

    result = pipe(file_path)
    return result['text']
92
+
93
+
94
+
95
def _split_into_chunks(text, size):
    """Return consecutive slices of *text*, each at most *size* characters long."""
    return [text[start:start + size] for start in range(0, len(text), size)]


def summarize():
    """Summarize the transcript returned by ``audio_to_text``.

    The transcript is cut into 512-character pieces, each piece is passed to
    the ``snrspeaks/t5-one-line-summary`` summarization pipeline, and the
    per-piece summaries are joined with single spaces.

    Returns:
        The combined summary text, or an empty string when the transcript is
        empty or whitespace only.
    """
    from transformers import pipeline

    transcript = audio_to_text()
    if not transcript or not transcript.strip():
        # Nothing to summarize; skip loading the model.
        return ''

    summarizer = pipeline("summarization", model="snrspeaks/t5-one-line-summary")

    # Summarize piece by piece so that a long transcript is covered in full
    # instead of only its beginning; the final, shorter piece is included.
    cutoff = 512
    partial_summaries = [
        summarizer(piece, do_sample=False)[0]['summary_text']
        for piece in _split_into_chunks(transcript, cutoff)
    ]
    return ' '.join(partial_summaries)
127
+
128
# --- Streamlit page: ask for a video URL, then download and summarize it ---
yt_link = st.text_input("Enter the YouTube URL: ")

if st.button("Start Summarization"):
    if not yt_link.strip():
        # Avoid starting a download that is guaranteed to fail.
        st.warning("Please enter a YouTube URL first.")
    else:
        with st.status("Downloading the video..."):
            vid_to_audio(url=yt_link)
        with st.status("Summarizing..."):
            s = summarize()
        # Written outside the status containers so the summary is shown on
        # the page itself rather than inside a status panel.
        st.write(s)
137
+