Ayushnangia committed on
Commit
5167b0f
1 Parent(s): 29685bd

updating with summarizer

Browse files
Files changed (2) hide show
  1. app.py +35 -6
  2. requirements.txt +3 -1
app.py CHANGED
@@ -1,6 +1,9 @@
1
  import gradio as gr
2
  import yt_dlp as ydlp
 
 
3
  from whispercpp import Whisper
 
4
 
5
  def download_audio(youtube_url, output_folder='.'):
6
  ydl_opts = {
@@ -32,21 +35,47 @@ def process_general_transcription(transcription):
32
  transcript_str = "\n".join(formatted_transcription)
33
 
34
  return transcript_str
35
- def transcribe_youtube(youtube_url):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  download_audio(youtube_url)
37
  result = w.transcribe("audio.wav")
38
  text = w.extract_text(result)
39
- return process_general_transcription(text)
 
 
 
40
  with gr.Blocks() as demo:
41
  gr.Markdown(
42
  """
43
  # CPP Whisperer - Transcribe YouTube Videos
44
 
45
  """)
46
- inp = gr.Textbox(placeholder="Insert YT Url here")
47
- result_button_transcribe = gr.Button('Transcribe')
48
- out = gr.Textbox()
49
- result_button_transcribe.click(transcribe_youtube, inputs = inp, outputs = out)
 
 
 
 
 
 
 
 
50
 
51
 
52
  demo.launch()
 
1
  import gradio as gr
2
  import yt_dlp as ydlp
3
+ from transformers import pipeline
4
+
5
  from whispercpp import Whisper
6
+ summarizer = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")
7
 
8
  def download_audio(youtube_url, output_folder='.'):
9
  ydl_opts = {
 
35
  transcript_str = "\n".join(formatted_transcription)
36
 
37
  return transcript_str
38
def chunk_to_tokens(text, n, max_words=512):
    """Split *text* into chunks of consecutive whitespace-delimited words.

    The chunk size shrinks as ``n`` grows: with ``n == 0`` a chunk holds up to
    ``max_words`` words, with ``n == 100`` every chunk is a single word.

    Args:
        text: Text to split; words are separated on any whitespace.
        n: Summarization percentage. Values outside 0-100 are clamped, so a
            negative value can no longer produce chunks above ``max_words``.
        max_words: Upper bound on the number of words per chunk. Defaults to
            512, the cap that used to be hard-coded.

    Returns:
        A list of strings, each a space-joined run of consecutive words.
        Empty or whitespace-only text yields an empty list.
    """
    words = text.split()
    if not words:
        return []

    percentage = min(max(n, 0), 100)
    cap = min(len(words), max_words)
    # Never drop below one word per chunk, otherwise range() gets a zero step.
    size = max(1, int(cap * (1 - percentage / 100)))
    return [" ".join(words[i:i + size]) for i in range(0, len(words), size)]
47
def summarizing(text, n):
    """Summarize *text* chunk by chunk and return the summaries, one per line.

    The text is split with ``chunk_to_tokens(text, n)``; each chunk is passed
    to the module-level ``summarizer`` pipeline and the resulting
    ``summary_text`` values are concatenated, each followed by a newline.

    Args:
        text: Text to summarize.
        n: Summarization percentage forwarded to ``chunk_to_tokens``.

    Returns:
        The newline-terminated summaries joined into one string; an empty
        string when the text produces no chunks.
    """
    chunks = chunk_to_tokens(text, n)
    # Chunks are sized in words, not model tokens, so a chunk can still exceed
    # the model's input window; let the pipeline truncate instead of failing.
    summaries = (
        summarizer(chunk, truncation=True)[0]['summary_text']
        for chunk in chunks
    )
    return "".join(f"{summary}\n" for summary in summaries)
53
def transcribe_sum_youtube(youtube_url,n):
    """Download a YouTube video's audio, transcribe it and summarize the text.

    Args:
        youtube_url: URL handed to ``download_audio``.
        n: Summarization percentage forwarded to ``summarizing``.

    Returns:
        The string produced by ``summarizing`` for the formatted transcript.
    """
    download_audio(youtube_url)
    # NOTE(review): presumably download_audio writes "audio.wav" into the
    # working directory - confirm against its definition.
    raw_text = w.extract_text(w.transcribe("audio.wav"))
    transcript = process_general_transcription(raw_text)
    return summarizing(transcript, n)
59
+
60
+
61
  with gr.Blocks() as demo:
62
  gr.Markdown(
63
  """
64
  # CPP Whisperer - Transcribe YouTube Videos
65
 
66
  """)
67
+ with gr.Row():
68
+ with gr.Column():
69
+
70
+ inp = gr.Textbox(label="Youtube Url",placeholder="Insert YT Url here")
71
+ inp2 = gr.Slider(label="Summarization Percentage",min_value=0,max_value=100,step_size=1)
72
+ result_button_transcribe = gr.Button('Transcribe and Summarize')
73
+
74
+ with gr.Column():
75
+ out = gr.Textbox(label="Transcribed and Summarize Text")
76
+
77
+
78
+ result_button_transcribe.click(transcribe_sum_youtube, inputs = [inp,inp2] , outputs = out)
79
 
80
 
81
  demo.launch()
requirements.txt CHANGED
@@ -1,3 +1,5 @@
1
  git+https://github.com/stlukey/whispercpp.py
2
  gradio
3
- yt_dlp
 
 
 
1
  git+https://github.com/stlukey/whispercpp.py
2
  gradio
3
+ yt_dlp
4
+ transformers
5
+ torch