SevenhuijsenM committed on
Commit
2a6ff40
·
1 Parent(s): 5238f45

Implementation of AI

Browse files
Files changed (2) hide show
  1. app.py +21 -10
  2. requirements.txt +2 -1
app.py CHANGED
@@ -4,8 +4,11 @@ from pytube import YouTube
4
  import os
5
  import requests
6
  import time
 
 
7
 
8
  pipe = pipeline(model="dussen/whisper-small-nl-hc")
 
9
  print(pipe)
10
  def download_audio(url, output_path='downloads'):
11
  try:
@@ -15,28 +18,20 @@ def download_audio(url, output_path='downloads'):
15
  # Get the first audio-only stream with an mp4 container
16
  audio_stream = yt.streams.filter(only_audio=True, file_extension='mp4').first()
17
  audio_stream.download(output_path)
18
- print(f"Downloaded audio to {output_path}")
19
 
20
  # If a video.mp4 file already exists, delete it
21
  if os.path.exists(f"{output_path}/video.mp4"):
22
  os.remove(f"{output_path}/video.mp4")
23
 
24
- print("Downloading video...")
25
-
26
  # Change the name of the file to video.mp3
27
  default_filename = audio_stream.default_filename
28
  mp4_path = f"{output_path}/{default_filename}"
29
  mp3_path = f"{output_path}/video.mp3"
30
  os.rename(mp4_path, mp3_path)
31
 
32
- print("Downloaded video")
33
-
34
-
35
- print("Transcribing audio...")
36
- print("Type of audio: ", type(mp3_path))
37
  # Use the model to transcribe the audio
38
  text = pipe(mp3_path)["text"]
39
- print(f"Transcribed audio: {text}")
40
  # Delete the audio file
41
  os.remove(mp3_path)
42
 
@@ -66,6 +61,22 @@ def radio_to_text(radio_url):
66
  pass
67
  text = pipe("stream.mp3")["text"]
68
  print(text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  return text
70
 
71
  iface_video_url = gr.Interface(
@@ -89,7 +100,7 @@ iface_radio = gr.Interface(
89
  inputs="text",
90
  outputs="text",
91
  title="Whisper Small Dutch - Use a radio URL",
92
- description="Demo for dutch speech recognition using a fine-tuned Whisper small model.",
93
  )
94
 
95
  app = gr.TabbedInterface([iface_audio, iface_video_url, iface_radio], ["Audio to text", "Video to text", "Radio to text"])
 
4
  import os
5
  import requests
6
  import time
7
+ from openai import OpenAI
8
+ client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
9
 
10
  pipe = pipeline(model="dussen/whisper-small-nl-hc")
11
+
12
  print(pipe)
13
  def download_audio(url, output_path='downloads'):
14
  try:
 
18
  # Get the first audio-only stream with an mp4 container
19
  audio_stream = yt.streams.filter(only_audio=True, file_extension='mp4').first()
20
  audio_stream.download(output_path)
 
21
 
22
  # If a video.mp4 file already exists, delete it
23
  if os.path.exists(f"{output_path}/video.mp4"):
24
  os.remove(f"{output_path}/video.mp4")
25
 
 
 
26
  # Change the name of the file to video.mp3
27
  default_filename = audio_stream.default_filename
28
  mp4_path = f"{output_path}/{default_filename}"
29
  mp3_path = f"{output_path}/video.mp3"
30
  os.rename(mp4_path, mp3_path)
31
 
 
 
 
 
 
32
  # Use the model to transcribe the audio
33
  text = pipe(mp3_path)["text"]
34
+
35
  # Delete the audio file
36
  os.remove(mp3_path)
37
 
 
61
  pass
62
  text = pipe("stream.mp3")["text"]
63
  print(text)
64
+
65
+ # Use chatGPT to summarise the text using a prompt that says whether it is news, an ad or a song
66
+ prompt = f"Dit stuk komt uit een radio uitzending en is getranscribeerd door AI. Er kunnen fouten in zitten. Kan je eerst het categorie text geven uit `nieuws`, `muziek`, `advertentie` of rest`, en dan in max drie zinnen wat er gezegd is?{text}"
67
+
68
+ # Limit the prompt to 3584 characters (a rough stand-in for a token limit)
69
+ prompt = prompt[:3584]
70
+
71
+ response = client.chat.completions.create(
72
+ model="gpt-3.5-turbo",
73
+ messages=[{"role": "user", "content": prompt}],
74
+ temperature=0.7,
75
+ max_tokens=512,
76
+ top_p=1
77
+ )
78
+ text = f"Tekst van de AI die is getranscribeerd: {text}\n\n---\n\nSamenvatting door AI:\n\n{response.choices[0].message.content}"
79
+
80
  return text
81
 
82
  iface_video_url = gr.Interface(
 
100
  inputs="text",
101
  outputs="text",
102
  title="Whisper Small Dutch - Use a radio URL",
103
+ description="Demo for dutch speech recognition using a fine-tuned Whisper small model. It gets information on what is playing on the given radio URL. It transcribes it and then summarises it using chatGPT.",
104
  )
105
 
106
  app = gr.TabbedInterface([iface_audio, iface_video_url, iface_radio], ["Audio to text", "Video to text", "Radio to text"])
requirements.txt CHANGED
@@ -2,4 +2,5 @@ torch
2
  torchvision
3
  torchaudio
4
  transformers
5
- pytube
 
 
2
  torchvision
3
  torchaudio
4
  transformers
5
+ pytube
6
+ openai