Spaces:

dussen
/

Whisper_dutch

Sleeping

App Files Files Community

SevenhuijsenM committed on Dec 8, 2023

Commit

2a6ff40

1 Parent(s): 5238f45

Implementation of AI

Browse files

Files changed (2) hide show

app.py +21 -10
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -4,8 +4,11 @@ from pytube import YouTube
 import os
 import requests
 import time
 pipe = pipeline(model="dussen/whisper-small-nl-hc")
 print(pipe)
 def download_audio(url, output_path='downloads'):
     try:
@@ -15,28 +18,20 @@ def download_audio(url, output_path='downloads'):
         # Get the audio stream with the highest quality
         audio_stream = yt.streams.filter(only_audio=True, file_extension='mp4').first()
         audio_stream.download(output_path)
-        print(f"Downloaded audio to {output_path}")
         # If a video.mp4 file already exists, delete it
         if os.path.exists(f"{output_path}/video.mp4"):
             os.remove(f"{output_path}/video.mp4")
-        print("Downloading video...")
         # Rename the downloaded file to video.mp3
         default_filename = audio_stream.default_filename
         mp4_path = f"{output_path}/{default_filename}"
         mp3_path = f"{output_path}/video.mp3"
         os.rename(mp4_path, mp3_path)
-        print("Downloaded video")
-        print("Transcribing audio...")
-        print("Type of audio: ", type(mp3_path))
         # Use the model to transcribe the audio
         text = pipe(mp3_path)["text"]
-        print(f"Transcribed audio: {text}")
         # Delete the audio file
         os.remove(mp3_path)
@@ -66,6 +61,22 @@ def radio_to_text(radio_url):
             pass
     text = pipe("stream.mp3")["text"]
     print(text)
     return text
 iface_video_url = gr.Interface(
@@ -89,7 +100,7 @@ iface_radio = gr.Interface(
     inputs="text",
     outputs="text",
     title="Whisper Small Dutch - Use a radio URL",
-    description="Demo for dutch speech recognition using a fine-tuned Whisper small model.",
 )
 app = gr.TabbedInterface([iface_audio, iface_video_url, iface_radio], ["Audio to text", "Video to text", "Radio to text"])

 import os
 import requests
 import time
+from openai import OpenAI
+client = OpenAI(api_key="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")
 pipe = pipeline(model="dussen/whisper-small-nl-hc")
 print(pipe)
 def download_audio(url, output_path='downloads'):
     try:
         # Get the audio stream with the highest quality
         audio_stream = yt.streams.filter(only_audio=True, file_extension='mp4').first()
         audio_stream.download(output_path)
         # If a video.mp4 file already exists, delete it
         if os.path.exists(f"{output_path}/video.mp4"):
             os.remove(f"{output_path}/video.mp4")
         # Rename the downloaded file to video.mp3
         default_filename = audio_stream.default_filename
         mp4_path = f"{output_path}/{default_filename}"
         mp3_path = f"{output_path}/video.mp3"
         os.rename(mp4_path, mp3_path)
         # Use the model to transcribe the audio
         text = pipe(mp3_path)["text"]
         # Delete the audio file
         os.remove(mp3_path)
             pass
     text = pipe("stream.mp3")["text"]
     print(text)
+    # Use chatGPT to summarise the text using a prompt that says whether it is news, an ad or a song
+    prompt = f"Dit stuk komt uit een radio uitzending en is getranscribeerd door AI. Er kunnen fouten in zitten. Kan je eerst het categorie text geven uit `nieuws`, `muziek`, `advertentie` of rest`, en dan in max drie zinnen wat er gezegd is?{text}"
+    # Limit the prompt to 3584 characters (a rough stand-in for a token limit)
+    prompt = prompt[:3584]
+    response = client.chat.completions.create(
+        model="gpt-3.5-turbo",
+        messages=[{"role": "user", "content": prompt}],
+        temperature=0.7,
+        max_tokens=512,
+        top_p=1
+        )
+    text = f"Tekst van de AI die is getranscribeerd: {text}\n\n---\n\nSamenvatting door AI:\n\n{response}"
     return text
 iface_video_url = gr.Interface(
     inputs="text",
     outputs="text",
     title="Whisper Small Dutch - Use a radio URL",
+    description="Demo for dutch speech recognition using a fine-tuned Whisper small model. It gets information on what is playing on the given radio URL. It transcribes it and then summarises it using chatGPT.",
 )
 app = gr.TabbedInterface([iface_audio, iface_video_url, iface_radio], ["Audio to text", "Video to text", "Radio to text"])

requirements.txt CHANGED Viewed

@@ -2,4 +2,5 @@ torch
 torchvision
 torchaudio
 transformers
-pytube

 torchvision
 torchaudio
 transformers
+pytube
+openai