Spaces:

oceansweep
/

tldw

Running

App Files Files Community

oceansweep commited on May 10, 2024

Commit

d6b96dc

verified ·

1 Parent(s): 62a9bae

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -31

app.py CHANGED Viewed

@@ -45,23 +45,22 @@ import yt_dlp
 #
 #
 # Usage:
-#          Transcribe a single URL:
-#                python diarize.py https://example.com/video.mp4
 #
-#          Transcribe a single URL and have the resulting transcription summarized:
-#                python diarize.py https://example.com/video.mp4
 #
-#          Transcribe a list of files:
-#               python diarize.py ./path/to/your/text_file.txt
 #
-#          Transcribe a local file:
-#               python diarize.py /path/to/your/localfile.mp4
-#
-#          Transcribe a local file and have it summarized:
-#               python diarize.py ./input.mp4 --api_name openai --api_key <your_openai_api_key>
-#
-#          Transcribe a list of files and have them all summarized:
-#               python diarize.py path_to_your_text_file.txt --api_name <openai> --api_key <your_openai_api_key>
 #
 ###
@@ -350,7 +349,7 @@ def process_local_file(file_path):
 # Video Download/Handling
 #
-def process_url(input_path, num_speakers=2, whisper_model="small.en", offset=0, api_name=None, api_key=None, vad_filter=False, download_video_flag=False, demo_mode=True):
     if demo_mode:
         api_name = "huggingface"
         api_key = os.environ.get(HF_TOKEN)
@@ -793,7 +792,7 @@ def summarize_with_openai(api_key, file_path, model):
         }
         logging.debug("openai: Preparing data + prompt for submittal")
-        prompt_text = f"{text} \n\n\n\nPlease provide a detailed, bulleted list of the points made throughout the transcribed video and any supporting arguments made for said points"
         data = {
             "model": model,
             "messages": [
@@ -803,7 +802,7 @@ def summarize_with_openai(api_key, file_path, model):
                 },
                 {
                     "role": "user",
-                    "content": prompt_text
                 }
             ],
             "max_tokens": 4096,  # Adjust tokens as needed
@@ -846,7 +845,7 @@ def summarize_with_claude(api_key, file_path, model):
         logging.debug("anthropic: Prepping data + prompt for submittal")
         user_message = {
             "role": "user",
-            "content": f"{text} \n\n\n\nPlease provide a detailed, bulleted list of the points made throughout the transcribed video and any supporting arguments made for said points"
         }
         data = {
@@ -913,10 +912,10 @@ def summarize_with_cohere(api_key, file_path, model):
             'Authorization': f'Bearer {api_key}'
         }
-        prompt_text = f"{text} \n\nAs a professional summarizer, create a concise and comprehensive summary of the provided text."
         data = {
             "chat_history": [
-                {"role": "USER", "message": prompt_text}
             ],
             "message": "Please provide a summary.",
             "model": model,
@@ -964,12 +963,12 @@ def summarize_with_groq(api_key, file_path, model):
             'Content-Type': 'application/json'
         }
-        prompt_text = f"{text} \n\nAs a professional summarizer, create a concise and comprehensive summary of the provided text."
         data = {
             "messages": [
                 {
                     "role": "user",
-                    "content": prompt_text
                 }
             ],
             "model": model
@@ -1021,12 +1020,13 @@ def summarize_with_llama(api_url, file_path, token):
             headers['Authorization'] = f'Bearer {token}'
-        prompt_text = f"{text} \n\nAs a professional summarizer, create a concise and comprehensive summary of the provided text."
         data = {
-            "prompt": prompt_text
         }
-        logging.debug("llama: Submitting request to API endpoint")
         print("llama: Submitting request to API endpoint")
         response = requests.post(api_url, headers=headers, json=data)
         response_data = response.json()
@@ -1064,13 +1064,13 @@ def summarize_with_kobold(api_url, file_path):
             'content-type': 'application/json',
         }
         # FIXME
-        prompt_text = f"{text} \n\nAs a professional summarizer, create a concise and comprehensive summary of the above text."
-        logging.debug(prompt_text)
         # Values literally c/p from the api docs....
         data = {
             "max_context_length": 8096,
             "max_length": 4096,
-            "prompt": prompt_text,
         }
         logging.debug("kobold: Submitting request to API endpoint")
@@ -1114,9 +1114,9 @@ def summarize_with_oobabooga(api_url, file_path):
             'content-type': 'application/json',
         }
-        prompt_text = "I like to eat cake and bake cakes. I am a baker. I work in a french bakery baking cakes. It is a fun job. I have been baking cakes for ten years. I also bake lots of other baked goods, but cakes are my favorite."
-        # prompt_text += f"\n\n{text}"  # Uncomment this line if you want to include the text variable
-        prompt_text += "\n\nAs a professional summarizer, create a concise and comprehensive summary of the provided text."
         data =  {
             "mode": "chat",
@@ -1268,6 +1268,7 @@ def launch_ui(demo_mode=False):
         gr.components.Textbox(label="URL of video to be Transcribed/Summarized"),
         gr.components.Number(value=2, label="Number of Speakers (for Diarization)"),
         gr.components.Dropdown(choices=whisper_models, value="small.en", label="Whisper Model (Can ignore this)"),
         gr.components.Number(value=0, label="Offset time to start transcribing from\n\n (helpful if you only want part of a video/lecture)")
     ]
@@ -1316,6 +1317,7 @@ def launch_ui(demo_mode=False):
 ####################################################################################################################################
 # Main()
 #
 def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model="small.en", offset=0, vad_filter=False, download_video_flag=False, demo_mode=False):
     if input_path is None and args.user_interface:
         return []

 #
 #
 # Usage:
 #
+#   Download Audio only from URL -> Transcribe audio:
+#       python summarize.py https://www.youtube.com/watch?v=4nd1CDZP21s`
 #
+#   Download Audio+Video from URL -> Transcribe audio from Video:**
+#       python summarize.py -v https://www.youtube.com/watch?v=4nd1CDZP21s`
 #
+#   Download Audio only from URL -> Transcribe audio -> Summarize using (`anthropic`/`cohere`/`openai`/`llama` (llama.cpp)/`ooba` (oobabooga/text-gen-webui)/`kobold` (kobold.cpp)/`tabby` (Tabbyapi)) API:**
+#       python summarize.py -v https://www.youtube.com/watch?v=4nd1CDZP21s -api <your choice of API>` - Make sure to put your API key into `config.txt` under the appropriate API variable
+#
+#   Download Audio+Video from a list of videos in a text file (can be file paths or URLs) and have them all summarized:**
+#       python summarize.py ./local/file_on_your/system --api_name <API_name>`
+#
+#   Run it as a WebApp**
+#       python summarize.py -gui` - This requires you to either stuff your API keys into the `config.txt` file, or pass them into the app every time you want to use it.
+#           Can be helpful for setting up a shared instance, but not wanting people to perform inference on your server.
 #
 ###
 # Video Download/Handling
 #
+def process_url(input_path, num_speakers=2, whisper_model="small.en", offset=0, api_name=None, api_key=None, vad_filter=False, download_video_flag=False,custom_prompt=None, demo_mode=True):
     if demo_mode:
         api_name = "huggingface"
         api_key = os.environ.get(HF_TOKEN)
         }
         logging.debug("openai: Preparing data + prompt for submittal")
+        openai_prompt = f"{text} \n\n\n\n{prompt_text}"
         data = {
             "model": model,
             "messages": [
                 },
                 {
                     "role": "user",
+                    "content": openai_prompt
                 }
             ],
             "max_tokens": 4096,  # Adjust tokens as needed
         logging.debug("anthropic: Prepping data + prompt for submittal")
         user_message = {
             "role": "user",
+            "content": f"{text} \n\n\n\n{prompt_text}"
         }
         data = {
             'Authorization': f'Bearer {api_key}'
         }
+        cohere_prompt = f"{text} \n\n\n\n{prompt_text}"
         data = {
             "chat_history": [
+                {"role": "USER", "message": cohere_prompt}
             ],
             "message": "Please provide a summary.",
             "model": model,
             'Content-Type': 'application/json'
         }
+        groq_prompt = f"{text} \n\n\n\n{prompt_text}"
         data = {
             "messages": [
                 {
                     "role": "user",
+                    "content": groq_prompt
                 }
             ],
             "model": model
             headers['Authorization'] = f'Bearer {token}'
+        llama_prompt = f"{text} \n\n\n\n{prompt_text}"
+        logging.debug(f"llama: Complete prompt is: {llama_prompt}")
         data = {
+            "prompt": llama_prompt
         }
+        #logging.debug(f"llama: Submitting request to API endpoint {llama_prompt}")
         print("llama: Submitting request to API endpoint")
         response = requests.post(api_url, headers=headers, json=data)
         response_data = response.json()
             'content-type': 'application/json',
         }
         # FIXME
+        kobold_prompt = f"{text} \n\n\n\n{prompt_text}"
+        logging.debug(kobold_prompt)
         # Values literally c/p from the api docs....
         data = {
             "max_context_length": 8096,
             "max_length": 4096,
+            "prompt": kobold_prompt,
         }
         logging.debug("kobold: Submitting request to API endpoint")
             'content-type': 'application/json',
         }
+        #prompt_text = "I like to eat cake and bake cakes. I am a baker. I work in a french bakery baking cakes. It is a fun job. I have been baking cakes for ten years. I also bake lots of other baked goods, but cakes are my favorite."
+        #prompt_text += f"\n\n{text}"  # Uncomment this line if you want to include the text variable
+        ooba_prompt = f"{text}\n\n\n\n{prompt_text}"
         data =  {
             "mode": "chat",
         gr.components.Textbox(label="URL of video to be Transcribed/Summarized"),
         gr.components.Number(value=2, label="Number of Speakers (for Diarization)"),
         gr.components.Dropdown(choices=whisper_models, value="small.en", label="Whisper Model (Can ignore this)"),
+        gr.components.Textbox(label="Custom Prompt", value="Please provide a detailed, bulleted list of the points made throughout the transcribed video and any supporting arguments made for said points", lines=3),
         gr.components.Number(value=0, label="Offset time to start transcribing from\n\n (helpful if you only want part of a video/lecture)")
     ]
 ####################################################################################################################################
 # Main()
 #
 def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model="small.en", offset=0, vad_filter=False, download_video_flag=False, demo_mode=False):
     if input_path is None and args.user_interface:
         return []