oceansweep committed on
Commit
d6b96dc
1 Parent(s): 62a9bae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -31
app.py CHANGED
@@ -45,23 +45,22 @@ import yt_dlp
45
  #
46
  #
47
  # Usage:
48
- # Transcribe a single URL:
49
- # python diarize.py https://example.com/video.mp4
50
  #
51
- # Transcribe a single URL and have the resulting transcription summarized:
52
- # python diarize.py https://example.com/video.mp4
53
  #
54
- # Transcribe a list of files:
55
- # python diarize.py ./path/to/your/text_file.txt
56
  #
57
- # Transcribe a local file:
58
- # python diarize.py /path/to/your/localfile.mp4
59
- #
60
- # Transcribe a local file and have it summarized:
61
- # python diarize.py ./input.mp4 --api_name openai --api_key <your_openai_api_key>
62
- #
63
- # Transcribe a list of files and have them all summarized:
64
- # python diarize.py path_to_your_text_file.txt --api_name <openai> --api_key <your_openai_api_key>
 
65
  #
66
  ###
67
 
@@ -350,7 +349,7 @@ def process_local_file(file_path):
350
  # Video Download/Handling
351
  #
352
 
353
- def process_url(input_path, num_speakers=2, whisper_model="small.en", offset=0, api_name=None, api_key=None, vad_filter=False, download_video_flag=False, demo_mode=True):
354
  if demo_mode:
355
  api_name = "huggingface"
356
  api_key = os.environ.get(HF_TOKEN)
@@ -793,7 +792,7 @@ def summarize_with_openai(api_key, file_path, model):
793
  }
794
 
795
  logging.debug("openai: Preparing data + prompt for submittal")
796
- prompt_text = f"{text} \n\n\n\nPlease provide a detailed, bulleted list of the points made throughout the transcribed video and any supporting arguments made for said points"
797
  data = {
798
  "model": model,
799
  "messages": [
@@ -803,7 +802,7 @@ def summarize_with_openai(api_key, file_path, model):
803
  },
804
  {
805
  "role": "user",
806
- "content": prompt_text
807
  }
808
  ],
809
  "max_tokens": 4096, # Adjust tokens as needed
@@ -846,7 +845,7 @@ def summarize_with_claude(api_key, file_path, model):
846
  logging.debug("anthropic: Prepping data + prompt for submittal")
847
  user_message = {
848
  "role": "user",
849
- "content": f"{text} \n\n\n\nPlease provide a detailed, bulleted list of the points made throughout the transcribed video and any supporting arguments made for said points"
850
  }
851
 
852
  data = {
@@ -913,10 +912,10 @@ def summarize_with_cohere(api_key, file_path, model):
913
  'Authorization': f'Bearer {api_key}'
914
  }
915
 
916
- prompt_text = f"{text} \n\nAs a professional summarizer, create a concise and comprehensive summary of the provided text."
917
  data = {
918
  "chat_history": [
919
- {"role": "USER", "message": prompt_text}
920
  ],
921
  "message": "Please provide a summary.",
922
  "model": model,
@@ -964,12 +963,12 @@ def summarize_with_groq(api_key, file_path, model):
964
  'Content-Type': 'application/json'
965
  }
966
 
967
- prompt_text = f"{text} \n\nAs a professional summarizer, create a concise and comprehensive summary of the provided text."
968
  data = {
969
  "messages": [
970
  {
971
  "role": "user",
972
- "content": prompt_text
973
  }
974
  ],
975
  "model": model
@@ -1021,12 +1020,13 @@ def summarize_with_llama(api_url, file_path, token):
1021
  headers['Authorization'] = f'Bearer {token}'
1022
 
1023
 
1024
- prompt_text = f"{text} \n\nAs a professional summarizer, create a concise and comprehensive summary of the provided text."
 
1025
  data = {
1026
- "prompt": prompt_text
1027
  }
1028
 
1029
- logging.debug("llama: Submitting request to API endpoint")
1030
  print("llama: Submitting request to API endpoint")
1031
  response = requests.post(api_url, headers=headers, json=data)
1032
  response_data = response.json()
@@ -1064,13 +1064,13 @@ def summarize_with_kobold(api_url, file_path):
1064
  'content-type': 'application/json',
1065
  }
1066
  # FIXME
1067
- prompt_text = f"{text} \n\nAs a professional summarizer, create a concise and comprehensive summary of the above text."
1068
- logging.debug(prompt_text)
1069
  # Values literally c/p from the api docs....
1070
  data = {
1071
  "max_context_length": 8096,
1072
  "max_length": 4096,
1073
- "prompt": prompt_text,
1074
  }
1075
 
1076
  logging.debug("kobold: Submitting request to API endpoint")
@@ -1114,9 +1114,9 @@ def summarize_with_oobabooga(api_url, file_path):
1114
  'content-type': 'application/json',
1115
  }
1116
 
1117
- prompt_text = "I like to eat cake and bake cakes. I am a baker. I work in a french bakery baking cakes. It is a fun job. I have been baking cakes for ten years. I also bake lots of other baked goods, but cakes are my favorite."
1118
- # prompt_text += f"\n\n{text}" # Uncomment this line if you want to include the text variable
1119
- prompt_text += "\n\nAs a professional summarizer, create a concise and comprehensive summary of the provided text."
1120
 
1121
  data = {
1122
  "mode": "chat",
@@ -1268,6 +1268,7 @@ def launch_ui(demo_mode=False):
1268
  gr.components.Textbox(label="URL of video to be Transcribed/Summarized"),
1269
  gr.components.Number(value=2, label="Number of Speakers (for Diarization)"),
1270
  gr.components.Dropdown(choices=whisper_models, value="small.en", label="Whisper Model (Can ignore this)"),
 
1271
  gr.components.Number(value=0, label="Offset time to start transcribing from\n\n (helpful if you only want part of a video/lecture)")
1272
  ]
1273
 
@@ -1316,6 +1317,7 @@ def launch_ui(demo_mode=False):
1316
  ####################################################################################################################################
1317
  # Main()
1318
  #
 
1319
  def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model="small.en", offset=0, vad_filter=False, download_video_flag=False, demo_mode=False):
1320
  if input_path is None and args.user_interface:
1321
  return []
 
45
  #
46
  #
47
  # Usage:
 
 
48
  #
49
+ # Download Audio only from URL -> Transcribe audio:
50
+ # python summarize.py https://www.youtube.com/watch?v=4nd1CDZP21s
51
  #
52
+ # Download Audio+Video from URL -> Transcribe audio from Video:
53
+ # python summarize.py -v https://www.youtube.com/watch?v=4nd1CDZP21s
54
  #
55
+ # Download Audio only from URL -> Transcribe audio -> Summarize using (`anthropic`/`cohere`/`openai`/`llama` (llama.cpp)/`ooba` (oobabooga/text-gen-webui)/`kobold` (kobold.cpp)/`tabby` (Tabbyapi)) API:
56
+ # python summarize.py https://www.youtube.com/watch?v=4nd1CDZP21s -api <your choice of API>  (make sure to put your API key into `config.txt` under the appropriate API variable)
57
+ #
58
+ # Download Audio+Video from a list of videos in a text file (can be file paths or URLs) and have them all summarized:
59
+ # python summarize.py ./local/file_on_your/system --api_name <API_name>
60
+ #
61
+ # Run it as a WebApp:
62
+ # python summarize.py -gui  (requires you to either put your API keys into the `config.txt` file, or pass them into the app every time you want to use it)
63
+ # Helpful when setting up a shared instance where you do not want people to perform inference on your server.
64
  #
65
  ###
66
 
 
349
  # Video Download/Handling
350
  #
351
 
352
+ def process_url(input_path, num_speakers=2, whisper_model="small.en", offset=0, api_name=None, api_key=None, vad_filter=False, download_video_flag=False,custom_prompt=None, demo_mode=True):
353
  if demo_mode:
354
  api_name = "huggingface"
355
  api_key = os.environ.get(HF_TOKEN)
 
792
  }
793
 
794
  logging.debug("openai: Preparing data + prompt for submittal")
795
+ openai_prompt = f"{text} \n\n\n\n{prompt_text}"
796
  data = {
797
  "model": model,
798
  "messages": [
 
802
  },
803
  {
804
  "role": "user",
805
+ "content": openai_prompt
806
  }
807
  ],
808
  "max_tokens": 4096, # Adjust tokens as needed
 
845
  logging.debug("anthropic: Prepping data + prompt for submittal")
846
  user_message = {
847
  "role": "user",
848
+ "content": f"{text} \n\n\n\n{prompt_text}"
849
  }
850
 
851
  data = {
 
912
  'Authorization': f'Bearer {api_key}'
913
  }
914
 
915
+ cohere_prompt = f"{text} \n\n\n\n{prompt_text}"
916
  data = {
917
  "chat_history": [
918
+ {"role": "USER", "message": cohere_prompt}
919
  ],
920
  "message": "Please provide a summary.",
921
  "model": model,
 
963
  'Content-Type': 'application/json'
964
  }
965
 
966
+ groq_prompt = f"{text} \n\n\n\n{prompt_text}"
967
  data = {
968
  "messages": [
969
  {
970
  "role": "user",
971
+ "content": groq_prompt
972
  }
973
  ],
974
  "model": model
 
1020
  headers['Authorization'] = f'Bearer {token}'
1021
 
1022
 
1023
+ llama_prompt = f"{text} \n\n\n\n{prompt_text}"
1024
+ logging.debug(f"llama: Complete prompt is: {llama_prompt}")
1025
  data = {
1026
+ "prompt": llama_prompt
1027
  }
1028
 
1029
+ #logging.debug(f"llama: Submitting request to API endpoint {llama_prompt}")
1030
  print("llama: Submitting request to API endpoint")
1031
  response = requests.post(api_url, headers=headers, json=data)
1032
  response_data = response.json()
 
1064
  'content-type': 'application/json',
1065
  }
1066
  # FIXME
1067
+ kobold_prompt = f"{text} \n\n\n\n{prompt_text}"
1068
+ logging.debug(kobold_prompt)
1069
  # Values literally c/p from the api docs....
1070
  data = {
1071
  "max_context_length": 8096,
1072
  "max_length": 4096,
1073
+ "prompt": kobold_prompt,
1074
  }
1075
 
1076
  logging.debug("kobold: Submitting request to API endpoint")
 
1114
  'content-type': 'application/json',
1115
  }
1116
 
1117
+ #prompt_text = "I like to eat cake and bake cakes. I am a baker. I work in a french bakery baking cakes. It is a fun job. I have been baking cakes for ten years. I also bake lots of other baked goods, but cakes are my favorite."
1118
+ #prompt_text += f"\n\n{text}" # Uncomment this line if you want to include the text variable
1119
+ ooba_prompt = f"{text}\n\n\n\n{prompt_text}"
1120
 
1121
  data = {
1122
  "mode": "chat",
 
1268
  gr.components.Textbox(label="URL of video to be Transcribed/Summarized"),
1269
  gr.components.Number(value=2, label="Number of Speakers (for Diarization)"),
1270
  gr.components.Dropdown(choices=whisper_models, value="small.en", label="Whisper Model (Can ignore this)"),
1271
+ gr.components.Textbox(label="Custom Prompt", value="Please provide a detailed, bulleted list of the points made throughout the transcribed video and any supporting arguments made for said points", lines=3),
1272
  gr.components.Number(value=0, label="Offset time to start transcribing from\n\n (helpful if you only want part of a video/lecture)")
1273
  ]
1274
 
 
1317
  ####################################################################################################################################
1318
  # Main()
1319
  #
1320
+
1321
  def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model="small.en", offset=0, vad_filter=False, download_video_flag=False, demo_mode=False):
1322
  if input_path is None and args.user_interface:
1323
  return []