oceansweep committed on
Commit
e3cd24c
1 Parent(s): 0911ebb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -75
app.py CHANGED
@@ -70,9 +70,9 @@ os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
70
  # Download Audio+Video from a list of videos in a text file (can be file paths or URLs) and have them all summarized:**
71
  # python summarize.py ./local/file_on_your/system --api_name <API_name>`
72
  #
73
- # Run it as a WebApp**
74
- # python summarize.py -gui` - This requires you to either stuff your API keys into the `config.txt` file, or pass them into the app every time you want to use it.
75
- # Can be helpful for setting up a shared instance, but not wanting people to perform inference on your server.
76
  #
77
  ###
78
 
@@ -172,7 +172,7 @@ print(r"""
172
  \__,_||_| \__,_||_| |_| \__| \_/\_/ \__,_| \__| \___||_| |_|
173
  """)
174
 
175
- ####################################################################################################################################
176
  # System Checks
177
  #
178
  #
@@ -234,12 +234,14 @@ def check_ffmpeg():
234
  else:
235
  logging.debug("ffmpeg not installed on the local system/in local PATH")
236
  print(
237
- "ffmpeg is not installed.\n\n You can either install it manually, or through your package manager of choice.\n Windows users, builds are here: https://www.gyan.dev/ffmpeg/builds/")
 
238
  if userOS == "Windows":
239
  download_ffmpeg()
240
  elif userOS == "Linux":
241
  print(
242
- "You should install ffmpeg using your platform's appropriate package manager, 'apt install ffmpeg','dnf install ffmpeg' or 'pacman', etc.")
 
243
  else:
244
  logging.debug("running an unsupported OS")
245
  print("You're running an unsupported/Un-tested OS")
@@ -298,10 +300,10 @@ def download_ffmpeg():
298
 
299
  #
300
  #
301
- ####################################################################################################################################
302
 
303
 
304
- ####################################################################################################################################
305
  # Processing Paths and local file handling
306
  #
307
  #
@@ -352,16 +354,17 @@ def process_local_file(file_path):
352
 
353
  #
354
  #
355
- ####################################################################################################################################
356
 
357
 
358
- ####################################################################################################################################
359
  # Video Download/Handling
360
  #
361
 
362
  def process_url(url, num_speakers, whisper_model, custom_prompt, offset, api_name, api_key, vad_filter,
363
  download_video, download_audio, chunk_size):
364
  video_file_path = None
 
365
  try:
366
  results = main(url, api_name=api_name, api_key=api_key, num_speakers=num_speakers,
367
  whisper_model=whisper_model, offset=offset, vad_filter=vad_filter,
@@ -534,10 +537,10 @@ def download_video(video_url, download_path, info_dict, download_video_flag):
534
 
535
  #
536
  #
537
- ####################################################################################################################################
538
 
539
 
540
- ####################################################################################################################################
541
  # Audio Transcription
542
  #
543
  # Convert video .m4a into .wav using ffmpeg
@@ -546,7 +549,13 @@ def download_video(video_url, download_path, info_dict, download_video_flag):
546
  #
547
 
548
  # os.system(r'.\Bin\ffmpeg.exe -ss 00:00:00 -i "{video_file_path}" -ar 16000 -ac 1 -c:a pcm_s16le "{out_path}"')
549
- def convert_to_wav(video_file_path, offset=0):
 
 
 
 
 
 
550
  print("Starting conversion process of .m4a to .WAV")
551
  out_path = os.path.splitext(video_file_path)[0] + ".wav"
552
 
@@ -641,10 +650,10 @@ def speech_to_text(audio_file_path, selected_source_lang='en', whisper_model='sm
641
 
642
  #
643
  #
644
- ####################################################################################################################################
645
 
646
 
647
- ####################################################################################################################################
648
  # Diarization
649
  #
650
  # TODO: https://huggingface.co/pyannote/speaker-diarization-3.1
@@ -666,7 +675,8 @@ def speech_to_text(audio_file_path, selected_source_lang='en', whisper_model='sm
666
  # import tqdm
667
  # import wave
668
  #
669
- # embedding_model = PretrainedSpeakerEmbedding( embedding_model, device=torch.device("cuda" if torch.cuda.is_available() else "cpu"))
 
670
  #
671
  #
672
  # _,file_ending = os.path.splitext(f'{video_file_path}')
@@ -761,10 +771,10 @@ def speech_to_text(audio_file_path, selected_source_lang='en', whisper_model='sm
761
  # raise RuntimeError("Error Running inference with local model", e)
762
  #
763
  #
764
- ####################################################################################################################################
765
 
766
 
767
- ####################################################################################################################################
768
  # Summarizers
769
  #
770
  #
@@ -1055,18 +1065,20 @@ def summarize_with_llama(api_url, file_path, token, custom_prompt):
1055
 
1056
 
1057
  # https://lite.koboldai.net/koboldcpp_api#/api%2Fv1/post_api_v1_generate
1058
- def summarize_with_kobold(api_url, file_path, custom_prompt):
1059
  try:
1060
  logging.debug("kobold: Loading JSON data")
1061
- with open(file_path, 'r') as file:
1062
  segments = json.load(file)
1063
 
1064
  logging.debug(f"kobold: Extracting text from segments file")
1065
  text = extract_text_from_segments(segments)
1066
 
 
1067
  headers = {
1068
  'accept': 'application/json',
1069
  'content-type': 'application/json',
 
1070
  }
1071
 
1072
  kobold_prompt = f"{text} \n\n\n\n{custom_prompt}"
@@ -1082,7 +1094,7 @@ def summarize_with_kobold(api_url, file_path, custom_prompt):
1082
 
1083
  logging.debug("kobold: Submitting request to API endpoint")
1084
  print("kobold: Submitting request to API endpoint")
1085
- response = requests.post(api_url, headers=headers, json=data)
1086
  response_data = response.json()
1087
  logging.debug("kobold: API Response Data: %s", response_data)
1088
 
@@ -1105,24 +1117,26 @@ def summarize_with_kobold(api_url, file_path, custom_prompt):
1105
 
1106
 
1107
  # https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API
1108
- def summarize_with_oobabooga(api_url, file_path, custom_prompt):
1109
  try:
1110
  logging.debug("ooba: Loading JSON data")
1111
- with open(file_path, 'r') as file:
1112
  segments = json.load(file)
1113
 
1114
  logging.debug(f"ooba: Extracting text from segments file\n\n\n")
1115
  text = extract_text_from_segments(segments)
1116
  logging.debug(f"ooba: Finished extracting text from segments file")
1117
 
 
1118
  headers = {
1119
  'accept': 'application/json',
1120
  'content-type': 'application/json',
1121
  }
1122
 
1123
- # prompt_text = "I like to eat cake and bake cakes. I am a baker. I work in a French bakery baking cakes. It is a fun job. I have been baking cakes for ten years. I also bake lots of other baked goods, but cakes are my favorite."
1124
- # prompt_text += f"\n\n{text}" # Uncomment this line if you want to include the text variable
1125
- ooba_prompt = "{text}\n\n\n\n{custom_prompt}"
 
1126
  logging.debug("ooba: Prompt being sent is {ooba_prompt}")
1127
 
1128
  data = {
@@ -1133,7 +1147,7 @@ def summarize_with_oobabooga(api_url, file_path, custom_prompt):
1133
 
1134
  logging.debug("ooba: Submitting request to API endpoint")
1135
  print("ooba: Submitting request to API endpoint")
1136
- response = requests.post(api_url, headers=headers, json=data, verify=False)
1137
  logging.debug("ooba: API Response Data: %s", response)
1138
 
1139
  if response.status_code == 200:
@@ -1161,28 +1175,28 @@ def save_summary_to_file(summary, file_path):
1161
 
1162
  #
1163
  #
1164
- ####################################################################################################################################
1165
 
1166
 
1167
- ####################################################################################################################################
1168
  # Gradio UI
1169
  #
1170
 
1171
  # Only to be used when configured with Gradio for HF Space
1172
- def summarize_with_huggingface(api_key, file_path):
1173
  logging.debug(f"huggingface: Summarization process starting...")
1174
 
1175
  model = "microsoft/Phi-3-mini-128k-instruct"
1176
  API_URL = f"https://api-inference.huggingface.co/models/{model}"
1177
- headers = {"Authorization": f"Bearer {api_key}"}
1178
 
1179
- with open(file_path, 'r') as file:
1180
  segments = json.load(file)
1181
  text = ''.join([segment['text'] for segment in segments])
1182
 
1183
  # FIXME adjust max_length and min_length as needed
1184
  data = {
1185
- "inputs": text,
1186
  "parameters": {"max_length": 4096, "min_length": 100}
1187
  }
1188
 
@@ -1197,24 +1211,26 @@ def summarize_with_huggingface(api_key, file_path):
1197
  response_data = response.json()
1198
  wait_time = response_data.get('estimated_time', 10)
1199
  return None, f"Model is loading, retrying in {int(wait_time)} seconds..."
 
1200
  # Sleep before retrying....
1201
- time.sleep(wait_time)
1202
 
1203
- if api_key == "":
1204
- api_key = os.environ.get(HF_TOKEN)
1205
- logging.debug("HUGGINGFACE API KEY CHECK: " + api_key)
1206
  try:
1207
  logging.debug("huggingface: Loading json data for summarization")
1208
- with open(file_path, 'r') as file:
1209
  segments = json.load(file)
1210
 
1211
  logging.debug("huggingface: Extracting text from the segments")
1212
  text = ' '.join([segment['text'] for segment in segments])
1213
 
1214
- api_key = os.environ.get(HF_TOKEN)
1215
- logging.debug("HUGGINGFACE API KEY CHECK #2: " + api_key)
1216
 
1217
  logging.debug("huggingface: Submitting request...")
 
1218
  response = requests.post(API_URL, headers=headers, json=data)
1219
 
1220
  if response.status_code == 200:
@@ -1230,8 +1246,11 @@ def summarize_with_huggingface(api_key, file_path):
1230
  print(f"Error occurred while processing summary with huggingface: {str(e)}")
1231
  return None
1232
 
1233
- def same_auth(username, password):
1234
- return username == password
 
 
 
1235
 
1236
 
1237
  def format_transcription(transcription_result):
@@ -1242,19 +1261,6 @@ def format_transcription(transcription_result):
1242
  return ""
1243
 
1244
 
1245
- def process_text(api_key, text_file):
1246
- summary, message = summarize_with_huggingface(api_key, text_file)
1247
- if summary:
1248
- # Show summary on success
1249
- return "Summary:", summary
1250
- else:
1251
- # Inform user about load/wait time
1252
- return "Notice:", message
1253
-
1254
-
1255
-
1256
-
1257
-
1258
  def format_file_path(file_path):
1259
  # Helper function to check file existence and return an appropriate path or message
1260
  return file_path if file_path and os.path.exists(file_path) else None
@@ -1294,7 +1300,9 @@ def launch_ui(demo_mode=False):
1294
  visible=False)
1295
  custom_prompt_input = gr.Textbox(
1296
  label="Custom Prompt (Customize your summary, or ask a different question)",
1297
- placeholder="Q: As a professional summarizer, create a concise and comprehensive summary of the provided text.\nA: Here is a detailed, bulleted list of the key points made in the transcribed video and supporting arguments:",
 
 
1298
  lines=3, visible=True)
1299
  offset_input = gr.Number(value=0, label="Offset (Seconds into the video to start transcribing at)",
1300
  visible=False)
@@ -1347,8 +1355,9 @@ def launch_ui(demo_mode=False):
1347
  fn=process_url,
1348
  inputs=all_inputs,
1349
  outputs=outputs,
1350
- title="Video Transcription and Summarization",
1351
- description="Submit a video URL for transcription and summarization. Ensure you input all necessary information including API keys."
 
1352
  )
1353
 
1354
  with gr.Tab("Transcription & Summarization History"):
@@ -1371,7 +1380,7 @@ def launch_ui(demo_mode=False):
1371
  #
1372
 
1373
  def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model="small.en", offset=0, vad_filter=False,
1374
- download_video_flag=False, demo_mode=False, custom_prompt=None):
1375
  if input_path is None and args.user_interface:
1376
  return []
1377
  start_time = time.monotonic()
@@ -1385,7 +1394,10 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
1385
  elif (info_dict := get_youtube(input_path)) and 'entries' in info_dict:
1386
  logging.debug("MAIN: YouTube playlist detected")
1387
  print(
1388
- "\n\nSorry, but playlists aren't currently supported. You can run the following command to generate a text file that you can then pass into this script though! (It may not work... playlist support seems spotty)" + """\n\n\tpython Get_Playlist_URLs.py <Youtube Playlist URL>\n\n\tThen,\n\n\tpython diarizer.py <playlist text file name>\n\n""")
 
 
 
1389
  return
1390
  else:
1391
  paths = [input_path]
@@ -1399,8 +1411,7 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
1399
  if info_dict:
1400
  logging.debug("MAIN: Creating path for video file...")
1401
  download_path = create_download_directory(info_dict['title'])
1402
- logging.debug("MAIN: Path created successfully")
1403
- logging.debug("MAIN: Downloading video from yt_dlp...")
1404
  try:
1405
  video_path = download_video(path, download_path, info_dict, download_video_flag)
1406
  except RuntimeError as e:
@@ -1431,6 +1442,17 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
1431
  logging.info(f"Transcription complete: {audio_file}")
1432
 
1433
  # Perform summarization based on the specified API
 
 
 
 
 
 
 
 
 
 
 
1434
  if api_name and api_key:
1435
  logging.debug(f"MAIN: Summarization being performed by {api_name}")
1436
  json_file_path = audio_file.replace('.wav', '.segments.json')
@@ -1441,6 +1463,15 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
1441
  summary = summarize_with_openai(openai_api_key, json_file_path, openai_model, custom_prompt)
1442
  except requests.exceptions.ConnectionError:
1443
  requests.status_code = "Connection: "
 
 
 
 
 
 
 
 
 
1444
  elif api_name.lower() == "anthropic":
1445
  anthropic_api_key = api_key if api_key else config.get('API', 'anthropic_api_key', fallback=None)
1446
  try:
@@ -1486,16 +1517,6 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
1486
  summary = summarize_with_oobabooga(ooba_ip, json_file_path, ooba_token, custom_prompt)
1487
  except requests.exceptions.ConnectionError:
1488
  requests.status_code = "Connection: "
1489
- elif api_name.lower() == "huggingface":
1490
- huggingface_api_key = os.environ.get(HF_TOKEN)
1491
- if (huggingface_api_key is None):
1492
- huggingface_api_key = api_key if api_key else config.get('API', 'huggingface_api_key', fallback=None)
1493
- try:
1494
- logging.debug(f"MAIN: Trying to summarize with huggingface")
1495
- summarize_with_huggingface(huggingface_api_key, json_file_path, custom_prompt)
1496
- except requests.exceptions.ConnectionError:
1497
- requests.status_code = "Connection: "
1498
-
1499
  else:
1500
  logging.warning(f"Unsupported API: {api_name}")
1501
  summary = None
@@ -1507,10 +1528,11 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
1507
  else:
1508
  logging.warning(f"Failed to generate summary using {api_name} API")
1509
  else:
1510
- logging.info("No API specified. Summarization will not be performed")
1511
  except Exception as e:
1512
  logging.error(f"Error processing path: {path}")
1513
  logging.error(str(e))
 
1514
  # end_time = time.monotonic()
1515
  # print("Total program execution time: " + timedelta(seconds=end_time - start_time))
1516
 
@@ -1522,6 +1544,7 @@ if __name__ == "__main__":
1522
  parser.add_argument('input_path', type=str, help='Path or URL of the video', nargs='?')
1523
  parser.add_argument('-v', '--video', action='store_true', help='Download the video instead of just the audio')
1524
  parser.add_argument('-api', '--api_name', type=str, help='API name for summarization (optional)')
 
1525
  parser.add_argument('-ns', '--num_speakers', type=int, default=2, help='Number of speakers (default: 2)')
1526
  parser.add_argument('-wm', '--whisper_model', type=str, default='small.en',
1527
  help='Whisper model (default: small.en)')
@@ -1575,7 +1598,7 @@ if __name__ == "__main__":
1575
  logging.info(f'API: {args.api_name}')
1576
  logging.info('Summarization will be performed.')
1577
  else:
1578
- logging.info('No API specified. Summarization will not be performed.')
1579
 
1580
  logging.debug("Platform check being performed...")
1581
  platform_check()
@@ -1590,7 +1613,7 @@ if __name__ == "__main__":
1590
  try:
1591
  results = main(args.input_path, api_name=args.api_name, api_key=args.api_key,
1592
  num_speakers=args.num_speakers, whisper_model=args.whisper_model, offset=args.offset,
1593
- vad_filter=args.vad_filter, download_video_flag=args.video)
1594
  logging.info('Transcription process completed.')
1595
  except Exception as e:
1596
  logging.error('An error occurred during the transcription process.')
 
70
  # Download Audio+Video from a list of videos in a text file (can be file paths or URLs) and have them all summarized:**
71
  # python summarize.py ./local/file_on_your/system --api_name <API_name>`
72
  #
73
+ # Run it as a WebApp** python summarize.py -gui` - This requires you to either stuff your API keys into the
74
+ # `config.txt` file, or pass them into the app every time you want to use it. Can be helpful for setting up a shared
75
+ # instance, but not wanting people to perform inference on your server.
76
  #
77
  ###
78
 
 
172
  \__,_||_| \__,_||_| |_| \__| \_/\_/ \__,_| \__| \___||_| |_|
173
  """)
174
 
175
+ #######################################################################################################################
176
  # System Checks
177
  #
178
  #
 
234
  else:
235
  logging.debug("ffmpeg not installed on the local system/in local PATH")
236
  print(
237
+ "ffmpeg is not installed.\n\n You can either install it manually, or through your package manager of "
238
+ "choice.\n Windows users, builds are here: https://www.gyan.dev/ffmpeg/builds/")
239
  if userOS == "Windows":
240
  download_ffmpeg()
241
  elif userOS == "Linux":
242
  print(
243
+ "You should install ffmpeg using your platform's appropriate package manager, 'apt install ffmpeg',"
244
+ "'dnf install ffmpeg' or 'pacman', etc.")
245
  else:
246
  logging.debug("running an unsupported OS")
247
  print("You're running an unsupported/Un-tested OS")
 
300
 
301
  #
302
  #
303
+ ########################################################################################################################
304
 
305
 
306
+ #######################################################################################################################
307
  # Processing Paths and local file handling
308
  #
309
  #
 
354
 
355
  #
356
  #
357
+ ########################################################################################################################
358
 
359
 
360
+ #######################################################################################################################
361
  # Video Download/Handling
362
  #
363
 
364
  def process_url(url, num_speakers, whisper_model, custom_prompt, offset, api_name, api_key, vad_filter,
365
  download_video, download_audio, chunk_size):
366
  video_file_path = None
367
+ print("API Name received:", api_name) # Debugging line
368
  try:
369
  results = main(url, api_name=api_name, api_key=api_key, num_speakers=num_speakers,
370
  whisper_model=whisper_model, offset=offset, vad_filter=vad_filter,
 
537
 
538
  #
539
  #
540
+ #######################################################################################################################
541
 
542
 
543
+ ######################################################################################################################
544
  # Audio Transcription
545
  #
546
  # Convert video .m4a into .wav using ffmpeg
 
549
  #
550
 
551
  # os.system(r'.\Bin\ffmpeg.exe -ss 00:00:00 -i "{video_file_path}" -ar 16000 -ac 1 -c:a pcm_s16le "{out_path}"')
552
+ def convert_to_wav(video_file_path, offset=0, overwrite=False):
553
+ out_path = os.path.splitext(video_file_path)[0] + ".wav"
554
+
555
+ if os.path.exists(out_path) and not overwrite:
556
+ print(f"File '{out_path}' already exists. Skipping conversion.")
557
+ logging.info(f"Skipping conversion as file already exists: {out_path}")
558
+ return out_path
559
  print("Starting conversion process of .m4a to .WAV")
560
  out_path = os.path.splitext(video_file_path)[0] + ".wav"
561
 
 
650
 
651
  #
652
  #
653
+ ######################################################################################################################
654
 
655
 
656
+ #######################################################################################################################
657
  # Diarization
658
  #
659
  # TODO: https://huggingface.co/pyannote/speaker-diarization-3.1
 
675
  # import tqdm
676
  # import wave
677
  #
678
+ # embedding_model = PretrainedSpeakerEmbedding( embedding_model, device=torch.device("cuda" if
679
+ # torch.cuda.is_available() else "cpu"))
680
  #
681
  #
682
  # _,file_ending = os.path.splitext(f'{video_file_path}')
 
771
  # raise RuntimeError("Error Running inference with local model", e)
772
  #
773
  #
774
+ ######################################################################################################################
775
 
776
 
777
+ #######################################################################################################################
778
  # Summarizers
779
  #
780
  #
 
1065
 
1066
 
1067
  # https://lite.koboldai.net/koboldcpp_api#/api%2Fv1/post_api_v1_generate
1068
+ def summarize_with_kobold(kobold_ip, json_file_path, kobold_token, custom_prompt):
1069
  try:
1070
  logging.debug("kobold: Loading JSON data")
1071
+ with open(json_file_path, 'r') as file:
1072
  segments = json.load(file)
1073
 
1074
  logging.debug(f"kobold: Extracting text from segments file")
1075
  text = extract_text_from_segments(segments)
1076
 
1077
+ # FIXME - API Key generated from copilot...kobold.cpp doesn't mention the header for it either...
1078
  headers = {
1079
  'accept': 'application/json',
1080
  'content-type': 'application/json',
1081
+ 'X_API_KEY': kobold_token
1082
  }
1083
 
1084
  kobold_prompt = f"{text} \n\n\n\n{custom_prompt}"
 
1094
 
1095
  logging.debug("kobold: Submitting request to API endpoint")
1096
  print("kobold: Submitting request to API endpoint")
1097
+ response = requests.post(kobold_ip, headers=headers, json=data)
1098
  response_data = response.json()
1099
  logging.debug("kobold: API Response Data: %s", response_data)
1100
 
 
1117
 
1118
 
1119
  # https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API
1120
+ def summarize_with_oobabooga(ooba_ip, json_file_path, ooba_token, custom_prompt):
1121
  try:
1122
  logging.debug("ooba: Loading JSON data")
1123
+ with open(json_file_path, 'r') as file:
1124
  segments = json.load(file)
1125
 
1126
  logging.debug(f"ooba: Extracting text from segments file\n\n\n")
1127
  text = extract_text_from_segments(segments)
1128
  logging.debug(f"ooba: Finished extracting text from segments file")
1129
 
1130
+ # FIXME - Add headers for ooba auth
1131
  headers = {
1132
  'accept': 'application/json',
1133
  'content-type': 'application/json',
1134
  }
1135
 
1136
+ # prompt_text = "I like to eat cake and bake cakes. I am a baker. I work in a French bakery baking cakes. It
1137
+ # is a fun job. I have been baking cakes for ten years. I also bake lots of other baked goods, but cakes are
1138
+ # my favorite." prompt_text += f"\n\n{text}" # Uncomment this line if you want to include the text variable
1139
+ ooba_prompt = f"{text}\n\n\n\n{custom_prompt}"
1140
  logging.debug("ooba: Prompt being sent is {ooba_prompt}")
1141
 
1142
  data = {
 
1147
 
1148
  logging.debug("ooba: Submitting request to API endpoint")
1149
  print("ooba: Submitting request to API endpoint")
1150
+ response = requests.post(ooba_ip, headers=headers, json=data, verify=False)
1151
  logging.debug("ooba: API Response Data: %s", response)
1152
 
1153
  if response.status_code == 200:
 
1175
 
1176
  #
1177
  #
1178
+ ########################################################################################################################
1179
 
1180
 
1181
+ #######################################################################################################################
1182
  # Gradio UI
1183
  #
1184
 
1185
  # Only to be used when configured with Gradio for HF Space
1186
+ def summarize_with_huggingface(huggingface_api_key, json_file_path, custom_prompt):
1187
  logging.debug(f"huggingface: Summarization process starting...")
1188
 
1189
  model = "microsoft/Phi-3-mini-128k-instruct"
1190
  API_URL = f"https://api-inference.huggingface.co/models/{model}"
1191
+ headers = {"Authorization": f"Bearer {huggingface_api_key}"}
1192
 
1193
+ with open(json_file_path, 'r') as file:
1194
  segments = json.load(file)
1195
  text = ''.join([segment['text'] for segment in segments])
1196
 
1197
  # FIXME adjust max_length and min_length as needed
1198
  data = {
1199
+ "inputs": text + "\n\n\n\n" + custom_prompt,
1200
  "parameters": {"max_length": 4096, "min_length": 100}
1201
  }
1202
 
 
1211
  response_data = response.json()
1212
  wait_time = response_data.get('estimated_time', 10)
1213
  return None, f"Model is loading, retrying in {int(wait_time)} seconds..."
1214
+ # FIXME : This is a hack, should be done better
1215
  # Sleep before retrying....
1216
+ # time.sleep(wait_time)
1217
 
1218
+ if huggingface_api_key == "":
1219
+ api_key = os.getenv(HF_TOKEN)
1220
+ logging.debug("HUGGINGFACE API KEY CHECK: " + huggingface_api_key)
1221
  try:
1222
  logging.debug("huggingface: Loading json data for summarization")
1223
+ with open(json_file_path, 'r') as file:
1224
  segments = json.load(file)
1225
 
1226
  logging.debug("huggingface: Extracting text from the segments")
1227
  text = ' '.join([segment['text'] for segment in segments])
1228
 
1229
+ #api_key = os.getenv('HF_TOKEN').replace('"', '')
1230
+ logging.debug("HUGGINGFACE API KEY CHECK #2: " + huggingface_api_key)
1231
 
1232
  logging.debug("huggingface: Submitting request...")
1233
+ logging.debug("huggingface: Printing request headers: %s", headers)
1234
  response = requests.post(API_URL, headers=headers, json=data)
1235
 
1236
  if response.status_code == 200:
 
1246
  print(f"Error occurred while processing summary with huggingface: {str(e)}")
1247
  return None
1248
 
1249
+ # FIXME
1250
+ # This is here for gradio authentication
1251
+ # Its just not setup.
1252
+ #def same_auth(username, password):
1253
+ # return username == password
1254
 
1255
 
1256
  def format_transcription(transcription_result):
 
1261
  return ""
1262
 
1263
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1264
  def format_file_path(file_path):
1265
  # Helper function to check file existence and return an appropriate path or message
1266
  return file_path if file_path and os.path.exists(file_path) else None
 
1300
  visible=False)
1301
  custom_prompt_input = gr.Textbox(
1302
  label="Custom Prompt (Customize your summary, or ask a different question)",
1303
+ placeholder="Q: As a professional summarizer, create a concise and comprehensive summary of the "
1304
+ "provided text.\nA: Here is a detailed, bulleted list of the key points made in the "
1305
+ "transcribed video and supporting arguments:",
1306
  lines=3, visible=True)
1307
  offset_input = gr.Number(value=0, label="Offset (Seconds into the video to start transcribing at)",
1308
  visible=False)
 
1355
  fn=process_url,
1356
  inputs=all_inputs,
1357
  outputs=outputs,
1358
+ title="TL/DW: Video Transcription and Summarization with Custom Prompt Support",
1359
+ description="Submit a video URL for transcription and summarization. Ensure you input all necessary "
1360
+ "information including API keys."
1361
  )
1362
 
1363
  with gr.Tab("Transcription & Summarization History"):
 
1380
  #
1381
 
1382
  def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model="small.en", offset=0, vad_filter=False,
1383
+ download_video_flag=False, demo_mode=False, custom_prompt=None, overwrite=False):
1384
  if input_path is None and args.user_interface:
1385
  return []
1386
  start_time = time.monotonic()
 
1394
  elif (info_dict := get_youtube(input_path)) and 'entries' in info_dict:
1395
  logging.debug("MAIN: YouTube playlist detected")
1396
  print(
1397
+ "\n\nSorry, but playlists aren't currently supported. You can run the following command to generate a "
1398
+ "text file that you can then pass into this script though! (It may not work... playlist support seems "
1399
+ "spotty)" + """\n\n\tpython Get_Playlist_URLs.py <Youtube Playlist URL>\n\n\tThen,\n\n\tpython
1400
+ diarizer.py <playlist text file name>\n\n""")
1401
  return
1402
  else:
1403
  paths = [input_path]
 
1411
  if info_dict:
1412
  logging.debug("MAIN: Creating path for video file...")
1413
  download_path = create_download_directory(info_dict['title'])
1414
+ logging.debug("MAIN: Path created successfully\n MAIN: Now Downloading video from yt_dlp...")
 
1415
  try:
1416
  video_path = download_video(path, download_path, info_dict, download_video_flag)
1417
  except RuntimeError as e:
 
1442
  logging.info(f"Transcription complete: {audio_file}")
1443
 
1444
  # Perform summarization based on the specified API
1445
+ logging.debug(f"MAIN: HF: Summarization being performed by HuggingFace")
1446
+ json_file_path = audio_file.replace('.wav', '.segments.json')
1447
+ if api_name == "huggingface":
1448
+ huggingface_api_key = os.getenv('HF_TOKEN').replace('"', '')
1449
+ if huggingface_api_key is None:
1450
+ huggingface_api_key = api_key if api_key else config.get('API', 'huggingface_api_key', fallback=None)
1451
+ try:
1452
+ logging.debug(f"MAIN: Trying to summarize with huggingface")
1453
+ summarize_with_huggingface(huggingface_api_key, json_file_path, custom_prompt)
1454
+ except requests.exceptions.ConnectionError:
1455
+ requests.status_code = "Connection: "
1456
  if api_name and api_key:
1457
  logging.debug(f"MAIN: Summarization being performed by {api_name}")
1458
  json_file_path = audio_file.replace('.wav', '.segments.json')
 
1463
  summary = summarize_with_openai(openai_api_key, json_file_path, openai_model, custom_prompt)
1464
  except requests.exceptions.ConnectionError:
1465
  requests.status_code = "Connection: "
1466
+ elif api_name.lower() == "huggingface":
1467
+ huggingface_api_key = os.getenv(HF_TOKEN)
1468
+ if huggingface_api_key is None:
1469
+ huggingface_api_key = api_key if api_key else config.get('API', 'huggingface_api_key', fallback=None)
1470
+ try:
1471
+ logging.debug(f"MAIN: Trying to summarize with huggingface")
1472
+ summarize_with_huggingface(huggingface_api_key, json_file_path, custom_prompt)
1473
+ except requests.exceptions.ConnectionError:
1474
+ requests.status_code = "Connection: "
1475
  elif api_name.lower() == "anthropic":
1476
  anthropic_api_key = api_key if api_key else config.get('API', 'anthropic_api_key', fallback=None)
1477
  try:
 
1517
  summary = summarize_with_oobabooga(ooba_ip, json_file_path, ooba_token, custom_prompt)
1518
  except requests.exceptions.ConnectionError:
1519
  requests.status_code = "Connection: "
 
 
 
 
 
 
 
 
 
 
1520
  else:
1521
  logging.warning(f"Unsupported API: {api_name}")
1522
  summary = None
 
1528
  else:
1529
  logging.warning(f"Failed to generate summary using {api_name} API")
1530
  else:
1531
+ logging.info("MAIN: #2 - No API specified. Summarization will not be performed")
1532
  except Exception as e:
1533
  logging.error(f"Error processing path: {path}")
1534
  logging.error(str(e))
1535
+ continue
1536
  # end_time = time.monotonic()
1537
  # print("Total program execution time: " + timedelta(seconds=end_time - start_time))
1538
 
 
1544
  parser.add_argument('input_path', type=str, help='Path or URL of the video', nargs='?')
1545
  parser.add_argument('-v', '--video', action='store_true', help='Download the video instead of just the audio')
1546
  parser.add_argument('-api', '--api_name', type=str, help='API name for summarization (optional)')
1547
+ parser.add_argument('--overwrite', action='store_true', help='Overwrite existing audio files')
1548
  parser.add_argument('-ns', '--num_speakers', type=int, default=2, help='Number of speakers (default: 2)')
1549
  parser.add_argument('-wm', '--whisper_model', type=str, default='small.en',
1550
  help='Whisper model (default: small.en)')
 
1598
  logging.info(f'API: {args.api_name}')
1599
  logging.info('Summarization will be performed.')
1600
  else:
1601
+ logging.info('MAIN: #1 No API specified. Summarization will not be performed.')
1602
 
1603
  logging.debug("Platform check being performed...")
1604
  platform_check()
 
1613
  try:
1614
  results = main(args.input_path, api_name=args.api_name, api_key=args.api_key,
1615
  num_speakers=args.num_speakers, whisper_model=args.whisper_model, offset=args.offset,
1616
+ vad_filter=args.vad_filter, download_video_flag=args.video, overwrite=args.overwrite)
1617
  logging.info('Transcription process completed.')
1618
  except Exception as e:
1619
  logging.error('An error occurred during the transcription process.')