oceansweep committed
Commit 0c961d6
1 Parent(s): 8755180

Update app.py

Files changed (1):
  1. app.py +130 -39
app.py CHANGED
@@ -17,6 +17,10 @@ import gradio as gr
 import torch
 import yt_dlp
 
+log_level = "DEBUG"
+logging.basicConfig(level=getattr(logging, log_level), format='%(asctime)s - %(levelname)s - %(message)s')
+os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
+
 #######
 # Function Sections
 #
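
A note on this hunk: `logging.basicConfig` configures the root logger only on its first effective call, so once this module-level call runs at import time, the later `basicConfig` calls this commit adds in the `__main__` block become silent no-ops. A minimal sketch of that behavior, assuming Python 3.8+ for the `force` flag:

    import logging

    # First call wins: the root logger now has a handler at DEBUG.
    logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')

    # Silently ignored: handlers already exist on the root logger.
    logging.basicConfig(level=logging.INFO)

    # Actually reconfigures: force=True (Python 3.8+) removes existing handlers first.
    logging.basicConfig(level=logging.INFO, force=True)
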
@@ -39,7 +43,7 @@ import yt_dlp
 # 2. Usage of/Hardcoding HF_TOKEN as token for API calls
 # 3. Usage of HuggingFace for Inference
 # 4. Other stuff I can't remember. Will eventually do a diff and document them.
-#
+#
 
 
 ####
@@ -63,10 +67,10 @@ import yt_dlp
 # llama.cpp)/`ooba` (oobabooga/text-gen-webui)/`kobold` (kobold.cpp)/`tabby` (Tabbyapi)) API:** python summarize.py
 # -v https://www.youtube.com/watch?v=4nd1CDZP21s -api <your choice of API>` - Make sure to put your API key into
 # `config.txt` under the appropriate API variable
-#
+#
 # Download Audio+Video from a list of videos in a text file (can be file paths or URLs) and have them all summarized:**
 # python summarize.py ./local/file_on_your/system --api_name <API_name>`
-#
+#
 # Run it as a WebApp**
 # python summarize.py -gui` - This requires you to either stuff your API keys into the `config.txt` file, or pass them into the app every time you want to use it.
 # Can be helpful for setting up a shared instance, but not wanting people to perform inference on your server.
@@ -120,7 +124,7 @@ output_path = config.get('Paths', 'output_path', fallback='results')
 processing_choice = config.get('Processing', 'processing_choice', fallback='cpu')
 
 # Log file
-#logging.basicConfig(filename='debug-runtime.log', encoding='utf-8', level=logging.DEBUG)
+# logging.basicConfig(filename='debug-runtime.log', encoding='utf-8', level=logging.DEBUG)
 
 #
 #
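
If this commented-out file logging is ever revived, note that the `encoding` argument to `logging.basicConfig` only exists on Python 3.9+. A hedged equivalent for older interpreters, using an explicit `FileHandler` (the log path and format come from this file, not from the commit):

    import logging

    # Same effect as basicConfig(filename='debug-runtime.log', encoding='utf-8',
    # level=logging.DEBUG) on interpreters older than Python 3.9.
    handler = logging.FileHandler('debug-runtime.log', encoding='utf-8')
    handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
    root = logging.getLogger()
    root.addHandler(handler)
    root.setLevel(logging.DEBUG)
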
@@ -148,8 +152,8 @@ print(r"""
 | | | | / / | | | || |/\| |
 | | | |____ / / | |/ / \ /\ / _
 \_/ \_____//_/ |___/ \/ \/ (_)
-
-
+
+
 _ _
 | | | |
 | |_ ___ ___ | | ___ _ __ __ _
@@ -168,8 +172,8 @@ print(r"""
 
 ####################################################################################################################################
 # System Checks
-#
-#
+#
+#
 
 # Perform Platform Check
 userOS = ""
@@ -222,7 +226,7 @@ def decide_cpugpu():
 
 # check for existence of ffmpeg
 def check_ffmpeg():
-    if shutil.which("ffmpeg") or (os.path.exists("Bin") and os.path.isfile(".\\Bin\\ffmpeg.exe")):
+    if shutil.which("ffmpeg") or (os.path.exists("..\\Bin") and os.path.isfile("..\\Bin\\ffmpeg.exe")):
         logging.debug("ffmpeg found installed on the local system, in the local PATH, or in the './Bin' folder")
         pass
     else:
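
The relative `..\\Bin` lookup introduced here also appears in `download_video` and `convert_to_wav` below, and it resolves against the process working directory rather than the script location. One way to keep the three call sites consistent is a single resolver; this is a sketch, and `resolve_ffmpeg` is a hypothetical helper, not part of this commit:

    import os
    import shutil
    import sys

    def resolve_ffmpeg(bin_dir="..\\Bin"):
        # Prefer ffmpeg on PATH; fall back to a bundled Windows build in bin_dir.
        found = shutil.which("ffmpeg")
        if found:
            return found
        if sys.platform.startswith('win'):
            bundled = os.path.join(bin_dir, "ffmpeg.exe")
            if os.path.isfile(bundled):
                return bundled
        raise FileNotFoundError(f"ffmpeg not found on PATH or in {bin_dir}")
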
@@ -291,13 +295,13 @@ def download_ffmpeg():
 
 
 #
-#
+#
 ####################################################################################################################################
 
 
 ####################################################################################################################################
 # Processing Paths and local file handling
-#
+#
 #
 
 def read_paths_from_file(file_path):
@@ -488,7 +492,7 @@ def download_video(video_url, download_path, info_dict, download_video_flag):
     if userOS == "Windows":
         logging.debug("Running ffmpeg on Windows...")
         ffmpeg_command = [
-            '.\\Bin\\ffmpeg.exe',
+            '..\\Bin\\ffmpeg.exe',
             '-i', video_file_path,
             '-i', audio_file_path,
             '-c:v', 'copy',
@@ -508,8 +512,8 @@ def download_video(video_url, download_path, info_dict, download_video_flag):
         ]
         subprocess.run(ffmpeg_command, check=True)
     else:
-        logging.error("You shouldn't be here...")
-        exit()
+        logging.error("ffmpeg: Unsupported operating system for video download and merging.")
+        raise RuntimeError("ffmpeg: Unsupported operating system for video download and merging.")
     os.remove(video_file_path)
     os.remove(audio_file_path)
 
@@ -529,7 +533,7 @@ def download_video(video_url, download_path, info_dict, download_video_flag):
 # https://www.gyan.dev/ffmpeg/builds/
 #
 
-#os.system(r'.\Bin\ffmpeg.exe -ss 00:00:00 -i "{video_file_path}" -ar 16000 -ac 1 -c:a pcm_s16le "{out_path}"')
+# os.system(r'.\Bin\ffmpeg.exe -ss 00:00:00 -i "{video_file_path}" -ar 16000 -ac 1 -c:a pcm_s16le "{out_path}"')
 def convert_to_wav(video_file_path, offset=0):
     print("Starting conversion process of .m4a to .WAV")
     out_path = os.path.splitext(video_file_path)[0] + ".wav"
@@ -539,7 +543,8 @@ def convert_to_wav(video_file_path, offset=0):
     logging.debug("ffmpeg being ran on windows")
 
     if sys.platform.startswith('win'):
-        ffmpeg_cmd = ".\\Bin\\ffmpeg.exe"
+        ffmpeg_cmd = "..\\Bin\\ffmpeg.exe"
+        logging.debug(f"ffmpeg_cmd: {ffmpeg_cmd}")
     else:
         ffmpeg_cmd = 'ffmpeg'  # Assume 'ffmpeg' is in PATH for non-Windows systems
 
@@ -749,7 +754,7 @@ def speech_to_text(audio_file_path, selected_source_lang='en', whisper_model='sm
 
 
 ####################################################################################################################################
-#Summarizers
+# Summarizers
 #
 #
 
@@ -885,7 +890,6 @@ def summarize_with_claude(api_key, file_path, model, custom_prompt):
 # Summarize with Cohere
 def summarize_with_cohere(api_key, file_path, model, custom_prompt):
     try:
-        logging.basicConfig(level=logging.DEBUG)
         logging.debug("cohere: Loading JSON data")
         with open(file_path, 'r') as file:
             segments = json.load(file)
@@ -1023,7 +1027,7 @@ def summarize_with_llama(api_url, file_path, token, custom_prompt):
         logging.debug("API Response Data: %s", response_data)
 
         if response.status_code == 200:
-            #if 'X' in response_data:
+            # if 'X' in response_data:
             logging.debug(response_data)
             summary = response_data['content'].strip()
             logging.debug("llama: Summarization successful")
@@ -1236,36 +1240,53 @@ def process_text(api_key, text_file):
     return "Notice:", message
 
 
+def format_file_path(file_path):
+    # Helper function to check file existence and return an appropriate path or message
+    return file_path if file_path and os.path.exists(file_path) else None
+
 def launch_ui(demo_mode=False):
     def process_url(url, num_speakers, whisper_model, custom_prompt, offset, api_name, api_key, vad_filter,
                     download_video):
+        video_file_path = None
         try:
-            # Assuming 'main' is the function that handles the processing logic.
-            # Adjust parameters as needed based on your actual 'main' function implementation.
             results = main(url, api_name=api_name, api_key=api_key, num_speakers=num_speakers,
                            whisper_model=whisper_model, offset=offset, vad_filter=vad_filter,
                            download_video_flag=download_video, custom_prompt=custom_prompt)
 
             if results:
                 transcription_result = results[0]
-                json_data = transcription_result['transcription']
-                summary_file_path = transcription_result.get('summary', "Summary not available.")
                 json_file_path = transcription_result['audio_file'].replace('.wav', '.segments.json')
+                summary_file_path = transcription_result.get('summary', None)
+
                 video_file_path = transcription_result.get('video_path', None)
-                return json_data, summary_file_path, json_file_path, summary_file_path, video_file_path
+                if summary:
+                    transcription_result['summary'] = summary
+                    summary_file_path = json_file_path.replace('.segments.json', '_summary.txt')
+                    transcription_result['summary_file_path'] = summary_file_path
+                    logging.info(f"Summary generated using {api_name} API")
+                    save_summary_to_file(summary, json_file_path)
+                    return transcription_result['transcription'], "Summary available.", json_file_path, summary_file_path, video_file_path
+                else:
+                    return transcription_result[
+                               'transcription'], "Summary not available.", json_file_path, None, video_file_path
             else:
-                return "No results found.", "No summary available.", None, None, None, None
+                logging.warning(f"Failed to generate summary using {api_name} API")
+                return "No results found.", "Summary not available.", None, None, None
+
         except Exception as e:
-            return str(e), "Error processing the request.", None, None, None, None
+            return str(e), "Error processing the request.", None, None, None
 
     inputs = [
         gr.components.Textbox(label="URL", placeholder="Enter the video URL here"),
         gr.components.Number(value=2, label="Number of Speakers"),
         gr.components.Dropdown(choices=whisper_models, value="small.en", label="Whisper Model"),
-        gr.components.Textbox(label="Custom Prompt", placeholder="Q: As a professional summarizer, create a concise and comprehensive summary of the provided text.\nA: Here is a detailed, bulleted list of the key points made in the transcribed video and supporting arguments:", lines=3),
+        gr.components.Textbox(label="Custom Prompt",
+                              placeholder="Q: As a professional summarizer, create a concise and comprehensive summary of the provided text.\nA: Here is a detailed, bulleted list of the key points made in the transcribed video and supporting arguments:",
+                              lines=3),
         gr.components.Number(value=0, label="Offset"),
         gr.components.Dropdown(
             choices=["huggingface", "openai", "anthropic", "cohere", "groq", "llama", "kobold", "ooba"],
+            value="huggingface",
             label="API Name"),
         gr.components.Textbox(label="API Key", placeholder="Enter your API key here"),
         gr.components.Checkbox(label="VAD Filter", value=False),
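
Note that the new `if summary:` branch reads the module-level `summary` that `main` publishes through the `global summary` declaration added further down, rather than anything returned in `results`. A sketch of a global-free alternative; `extract_summary_outputs` is hypothetical, not part of this commit:

    def extract_summary_outputs(transcription_result, json_file_path):
        # Read the summary off the result dict instead of module-level state,
        # deriving the summary path the same way process_url does above.
        summary = transcription_result.get('summary')
        if summary:
            summary_file_path = json_file_path.replace('.segments.json', '_summary.txt')
            return summary, summary_file_path
        return None, None
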
@@ -1292,6 +1313,68 @@ def launch_ui(demo_mode=False):
     iface.launch(share=False)
 
 
+
+
+a = """def launch_ui(demo_mode=False):
+    def process_url(url, num_speakers, whisper_model, custom_prompt, offset, api_name, api_key, vad_filter,
+                    download_video):
+        try:
+            results = main(url, api_name=api_name, api_key=api_key, num_speakers=num_speakers,
+                           whisper_model=whisper_model, offset=offset, vad_filter=vad_filter,
+                           download_video_flag=download_video, custom_prompt=custom_prompt)
+
+            if results:
+                transcription_result = results[0]
+                json_data = transcription_result['transcription']
+                json_file_path = transcription_result['audio_file'].replace('.wav', '.segments.json')
+                summary_file_path = transcription_result.get('summary', "Summary not available.")
+                video_file_path = transcription_result.get('video_path', None)
+
+                json_file_path = format_file_path(json_file_path)
+                summary_file_path = format_file_path(summary_file_path)
+
+                return json_data, "Summary available", json_file_path, summary_file_path, video_file_path
+            else:
+                return "No results found.", "No summary available.", None, None, None
+        except Exception as e:
+            return str(e), "Error processing the request.", None, None, None, None
+
+    inputs = [
+        gr.components.Textbox(label="URL", placeholder="Enter the video URL here"),
+        gr.components.Number(value=2, label="Number of Speakers"),
+        gr.components.Dropdown(choices=whisper_models, value="small.en", label="Whisper Model"),
+        gr.components.Textbox(label="Custom Prompt",
+                              placeholder="Q: As a professional summarizer, create a concise and comprehensive summary of the provided text.\nA: Here is a detailed, bulleted list of the key points made in the transcribed video and supporting arguments:",
+                              lines=3),
+        gr.components.Number(value=0, label="Offset"),
+        gr.components.Dropdown(
+            choices=["huggingface", "openai", "anthropic", "cohere", "groq", "llama", "kobold", "ooba"],
+            label="API Name"),
+        gr.components.Textbox(label="API Key", placeholder="Enter your API key here"),
+        gr.components.Checkbox(label="VAD Filter", value=False),
+        gr.components.Checkbox(label="Download Video", value=False)
+    ]
+
+    outputs = [
+        gr.components.Textbox(label="Transcription"),
+        gr.components.Textbox(label="Summary or Status Message"),
+        gr.components.File(label="Download Transcription as JSON", visible=lambda x: x != "File not available"),
+        gr.components.File(label="Download Summary as Text", visible=lambda x: x != "File not available"),
+        gr.components.File(label="Download Video", visible=lambda x: x is not None)
+    ]
+
+    iface = gr.Interface(
+        fn=process_url,
+        inputs=inputs,
+        outputs=outputs,
+        title="Video Transcription and Summarization",
+        description="Submit a video URL for transcription and summarization. Ensure you input all necessary information including API keys.",
+        theme="bethecloud/storj_theme"  # Adjust theme as necessary
+    )
+
+    iface.launch(share=False)
+    """
+
 #
 #
 #####################################################################################################################################
@@ -1303,6 +1386,7 @@ def launch_ui(demo_mode=False):
 
 def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model="small.en", offset=0, vad_filter=False,
          download_video_flag=False, demo_mode=False, custom_prompt=None):
+    global summary
     if input_path is None and args.user_interface:
         return []
     start_time = time.monotonic()
@@ -1332,7 +1416,12 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
             download_path = create_download_directory(info_dict['title'])
             logging.debug("MAIN: Path created successfully")
             logging.debug("MAIN: Downloading video from yt_dlp...")
-            video_path = download_video(path, download_path, info_dict, download_video_flag)
+            try:
+                video_path = download_video(path, download_path, info_dict, download_video_flag)
+            except RuntimeError as e:
+                logging.error(f"Error downloading video: {str(e)}")
+                # FIXME - figure something out for handling this situation....
+                continue
             logging.debug("MAIN: Video downloaded successfully")
             logging.debug("MAIN: Converting video file to WAV...")
             audio_file = convert_to_wav(video_path, offset)
@@ -1362,6 +1451,7 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
             json_file_path = audio_file.replace('.wav', '.segments.json')
             if api_name.lower() == 'openai':
                 api_key = openai_api_key
+                logging.debug(f"MAIN: API Key in main: {api_key}")
                 try:
                     logging.debug(f"MAIN: trying to summarize with openAI")
                     summary = summarize_with_openai(api_key, json_file_path, openai_model, custom_prompt)
@@ -1436,7 +1526,7 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
         logging.error(f"Error processing path: {path}")
         logging.error(str(e))
     end_time = time.monotonic()
-    #print("Total program execution time: " + timedelta(seconds=end_time - start_time))
+    # print("Total program execution time: " + timedelta(seconds=end_time - start_time))
 
     return results
 
@@ -1451,15 +1541,16 @@ if __name__ == "__main__":
                         help='Whisper model (default: small.en)')
     parser.add_argument('-off', '--offset', type=int, default=0, help='Offset in seconds (default: 0)')
     parser.add_argument('-vad', '--vad_filter', action='store_true', help='Enable VAD filter')
-    parser.add_argument('-log', '--log_level', type=str, default='INFO',
-                        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], help='Log level (default: INFO)')
+    # Give app.py verbose logging - DEBUG
+    parser.add_argument('-log', '--log_level', type=str, default='DEBUG',
+                        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], help='Log level (default: DEBUG)')
     parser.add_argument('-ui', '--user_interface', action='store_true', help='Launch the Gradio user interface')
     parser.add_argument('-demo', '--demo_mode', action='store_true', help='Enable demo mode')
     parser.add_argument('-prompt', '--custom_prompt', type=str,
                         help='Pass in a custom prompt to be used in place of the existing one.(Probably should just modify the script itself...)')
-    #parser.add_argument('--log_file', action=str, help='Where to save logfile (non-default)')
+    # parser.add_argument('--log_file', action=str, help='Where to save logfile (non-default)')
     args = parser.parse_args()
-
+    logging.basicConfig(level=getattr(logging, args.log_level), format='%(asctime)s - %(levelname)s - %(message)s')
     custom_prompt = args.custom_prompt
     if custom_prompt == "":
         logging.debug(f"Custom prompt defined, will use \n\nf{custom_prompt} \n\nas the prompt")
@@ -1469,31 +1560,31 @@
         args.custom_prompt = "\n\nQ: As a professional summarizer, create a concise and comprehensive summary of the provided text.\nA: Here is a detailed, bulleted list of the key points made in the transcribed video and supporting arguments:"
         print("No custom prompt defined, will use default")
 
-    print(f"Is CUDA available: {torch.cuda.is_available()}")
+    # print(f"Is CUDA available: {torch.cuda.is_available()}")
     # True
-    print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
+    # print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
     # Tesla T4
 
     # Since this is running in HF....
     args.user_interface = True
     if args.user_interface:
+        log_level = "DEBUG"
+        logging.basicConfig(level=getattr(logging, log_level), format='%(asctime)s - %(levelname)s - %(message)s')
         launch_ui(demo_mode=args.demo_mode)
     else:
         if not args.input_path:
             parser.print_help()
             sys.exit(1)
 
-        logging.basicConfig(level=getattr(logging, args.log_level), format='%(asctime)s - %(levelname)s - %(message)s')
-
+        logging.debug('Logging configured')
         logging.info('Starting the transcription and summarization process.')
         logging.info(f'Input path: {args.input_path}')
         logging.info(f'API Name: {args.api_name}')
-        logging.debug(f'API Key: {args.api_key}')  # ehhhhh
         logging.info(f'Number of speakers: {args.num_speakers}')
         logging.info(f'Whisper model: {args.whisper_model}')
         logging.info(f'Offset: {args.offset}')
         logging.info(f'VAD filter: {args.vad_filter}')
-        logging.info(f'Log Level: {args.log_level}')  #lol
+        logging.info(f'Log Level: {args.log_level}')  # lol
 
     if args.api_name and args.api_key:
         logging.info(f'API: {args.api_name}')
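
The two CUDA probes are commented out in this hunk, likely because `torch.cuda.get_device_name(torch.cuda.current_device())` raises on a CPU-only Space. A guarded sketch if the diagnostic is ever wanted back:

    import torch

    print(f"Is CUDA available: {torch.cuda.is_available()}")
    # Only query the device name when CUDA exists, so this stays safe on CPU-only hosts.
    if torch.cuda.is_available():
        print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")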
 