oceansweep committed on
Commit
c8eaa51
1 Parent(s): 0c961d6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -53
app.py CHANGED
@@ -17,10 +17,7 @@ import gradio as gr
17
  import torch
18
  import yt_dlp
19
 
20
- log_level = "DEBUG"
21
- logging.basicConfig(level=getattr(logging, log_level), format='%(asctime)s - %(levelname)s - %(message)s')
22
  os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
23
-
24
  #######
25
  # Function Sections
26
  #
@@ -226,7 +223,7 @@ def decide_cpugpu():
226
 
227
  # check for existence of ffmpeg
228
  def check_ffmpeg():
229
- if shutil.which("ffmpeg") or (os.path.exists("..\\Bin") and os.path.isfile("..\\Bin\\ffmpeg.exe")):
230
  logging.debug("ffmpeg found installed on the local system, in the local PATH, or in the './Bin' folder")
231
  pass
232
  else:
@@ -492,7 +489,7 @@ def download_video(video_url, download_path, info_dict, download_video_flag):
492
  if userOS == "Windows":
493
  logging.debug("Running ffmpeg on Windows...")
494
  ffmpeg_command = [
495
- '..\\Bin\\ffmpeg.exe',
496
  '-i', video_file_path,
497
  '-i', audio_file_path,
498
  '-c:v', 'copy',
@@ -890,6 +887,7 @@ def summarize_with_claude(api_key, file_path, model, custom_prompt):
890
  # Summarize with Cohere
891
  def summarize_with_cohere(api_key, file_path, model, custom_prompt):
892
  try:
 
893
  logging.debug("cohere: Loading JSON data")
894
  with open(file_path, 'r') as file:
895
  segments = json.load(file)
@@ -1245,48 +1243,14 @@ def format_file_path(file_path):
1245
  return file_path if file_path and os.path.exists(file_path) else None
1246
 
1247
  def launch_ui(demo_mode=False):
1248
- def process_url(url, num_speakers, whisper_model, custom_prompt, offset, api_name, api_key, vad_filter,
1249
- download_video):
1250
- video_file_path = None
1251
- try:
1252
- results = main(url, api_name=api_name, api_key=api_key, num_speakers=num_speakers,
1253
- whisper_model=whisper_model, offset=offset, vad_filter=vad_filter,
1254
- download_video_flag=download_video, custom_prompt=custom_prompt)
1255
-
1256
- if results:
1257
- transcription_result = results[0]
1258
- json_file_path = transcription_result['audio_file'].replace('.wav', '.segments.json')
1259
- summary_file_path = transcription_result.get('summary', None)
1260
-
1261
- video_file_path = transcription_result.get('video_path', None)
1262
- if summary:
1263
- transcription_result['summary'] = summary
1264
- summary_file_path = json_file_path.replace('.segments.json', '_summary.txt')
1265
- transcription_result['summary_file_path'] = summary_file_path
1266
- logging.info(f"Summary generated using {api_name} API")
1267
- save_summary_to_file(summary, json_file_path)
1268
- return transcription_result['transcription'], "Summary available.", json_file_path, summary_file_path, video_file_path
1269
- else:
1270
- return transcription_result[
1271
- 'transcription'], "Summary not available.", json_file_path, None, video_file_path
1272
- else:
1273
- logging.warning(f"Failed to generate summary using {api_name} API")
1274
- return "No results found.", "Summary not available.", None, None, None
1275
-
1276
- except Exception as e:
1277
- return str(e), "Error processing the request.", None, None, None
1278
-
1279
  inputs = [
1280
  gr.components.Textbox(label="URL", placeholder="Enter the video URL here"),
1281
  gr.components.Number(value=2, label="Number of Speakers"),
1282
  gr.components.Dropdown(choices=whisper_models, value="small.en", label="Whisper Model"),
1283
- gr.components.Textbox(label="Custom Prompt",
1284
- placeholder="Q: As a professional summarizer, create a concise and comprehensive summary of the provided text.\nA: Here is a detailed, bulleted list of the key points made in the transcribed video and supporting arguments:",
1285
- lines=3),
1286
  gr.components.Number(value=0, label="Offset"),
1287
  gr.components.Dropdown(
1288
  choices=["huggingface", "openai", "anthropic", "cohere", "groq", "llama", "kobold", "ooba"],
1289
- value="huggingface",
1290
  label="API Name"),
1291
  gr.components.Textbox(label="API Key", placeholder="Enter your API key here"),
1292
  gr.components.Checkbox(label="VAD Filter", value=False),
@@ -1296,18 +1260,43 @@ def launch_ui(demo_mode=False):
1296
  outputs = [
1297
  gr.components.Textbox(label="Transcription"),
1298
  gr.components.Textbox(label="Summary or Status Message"),
1299
- gr.components.File(label="Download Transcription as JSON", visible=lambda x: x is not None),
1300
- gr.components.File(label="Download Summary as Text", visible=lambda x: x is not None),
1301
  gr.components.File(label="Download Video", visible=lambda x: x is not None)
1302
  ]
1303
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1304
  iface = gr.Interface(
1305
  fn=process_url,
1306
  inputs=inputs,
1307
  outputs=outputs,
1308
  title="Video Transcription and Summarization",
1309
- description="Submit a video URL for transcription and summarization. Ensure you input all necessary information including API keys.",
1310
- theme="bethecloud/storj_theme" # Adjust theme as necessary
1311
  )
1312
 
1313
  iface.launch(share=False)
@@ -1386,7 +1375,6 @@ a = """def launch_ui(demo_mode=False):
1386
 
1387
  def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model="small.en", offset=0, vad_filter=False,
1388
  download_video_flag=False, demo_mode=False, custom_prompt=None):
1389
- global summary
1390
  if input_path is None and args.user_interface:
1391
  return []
1392
  start_time = time.monotonic()
@@ -1451,7 +1439,6 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
1451
  json_file_path = audio_file.replace('.wav', '.segments.json')
1452
  if api_name.lower() == 'openai':
1453
  api_key = openai_api_key
1454
- logging.debug(f"MAIN: API Key in main: {api_key}")
1455
  try:
1456
  logging.debug(f"MAIN: trying to summarize with openAI")
1457
  summary = summarize_with_openai(api_key, json_file_path, openai_model, custom_prompt)
@@ -1541,16 +1528,15 @@ if __name__ == "__main__":
1541
  help='Whisper model (default: small.en)')
1542
  parser.add_argument('-off', '--offset', type=int, default=0, help='Offset in seconds (default: 0)')
1543
  parser.add_argument('-vad', '--vad_filter', action='store_true', help='Enable VAD filter')
1544
- # Give app.py verbose logging - DEBUG
1545
- parser.add_argument('-log', '--log_level', type=str, default='DEBUG',
1546
- choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], help='Log level (default: DEBUG)')
1547
  parser.add_argument('-ui', '--user_interface', action='store_true', help='Launch the Gradio user interface')
1548
  parser.add_argument('-demo', '--demo_mode', action='store_true', help='Enable demo mode')
1549
  parser.add_argument('-prompt', '--custom_prompt', type=str,
1550
  help='Pass in a custom prompt to be used in place of the existing one.(Probably should just modify the script itself...)')
1551
  # parser.add_argument('--log_file', action=str, help='Where to save logfile (non-default)')
1552
  args = parser.parse_args()
1553
- logging.basicConfig(level=getattr(logging, args.log_level), format='%(asctime)s - %(levelname)s - %(message)s')
1554
  custom_prompt = args.custom_prompt
1555
  if custom_prompt == "":
1556
  logging.debug(f"Custom prompt defined, will use \n\nf{custom_prompt} \n\nas the prompt")
@@ -1568,18 +1554,18 @@ if __name__ == "__main__":
1568
  # Since this is running in HF....
1569
  args.user_interface = True
1570
  if args.user_interface:
1571
- log_level = "DEBUG"
1572
- logging.basicConfig(level=getattr(logging, log_level), format='%(asctime)s - %(levelname)s - %(message)s')
1573
  launch_ui(demo_mode=args.demo_mode)
1574
  else:
1575
  if not args.input_path:
1576
  parser.print_help()
1577
  sys.exit(1)
1578
 
1579
- logging.debug('Logging configured')
 
1580
  logging.info('Starting the transcription and summarization process.')
1581
  logging.info(f'Input path: {args.input_path}')
1582
  logging.info(f'API Name: {args.api_name}')
 
1583
  logging.info(f'Number of speakers: {args.num_speakers}')
1584
  logging.info(f'Whisper model: {args.whisper_model}')
1585
  logging.info(f'Offset: {args.offset}')
 
17
  import torch
18
  import yt_dlp
19
 
 
 
20
  os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
 
21
  #######
22
  # Function Sections
23
  #
 
223
 
224
  # check for existence of ffmpeg
225
  def check_ffmpeg():
226
+ if shutil.which("ffmpeg") or (os.path.exists("Bin") and os.path.isfile(".\\Bin\\ffmpeg.exe")):
227
  logging.debug("ffmpeg found installed on the local system, in the local PATH, or in the './Bin' folder")
228
  pass
229
  else:
 
489
  if userOS == "Windows":
490
  logging.debug("Running ffmpeg on Windows...")
491
  ffmpeg_command = [
492
+ '.\\Bin\\ffmpeg.exe',
493
  '-i', video_file_path,
494
  '-i', audio_file_path,
495
  '-c:v', 'copy',
 
887
  # Summarize with Cohere
888
  def summarize_with_cohere(api_key, file_path, model, custom_prompt):
889
  try:
890
+ logging.basicConfig(level=logging.DEBUG)
891
  logging.debug("cohere: Loading JSON data")
892
  with open(file_path, 'r') as file:
893
  segments = json.load(file)
 
1243
  return file_path if file_path and os.path.exists(file_path) else None
1244
 
1245
  def launch_ui(demo_mode=False):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1246
  inputs = [
1247
  gr.components.Textbox(label="URL", placeholder="Enter the video URL here"),
1248
  gr.components.Number(value=2, label="Number of Speakers"),
1249
  gr.components.Dropdown(choices=whisper_models, value="small.en", label="Whisper Model"),
1250
+ gr.components.Textbox(label="Custom Prompt", placeholder="Q: As a professional summarizer, create a concise and comprehensive summary of the provided text.\nA: Here is a detailed, bulleted list of the key points made in the transcribed video and supporting arguments:", lines=3),
 
 
1251
  gr.components.Number(value=0, label="Offset"),
1252
  gr.components.Dropdown(
1253
  choices=["huggingface", "openai", "anthropic", "cohere", "groq", "llama", "kobold", "ooba"],
 
1254
  label="API Name"),
1255
  gr.components.Textbox(label="API Key", placeholder="Enter your API key here"),
1256
  gr.components.Checkbox(label="VAD Filter", value=False),
 
1260
  outputs = [
1261
  gr.components.Textbox(label="Transcription"),
1262
  gr.components.Textbox(label="Summary or Status Message"),
1263
+ gr.components.File(label="Download Transcription as JSON", visible=lambda x: x != "File not available"),
1264
+ gr.components.File(label="Download Summary as Text", visible=lambda x: x != "File not available"),
1265
  gr.components.File(label="Download Video", visible=lambda x: x is not None)
1266
  ]
1267
 
1268
+ def process_url(url, num_speakers, whisper_model, custom_prompt, offset, api_name, api_key, vad_filter,
1269
+ download_video):
1270
+ video_file_path = None
1271
+ try:
1272
+ results = main(url, api_name=api_name, api_key=api_key, num_speakers=num_speakers,
1273
+ whisper_model=whisper_model, offset=offset, vad_filter=vad_filter,
1274
+ download_video_flag=download_video, custom_prompt=custom_prompt)
1275
+ if results:
1276
+ transcription_result = results[0]
1277
+ json_file_path = transcription_result['audio_file'].replace('.wav', '.segments.json')
1278
+ summary_file_path = json_file_path.replace('.segments.json', '_summary.txt')
1279
+
1280
+ json_file_path = format_file_path(json_file_path)
1281
+ summary_file_path = format_file_path(summary_file_path)
1282
+
1283
+ if summary_file_path and os.path.exists(summary_file_path):
1284
+ return transcription_result[
1285
+ 'transcription'], "Summary available", json_file_path, summary_file_path, video_file_path
1286
+ else:
1287
+ return transcription_result[
1288
+ 'transcription'], "Summary not available", json_file_path, None, video_file_path
1289
+ else:
1290
+ return "No results found.", "Summary not available", None, None, None
1291
+ except Exception as e:
1292
+ return str(e), "Error processing the request.", None, None, None
1293
+
1294
  iface = gr.Interface(
1295
  fn=process_url,
1296
  inputs=inputs,
1297
  outputs=outputs,
1298
  title="Video Transcription and Summarization",
1299
+ description="Submit a video URL for transcription and summarization. Ensure you input all necessary information including API keys."
 
1300
  )
1301
 
1302
  iface.launch(share=False)
 
1375
 
1376
  def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model="small.en", offset=0, vad_filter=False,
1377
  download_video_flag=False, demo_mode=False, custom_prompt=None):
 
1378
  if input_path is None and args.user_interface:
1379
  return []
1380
  start_time = time.monotonic()
 
1439
  json_file_path = audio_file.replace('.wav', '.segments.json')
1440
  if api_name.lower() == 'openai':
1441
  api_key = openai_api_key
 
1442
  try:
1443
  logging.debug(f"MAIN: trying to summarize with openAI")
1444
  summary = summarize_with_openai(api_key, json_file_path, openai_model, custom_prompt)
 
1528
  help='Whisper model (default: small.en)')
1529
  parser.add_argument('-off', '--offset', type=int, default=0, help='Offset in seconds (default: 0)')
1530
  parser.add_argument('-vad', '--vad_filter', action='store_true', help='Enable VAD filter')
1531
+ parser.add_argument('-log', '--log_level', type=str, default='INFO',
1532
+ choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], help='Log level (default: INFO)')
 
1533
  parser.add_argument('-ui', '--user_interface', action='store_true', help='Launch the Gradio user interface')
1534
  parser.add_argument('-demo', '--demo_mode', action='store_true', help='Enable demo mode')
1535
  parser.add_argument('-prompt', '--custom_prompt', type=str,
1536
  help='Pass in a custom prompt to be used in place of the existing one.(Probably should just modify the script itself...)')
1537
  # parser.add_argument('--log_file', action=str, help='Where to save logfile (non-default)')
1538
  args = parser.parse_args()
1539
+
1540
  custom_prompt = args.custom_prompt
1541
  if custom_prompt == "":
1542
  logging.debug(f"Custom prompt defined, will use \n\nf{custom_prompt} \n\nas the prompt")
 
1554
  # Since this is running in HF....
1555
  args.user_interface = True
1556
  if args.user_interface:
 
 
1557
  launch_ui(demo_mode=args.demo_mode)
1558
  else:
1559
  if not args.input_path:
1560
  parser.print_help()
1561
  sys.exit(1)
1562
 
1563
+ logging.basicConfig(level=getattr(logging, args.log_level), format='%(asctime)s - %(levelname)s - %(message)s')
1564
+
1565
  logging.info('Starting the transcription and summarization process.')
1566
  logging.info(f'Input path: {args.input_path}')
1567
  logging.info(f'API Name: {args.api_name}')
1568
+ logging.debug(f'API Key: {args.api_key}') # ehhhhh
1569
  logging.info(f'Number of speakers: {args.num_speakers}')
1570
  logging.info(f'Whisper model: {args.whisper_model}')
1571
  logging.info(f'Offset: {args.offset}')