Spaces:
Running
Running
oceansweep
committed on
Commit
•
e3cd24c
1
Parent(s):
0911ebb
Update app.py
Browse files
app.py
CHANGED
@@ -70,9 +70,9 @@ os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
|
|
70 |
# Download Audio+Video from a list of videos in a text file (can be file paths or URLs) and have them all summarized:
|
71 |
# python summarize.py ./local/file_on_your/system --api_name <API_name>
|
72 |
#
|
73 |
-
#
|
74 |
-
#
|
75 |
-
#
|
76 |
#
|
77 |
###
|
78 |
|
@@ -172,7 +172,7 @@ print(r"""
|
|
172 |
\__,_||_| \__,_||_| |_| \__| \_/\_/ \__,_| \__| \___||_| |_|
|
173 |
""")
|
174 |
|
175 |
-
|
176 |
# System Checks
|
177 |
#
|
178 |
#
|
@@ -234,12 +234,14 @@ def check_ffmpeg():
|
|
234 |
else:
|
235 |
logging.debug("ffmpeg not installed on the local system/in local PATH")
|
236 |
print(
|
237 |
-
"ffmpeg is not installed.\n\n You can either install it manually, or through your package manager of
|
|
|
238 |
if userOS == "Windows":
|
239 |
download_ffmpeg()
|
240 |
elif userOS == "Linux":
|
241 |
print(
|
242 |
-
"You should install ffmpeg using your platform's appropriate package manager, 'apt install ffmpeg',
|
|
|
243 |
else:
|
244 |
logging.debug("running an unsupported OS")
|
245 |
print("You're running an unsupported/Un-tested OS")
|
@@ -298,10 +300,10 @@ def download_ffmpeg():
|
|
298 |
|
299 |
#
|
300 |
#
|
301 |
-
|
302 |
|
303 |
|
304 |
-
|
305 |
# Processing Paths and local file handling
|
306 |
#
|
307 |
#
|
@@ -352,16 +354,17 @@ def process_local_file(file_path):
|
|
352 |
|
353 |
#
|
354 |
#
|
355 |
-
|
356 |
|
357 |
|
358 |
-
|
359 |
# Video Download/Handling
|
360 |
#
|
361 |
|
362 |
def process_url(url, num_speakers, whisper_model, custom_prompt, offset, api_name, api_key, vad_filter,
|
363 |
download_video, download_audio, chunk_size):
|
364 |
video_file_path = None
|
|
|
365 |
try:
|
366 |
results = main(url, api_name=api_name, api_key=api_key, num_speakers=num_speakers,
|
367 |
whisper_model=whisper_model, offset=offset, vad_filter=vad_filter,
|
@@ -534,10 +537,10 @@ def download_video(video_url, download_path, info_dict, download_video_flag):
|
|
534 |
|
535 |
#
|
536 |
#
|
537 |
-
|
538 |
|
539 |
|
540 |
-
|
541 |
# Audio Transcription
|
542 |
#
|
543 |
# Convert video .m4a into .wav using ffmpeg
|
@@ -546,7 +549,13 @@ def download_video(video_url, download_path, info_dict, download_video_flag):
|
|
546 |
#
|
547 |
|
548 |
# os.system(r'.\Bin\ffmpeg.exe -ss 00:00:00 -i "{video_file_path}" -ar 16000 -ac 1 -c:a pcm_s16le "{out_path}"')
|
549 |
-
def convert_to_wav(video_file_path, offset=0):
|
|
|
|
|
|
|
|
|
|
|
|
|
550 |
print("Starting conversion process of .m4a to .WAV")
|
551 |
out_path = os.path.splitext(video_file_path)[0] + ".wav"
|
552 |
|
@@ -641,10 +650,10 @@ def speech_to_text(audio_file_path, selected_source_lang='en', whisper_model='sm
|
|
641 |
|
642 |
#
|
643 |
#
|
644 |
-
|
645 |
|
646 |
|
647 |
-
|
648 |
# Diarization
|
649 |
#
|
650 |
# TODO: https://huggingface.co/pyannote/speaker-diarization-3.1
|
@@ -666,7 +675,8 @@ def speech_to_text(audio_file_path, selected_source_lang='en', whisper_model='sm
|
|
666 |
# import tqdm
|
667 |
# import wave
|
668 |
#
|
669 |
-
#
|
|
|
670 |
#
|
671 |
#
|
672 |
# _,file_ending = os.path.splitext(f'{video_file_path}')
|
@@ -761,10 +771,10 @@ def speech_to_text(audio_file_path, selected_source_lang='en', whisper_model='sm
|
|
761 |
# raise RuntimeError("Error Running inference with local model", e)
|
762 |
#
|
763 |
#
|
764 |
-
|
765 |
|
766 |
|
767 |
-
|
768 |
# Summarizers
|
769 |
#
|
770 |
#
|
@@ -1055,18 +1065,20 @@ def summarize_with_llama(api_url, file_path, token, custom_prompt):
|
|
1055 |
|
1056 |
|
1057 |
# https://lite.koboldai.net/koboldcpp_api#/api%2Fv1/post_api_v1_generate
|
1058 |
-
def summarize_with_kobold(
|
1059 |
try:
|
1060 |
logging.debug("kobold: Loading JSON data")
|
1061 |
-
with open(
|
1062 |
segments = json.load(file)
|
1063 |
|
1064 |
logging.debug(f"kobold: Extracting text from segments file")
|
1065 |
text = extract_text_from_segments(segments)
|
1066 |
|
|
|
1067 |
headers = {
|
1068 |
'accept': 'application/json',
|
1069 |
'content-type': 'application/json',
|
|
|
1070 |
}
|
1071 |
|
1072 |
kobold_prompt = f"{text} \n\n\n\n{custom_prompt}"
|
@@ -1082,7 +1094,7 @@ def summarize_with_kobold(api_url, file_path, custom_prompt):
|
|
1082 |
|
1083 |
logging.debug("kobold: Submitting request to API endpoint")
|
1084 |
print("kobold: Submitting request to API endpoint")
|
1085 |
-
response = requests.post(
|
1086 |
response_data = response.json()
|
1087 |
logging.debug("kobold: API Response Data: %s", response_data)
|
1088 |
|
@@ -1105,24 +1117,26 @@ def summarize_with_kobold(api_url, file_path, custom_prompt):
|
|
1105 |
|
1106 |
|
1107 |
# https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API
|
1108 |
-
def summarize_with_oobabooga(
|
1109 |
try:
|
1110 |
logging.debug("ooba: Loading JSON data")
|
1111 |
-
with open(
|
1112 |
segments = json.load(file)
|
1113 |
|
1114 |
logging.debug(f"ooba: Extracting text from segments file\n\n\n")
|
1115 |
text = extract_text_from_segments(segments)
|
1116 |
logging.debug(f"ooba: Finished extracting text from segments file")
|
1117 |
|
|
|
1118 |
headers = {
|
1119 |
'accept': 'application/json',
|
1120 |
'content-type': 'application/json',
|
1121 |
}
|
1122 |
|
1123 |
-
# prompt_text = "I like to eat cake and bake cakes. I am a baker. I work in a French bakery baking cakes. It
|
1124 |
-
#
|
1125 |
-
|
|
|
1126 |
logging.debug("ooba: Prompt being sent is {ooba_prompt}")
|
1127 |
|
1128 |
data = {
|
@@ -1133,7 +1147,7 @@ def summarize_with_oobabooga(api_url, file_path, custom_prompt):
|
|
1133 |
|
1134 |
logging.debug("ooba: Submitting request to API endpoint")
|
1135 |
print("ooba: Submitting request to API endpoint")
|
1136 |
-
response = requests.post(
|
1137 |
logging.debug("ooba: API Response Data: %s", response)
|
1138 |
|
1139 |
if response.status_code == 200:
|
@@ -1161,28 +1175,28 @@ def save_summary_to_file(summary, file_path):
|
|
1161 |
|
1162 |
#
|
1163 |
#
|
1164 |
-
|
1165 |
|
1166 |
|
1167 |
-
|
1168 |
# Gradio UI
|
1169 |
#
|
1170 |
|
1171 |
# Only to be used when configured with Gradio for HF Space
|
1172 |
-
def summarize_with_huggingface(
|
1173 |
logging.debug(f"huggingface: Summarization process starting...")
|
1174 |
|
1175 |
model = "microsoft/Phi-3-mini-128k-instruct"
|
1176 |
API_URL = f"https://api-inference.huggingface.co/models/{model}"
|
1177 |
-
headers = {"Authorization": f"Bearer {
|
1178 |
|
1179 |
-
with open(
|
1180 |
segments = json.load(file)
|
1181 |
text = ''.join([segment['text'] for segment in segments])
|
1182 |
|
1183 |
# FIXME adjust max_length and min_length as needed
|
1184 |
data = {
|
1185 |
-
"inputs": text,
|
1186 |
"parameters": {"max_length": 4096, "min_length": 100}
|
1187 |
}
|
1188 |
|
@@ -1197,24 +1211,26 @@ def summarize_with_huggingface(api_key, file_path):
|
|
1197 |
response_data = response.json()
|
1198 |
wait_time = response_data.get('estimated_time', 10)
|
1199 |
return None, f"Model is loading, retrying in {int(wait_time)} seconds..."
|
|
|
1200 |
# Sleep before retrying....
|
1201 |
-
time.sleep(wait_time)
|
1202 |
|
1203 |
-
if
|
1204 |
-
api_key = os.
|
1205 |
-
logging.debug("HUGGINGFACE API KEY CHECK: " +
|
1206 |
try:
|
1207 |
logging.debug("huggingface: Loading json data for summarization")
|
1208 |
-
with open(
|
1209 |
segments = json.load(file)
|
1210 |
|
1211 |
logging.debug("huggingface: Extracting text from the segments")
|
1212 |
text = ' '.join([segment['text'] for segment in segments])
|
1213 |
|
1214 |
-
api_key = os.
|
1215 |
-
logging.debug("HUGGINGFACE API KEY CHECK #2: " +
|
1216 |
|
1217 |
logging.debug("huggingface: Submitting request...")
|
|
|
1218 |
response = requests.post(API_URL, headers=headers, json=data)
|
1219 |
|
1220 |
if response.status_code == 200:
|
@@ -1230,8 +1246,11 @@ def summarize_with_huggingface(api_key, file_path):
|
|
1230 |
print(f"Error occurred while processing summary with huggingface: {str(e)}")
|
1231 |
return None
|
1232 |
|
1233 |
-
|
1234 |
-
|
|
|
|
|
|
|
1235 |
|
1236 |
|
1237 |
def format_transcription(transcription_result):
|
@@ -1242,19 +1261,6 @@ def format_transcription(transcription_result):
|
|
1242 |
return ""
|
1243 |
|
1244 |
|
1245 |
-
def process_text(api_key, text_file):
|
1246 |
-
summary, message = summarize_with_huggingface(api_key, text_file)
|
1247 |
-
if summary:
|
1248 |
-
# Show summary on success
|
1249 |
-
return "Summary:", summary
|
1250 |
-
else:
|
1251 |
-
# Inform user about load/wait time
|
1252 |
-
return "Notice:", message
|
1253 |
-
|
1254 |
-
|
1255 |
-
|
1256 |
-
|
1257 |
-
|
1258 |
def format_file_path(file_path):
|
1259 |
# Helper function to check file existence: returns the path if it exists, otherwise None
|
1260 |
return file_path if file_path and os.path.exists(file_path) else None
|
@@ -1294,7 +1300,9 @@ def launch_ui(demo_mode=False):
|
|
1294 |
visible=False)
|
1295 |
custom_prompt_input = gr.Textbox(
|
1296 |
label="Custom Prompt (Customize your summary, or ask a different question)",
|
1297 |
-
placeholder="Q: As a professional summarizer, create a concise and comprehensive summary of the
|
|
|
|
|
1298 |
lines=3, visible=True)
|
1299 |
offset_input = gr.Number(value=0, label="Offset (Seconds into the video to start transcribing at)",
|
1300 |
visible=False)
|
@@ -1347,8 +1355,9 @@ def launch_ui(demo_mode=False):
|
|
1347 |
fn=process_url,
|
1348 |
inputs=all_inputs,
|
1349 |
outputs=outputs,
|
1350 |
-
title="Video Transcription and Summarization",
|
1351 |
-
description="Submit a video URL for transcription and summarization. Ensure you input all necessary
|
|
|
1352 |
)
|
1353 |
|
1354 |
with gr.Tab("Transcription & Summarization History"):
|
@@ -1371,7 +1380,7 @@ def launch_ui(demo_mode=False):
|
|
1371 |
#
|
1372 |
|
1373 |
def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model="small.en", offset=0, vad_filter=False,
|
1374 |
-
download_video_flag=False, demo_mode=False, custom_prompt=None):
|
1375 |
if input_path is None and args.user_interface:
|
1376 |
return []
|
1377 |
start_time = time.monotonic()
|
@@ -1385,7 +1394,10 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
|
|
1385 |
elif (info_dict := get_youtube(input_path)) and 'entries' in info_dict:
|
1386 |
logging.debug("MAIN: YouTube playlist detected")
|
1387 |
print(
|
1388 |
-
"\n\nSorry, but playlists aren't currently supported. You can run the following command to generate a
|
|
|
|
|
|
|
1389 |
return
|
1390 |
else:
|
1391 |
paths = [input_path]
|
@@ -1399,8 +1411,7 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
|
|
1399 |
if info_dict:
|
1400 |
logging.debug("MAIN: Creating path for video file...")
|
1401 |
download_path = create_download_directory(info_dict['title'])
|
1402 |
-
logging.debug("MAIN: Path created successfully")
|
1403 |
-
logging.debug("MAIN: Downloading video from yt_dlp...")
|
1404 |
try:
|
1405 |
video_path = download_video(path, download_path, info_dict, download_video_flag)
|
1406 |
except RuntimeError as e:
|
@@ -1431,6 +1442,17 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
|
|
1431 |
logging.info(f"Transcription complete: {audio_file}")
|
1432 |
|
1433 |
# Perform summarization based on the specified API
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1434 |
if api_name and api_key:
|
1435 |
logging.debug(f"MAIN: Summarization being performed by {api_name}")
|
1436 |
json_file_path = audio_file.replace('.wav', '.segments.json')
|
@@ -1441,6 +1463,15 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
|
|
1441 |
summary = summarize_with_openai(openai_api_key, json_file_path, openai_model, custom_prompt)
|
1442 |
except requests.exceptions.ConnectionError:
|
1443 |
requests.status_code = "Connection: "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1444 |
elif api_name.lower() == "anthropic":
|
1445 |
anthropic_api_key = api_key if api_key else config.get('API', 'anthropic_api_key', fallback=None)
|
1446 |
try:
|
@@ -1486,16 +1517,6 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
|
|
1486 |
summary = summarize_with_oobabooga(ooba_ip, json_file_path, ooba_token, custom_prompt)
|
1487 |
except requests.exceptions.ConnectionError:
|
1488 |
requests.status_code = "Connection: "
|
1489 |
-
elif api_name.lower() == "huggingface":
|
1490 |
-
huggingface_api_key = os.environ.get(HF_TOKEN)
|
1491 |
-
if (huggingface_api_key is None):
|
1492 |
-
huggingface_api_key = api_key if api_key else config.get('API', 'huggingface_api_key', fallback=None)
|
1493 |
-
try:
|
1494 |
-
logging.debug(f"MAIN: Trying to summarize with huggingface")
|
1495 |
-
summarize_with_huggingface(huggingface_api_key, json_file_path, custom_prompt)
|
1496 |
-
except requests.exceptions.ConnectionError:
|
1497 |
-
requests.status_code = "Connection: "
|
1498 |
-
|
1499 |
else:
|
1500 |
logging.warning(f"Unsupported API: {api_name}")
|
1501 |
summary = None
|
@@ -1507,10 +1528,11 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
|
|
1507 |
else:
|
1508 |
logging.warning(f"Failed to generate summary using {api_name} API")
|
1509 |
else:
|
1510 |
-
logging.info("No API specified. Summarization will not be performed")
|
1511 |
except Exception as e:
|
1512 |
logging.error(f"Error processing path: {path}")
|
1513 |
logging.error(str(e))
|
|
|
1514 |
# end_time = time.monotonic()
|
1515 |
# print("Total program execution time: " + timedelta(seconds=end_time - start_time))
|
1516 |
|
@@ -1522,6 +1544,7 @@ if __name__ == "__main__":
|
|
1522 |
parser.add_argument('input_path', type=str, help='Path or URL of the video', nargs='?')
|
1523 |
parser.add_argument('-v', '--video', action='store_true', help='Download the video instead of just the audio')
|
1524 |
parser.add_argument('-api', '--api_name', type=str, help='API name for summarization (optional)')
|
|
|
1525 |
parser.add_argument('-ns', '--num_speakers', type=int, default=2, help='Number of speakers (default: 2)')
|
1526 |
parser.add_argument('-wm', '--whisper_model', type=str, default='small.en',
|
1527 |
help='Whisper model (default: small.en)')
|
@@ -1575,7 +1598,7 @@ if __name__ == "__main__":
|
|
1575 |
logging.info(f'API: {args.api_name}')
|
1576 |
logging.info('Summarization will be performed.')
|
1577 |
else:
|
1578 |
-
logging.info('No API specified. Summarization will not be performed.')
|
1579 |
|
1580 |
logging.debug("Platform check being performed...")
|
1581 |
platform_check()
|
@@ -1590,7 +1613,7 @@ if __name__ == "__main__":
|
|
1590 |
try:
|
1591 |
results = main(args.input_path, api_name=args.api_name, api_key=args.api_key,
|
1592 |
num_speakers=args.num_speakers, whisper_model=args.whisper_model, offset=args.offset,
|
1593 |
-
vad_filter=args.vad_filter, download_video_flag=args.video)
|
1594 |
logging.info('Transcription process completed.')
|
1595 |
except Exception as e:
|
1596 |
logging.error('An error occurred during the transcription process.')
|
|
|
70 |
# Download Audio+Video from a list of videos in a text file (can be file paths or URLs) and have them all summarized:
|
71 |
# python summarize.py ./local/file_on_your/system --api_name <API_name>
|
72 |
#
|
73 |
+
# Run it as a WebApp: `python summarize.py -gui` - This requires you to either stuff your API keys into the
|
74 |
+
# `config.txt` file, or pass them into the app every time you want to use it. Can be helpful for setting up a shared
|
75 |
+
# instance when you don't want people performing inference on your server.
|
76 |
#
|
77 |
###
|
78 |
|
|
|
172 |
\__,_||_| \__,_||_| |_| \__| \_/\_/ \__,_| \__| \___||_| |_|
|
173 |
""")
|
174 |
|
175 |
+
#######################################################################################################################
|
176 |
# System Checks
|
177 |
#
|
178 |
#
|
|
|
234 |
else:
|
235 |
logging.debug("ffmpeg not installed on the local system/in local PATH")
|
236 |
print(
|
237 |
+
"ffmpeg is not installed.\n\n You can either install it manually, or through your package manager of "
|
238 |
+
"choice.\n Windows users, builds are here: https://www.gyan.dev/ffmpeg/builds/")
|
239 |
if userOS == "Windows":
|
240 |
download_ffmpeg()
|
241 |
elif userOS == "Linux":
|
242 |
print(
|
243 |
+
"You should install ffmpeg using your platform's appropriate package manager, 'apt install ffmpeg',"
|
244 |
+
"'dnf install ffmpeg' or 'pacman', etc.")
|
245 |
else:
|
246 |
logging.debug("running an unsupported OS")
|
247 |
print("You're running an unsupported/Un-tested OS")
|
|
|
300 |
|
301 |
#
|
302 |
#
|
303 |
+
########################################################################################################################
|
304 |
|
305 |
|
306 |
+
#######################################################################################################################
|
307 |
# Processing Paths and local file handling
|
308 |
#
|
309 |
#
|
|
|
354 |
|
355 |
#
|
356 |
#
|
357 |
+
########################################################################################################################
|
358 |
|
359 |
|
360 |
+
#######################################################################################################################
|
361 |
# Video Download/Handling
|
362 |
#
|
363 |
|
364 |
def process_url(url, num_speakers, whisper_model, custom_prompt, offset, api_name, api_key, vad_filter,
|
365 |
download_video, download_audio, chunk_size):
|
366 |
video_file_path = None
|
367 |
+
print("API Name received:", api_name) # Debugging line
|
368 |
try:
|
369 |
results = main(url, api_name=api_name, api_key=api_key, num_speakers=num_speakers,
|
370 |
whisper_model=whisper_model, offset=offset, vad_filter=vad_filter,
|
|
|
537 |
|
538 |
#
|
539 |
#
|
540 |
+
#######################################################################################################################
|
541 |
|
542 |
|
543 |
+
######################################################################################################################
|
544 |
# Audio Transcription
|
545 |
#
|
546 |
# Convert video .m4a into .wav using ffmpeg
|
|
|
549 |
#
|
550 |
|
551 |
# os.system(r'.\Bin\ffmpeg.exe -ss 00:00:00 -i "{video_file_path}" -ar 16000 -ac 1 -c:a pcm_s16le "{out_path}"')
|
552 |
+
def convert_to_wav(video_file_path, offset=0, overwrite=False):
|
553 |
+
out_path = os.path.splitext(video_file_path)[0] + ".wav"
|
554 |
+
|
555 |
+
if os.path.exists(out_path) and not overwrite:
|
556 |
+
print(f"File '{out_path}' already exists. Skipping conversion.")
|
557 |
+
logging.info(f"Skipping conversion as file already exists: {out_path}")
|
558 |
+
return out_path
|
559 |
print("Starting conversion process of .m4a to .WAV")
|
560 |
out_path = os.path.splitext(video_file_path)[0] + ".wav"
|
561 |
|
|
|
650 |
|
651 |
#
|
652 |
#
|
653 |
+
######################################################################################################################
|
654 |
|
655 |
|
656 |
+
#######################################################################################################################
|
657 |
# Diarization
|
658 |
#
|
659 |
# TODO: https://huggingface.co/pyannote/speaker-diarization-3.1
|
|
|
675 |
# import tqdm
|
676 |
# import wave
|
677 |
#
|
678 |
+
# embedding_model = PretrainedSpeakerEmbedding( embedding_model, device=torch.device("cuda" if
|
679 |
+
# torch.cuda.is_available() else "cpu"))
|
680 |
#
|
681 |
#
|
682 |
# _,file_ending = os.path.splitext(f'{video_file_path}')
|
|
|
771 |
# raise RuntimeError("Error Running inference with local model", e)
|
772 |
#
|
773 |
#
|
774 |
+
######################################################################################################################
|
775 |
|
776 |
|
777 |
+
#######################################################################################################################
|
778 |
# Summarizers
|
779 |
#
|
780 |
#
|
|
|
1065 |
|
1066 |
|
1067 |
# https://lite.koboldai.net/koboldcpp_api#/api%2Fv1/post_api_v1_generate
|
1068 |
+
def summarize_with_kobold(kobold_ip, json_file_path, kobold_token, custom_prompt):
|
1069 |
try:
|
1070 |
logging.debug("kobold: Loading JSON data")
|
1071 |
+
with open(json_file_path, 'r') as file:
|
1072 |
segments = json.load(file)
|
1073 |
|
1074 |
logging.debug(f"kobold: Extracting text from segments file")
|
1075 |
text = extract_text_from_segments(segments)
|
1076 |
|
1077 |
+
# FIXME - The API key header below was suggested by Copilot; the kobold.cpp docs don't mention an auth header, so it is unverified.
|
1078 |
headers = {
|
1079 |
'accept': 'application/json',
|
1080 |
'content-type': 'application/json',
|
1081 |
+
'X_API_KEY': kobold_token
|
1082 |
}
|
1083 |
|
1084 |
kobold_prompt = f"{text} \n\n\n\n{custom_prompt}"
|
|
|
1094 |
|
1095 |
logging.debug("kobold: Submitting request to API endpoint")
|
1096 |
print("kobold: Submitting request to API endpoint")
|
1097 |
+
response = requests.post(kobold_ip, headers=headers, json=data)
|
1098 |
response_data = response.json()
|
1099 |
logging.debug("kobold: API Response Data: %s", response_data)
|
1100 |
|
|
|
1117 |
|
1118 |
|
1119 |
# https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API
|
1120 |
+
def summarize_with_oobabooga(ooba_ip, json_file_path, ooba_token, custom_prompt):
|
1121 |
try:
|
1122 |
logging.debug("ooba: Loading JSON data")
|
1123 |
+
with open(json_file_path, 'r') as file:
|
1124 |
segments = json.load(file)
|
1125 |
|
1126 |
logging.debug(f"ooba: Extracting text from segments file\n\n\n")
|
1127 |
text = extract_text_from_segments(segments)
|
1128 |
logging.debug(f"ooba: Finished extracting text from segments file")
|
1129 |
|
1130 |
+
# FIXME - Add headers for ooba auth
|
1131 |
headers = {
|
1132 |
'accept': 'application/json',
|
1133 |
'content-type': 'application/json',
|
1134 |
}
|
1135 |
|
1136 |
+
# prompt_text = "I like to eat cake and bake cakes. I am a baker. I work in a French bakery baking cakes. It
|
1137 |
+
# is a fun job. I have been baking cakes for ten years. I also bake lots of other baked goods, but cakes are
|
1138 |
+
# my favorite." prompt_text += f"\n\n{text}" # Uncomment this line if you want to include the text variable
|
1139 |
+
ooba_prompt = f"{text}\n\n\n\n{custom_prompt}"
|
1140 |
logging.debug("ooba: Prompt being sent is {ooba_prompt}")
|
1141 |
|
1142 |
data = {
|
|
|
1147 |
|
1148 |
logging.debug("ooba: Submitting request to API endpoint")
|
1149 |
print("ooba: Submitting request to API endpoint")
|
1150 |
+
response = requests.post(ooba_ip, headers=headers, json=data, verify=False)
|
1151 |
logging.debug("ooba: API Response Data: %s", response)
|
1152 |
|
1153 |
if response.status_code == 200:
|
|
|
1175 |
|
1176 |
#
|
1177 |
#
|
1178 |
+
########################################################################################################################
|
1179 |
|
1180 |
|
1181 |
+
#######################################################################################################################
|
1182 |
# Gradio UI
|
1183 |
#
|
1184 |
|
1185 |
# Only to be used when configured with Gradio for HF Space
|
1186 |
+
def summarize_with_huggingface(huggingface_api_key, json_file_path, custom_prompt):
|
1187 |
logging.debug(f"huggingface: Summarization process starting...")
|
1188 |
|
1189 |
model = "microsoft/Phi-3-mini-128k-instruct"
|
1190 |
API_URL = f"https://api-inference.huggingface.co/models/{model}"
|
1191 |
+
headers = {"Authorization": f"Bearer {huggingface_api_key}"}
|
1192 |
|
1193 |
+
with open(json_file_path, 'r') as file:
|
1194 |
segments = json.load(file)
|
1195 |
text = ''.join([segment['text'] for segment in segments])
|
1196 |
|
1197 |
# FIXME adjust max_length and min_length as needed
|
1198 |
data = {
|
1199 |
+
"inputs": text + "\n\n\n\n" + custom_prompt,
|
1200 |
"parameters": {"max_length": 4096, "min_length": 100}
|
1201 |
}
|
1202 |
|
|
|
1211 |
response_data = response.json()
|
1212 |
wait_time = response_data.get('estimated_time', 10)
|
1213 |
return None, f"Model is loading, retrying in {int(wait_time)} seconds..."
|
1214 |
+
# FIXME : This is a hack, should be done better
|
1215 |
# Sleep before retrying....
|
1216 |
+
# time.sleep(wait_time)
|
1217 |
|
1218 |
+
if huggingface_api_key == "":
|
1219 |
+
api_key = os.getenv(HF_TOKEN)
|
1220 |
+
logging.debug("HUGGINGFACE API KEY CHECK: " + huggingface_api_key)
|
1221 |
try:
|
1222 |
logging.debug("huggingface: Loading json data for summarization")
|
1223 |
+
with open(json_file_path, 'r') as file:
|
1224 |
segments = json.load(file)
|
1225 |
|
1226 |
logging.debug("huggingface: Extracting text from the segments")
|
1227 |
text = ' '.join([segment['text'] for segment in segments])
|
1228 |
|
1229 |
+
#api_key = os.getenv('HF_TOKEN').replace('"', '')
|
1230 |
+
logging.debug("HUGGINGFACE API KEY CHECK #2: " + huggingface_api_key)
|
1231 |
|
1232 |
logging.debug("huggingface: Submitting request...")
|
1233 |
+
logging.debug("huggingface: Printing request headers: %s", headers)
|
1234 |
response = requests.post(API_URL, headers=headers, json=data)
|
1235 |
|
1236 |
if response.status_code == 200:
|
|
|
1246 |
print(f"Error occurred while processing summary with huggingface: {str(e)}")
|
1247 |
return None
|
1248 |
|
1249 |
+
# FIXME
|
1250 |
+
# This is here for gradio authentication
|
1251 |
+
# It's just not set up.
|
1252 |
+
#def same_auth(username, password):
|
1253 |
+
# return username == password
|
1254 |
|
1255 |
|
1256 |
def format_transcription(transcription_result):
|
|
|
1261 |
return ""
|
1262 |
|
1263 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1264 |
def format_file_path(file_path):
|
1265 |
# Helper function to check file existence: returns the path if it exists, otherwise None
|
1266 |
return file_path if file_path and os.path.exists(file_path) else None
|
|
|
1300 |
visible=False)
|
1301 |
custom_prompt_input = gr.Textbox(
|
1302 |
label="Custom Prompt (Customize your summary, or ask a different question)",
|
1303 |
+
placeholder="Q: As a professional summarizer, create a concise and comprehensive summary of the "
|
1304 |
+
"provided text.\nA: Here is a detailed, bulleted list of the key points made in the "
|
1305 |
+
"transcribed video and supporting arguments:",
|
1306 |
lines=3, visible=True)
|
1307 |
offset_input = gr.Number(value=0, label="Offset (Seconds into the video to start transcribing at)",
|
1308 |
visible=False)
|
|
|
1355 |
fn=process_url,
|
1356 |
inputs=all_inputs,
|
1357 |
outputs=outputs,
|
1358 |
+
title="TL/DW: Video Transcription and Summarization with Custom Prompt Support",
|
1359 |
+
description="Submit a video URL for transcription and summarization. Ensure you input all necessary "
|
1360 |
+
"information including API keys."
|
1361 |
)
|
1362 |
|
1363 |
with gr.Tab("Transcription & Summarization History"):
|
|
|
1380 |
#
|
1381 |
|
1382 |
def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model="small.en", offset=0, vad_filter=False,
|
1383 |
+
download_video_flag=False, demo_mode=False, custom_prompt=None, overwrite=False):
|
1384 |
if input_path is None and args.user_interface:
|
1385 |
return []
|
1386 |
start_time = time.monotonic()
|
|
|
1394 |
elif (info_dict := get_youtube(input_path)) and 'entries' in info_dict:
|
1395 |
logging.debug("MAIN: YouTube playlist detected")
|
1396 |
print(
|
1397 |
+
"\n\nSorry, but playlists aren't currently supported. You can run the following command to generate a "
|
1398 |
+
"text file that you can then pass into this script though! (It may not work... playlist support seems "
|
1399 |
+
"spotty)" + """\n\n\tpython Get_Playlist_URLs.py <Youtube Playlist URL>\n\n\tThen,\n\n\tpython
|
1400 |
+
diarizer.py <playlist text file name>\n\n""")
|
1401 |
return
|
1402 |
else:
|
1403 |
paths = [input_path]
|
|
|
1411 |
if info_dict:
|
1412 |
logging.debug("MAIN: Creating path for video file...")
|
1413 |
download_path = create_download_directory(info_dict['title'])
|
1414 |
+
logging.debug("MAIN: Path created successfully\n MAIN: Now Downloading video from yt_dlp...")
|
|
|
1415 |
try:
|
1416 |
video_path = download_video(path, download_path, info_dict, download_video_flag)
|
1417 |
except RuntimeError as e:
|
|
|
1442 |
logging.info(f"Transcription complete: {audio_file}")
|
1443 |
|
1444 |
# Perform summarization based on the specified API
|
1445 |
+
logging.debug(f"MAIN: HF: Summarization being performed by HuggingFace")
|
1446 |
+
json_file_path = audio_file.replace('.wav', '.segments.json')
|
1447 |
+
if api_name == "huggingface":
|
1448 |
+
huggingface_api_key = os.getenv('HF_TOKEN').replace('"', '')
|
1449 |
+
if huggingface_api_key is None:
|
1450 |
+
huggingface_api_key = api_key if api_key else config.get('API', 'huggingface_api_key', fallback=None)
|
1451 |
+
try:
|
1452 |
+
logging.debug(f"MAIN: Trying to summarize with huggingface")
|
1453 |
+
summarize_with_huggingface(huggingface_api_key, json_file_path, custom_prompt)
|
1454 |
+
except requests.exceptions.ConnectionError:
|
1455 |
+
requests.status_code = "Connection: "
|
1456 |
if api_name and api_key:
|
1457 |
logging.debug(f"MAIN: Summarization being performed by {api_name}")
|
1458 |
json_file_path = audio_file.replace('.wav', '.segments.json')
|
|
|
1463 |
summary = summarize_with_openai(openai_api_key, json_file_path, openai_model, custom_prompt)
|
1464 |
except requests.exceptions.ConnectionError:
|
1465 |
requests.status_code = "Connection: "
|
1466 |
+
elif api_name.lower() == "huggingface":
|
1467 |
+
huggingface_api_key = os.getenv(HF_TOKEN)
|
1468 |
+
if huggingface_api_key is None:
|
1469 |
+
huggingface_api_key = api_key if api_key else config.get('API', 'huggingface_api_key', fallback=None)
|
1470 |
+
try:
|
1471 |
+
logging.debug(f"MAIN: Trying to summarize with huggingface")
|
1472 |
+
summarize_with_huggingface(huggingface_api_key, json_file_path, custom_prompt)
|
1473 |
+
except requests.exceptions.ConnectionError:
|
1474 |
+
requests.status_code = "Connection: "
|
1475 |
elif api_name.lower() == "anthropic":
|
1476 |
anthropic_api_key = api_key if api_key else config.get('API', 'anthropic_api_key', fallback=None)
|
1477 |
try:
|
|
|
1517 |
summary = summarize_with_oobabooga(ooba_ip, json_file_path, ooba_token, custom_prompt)
|
1518 |
except requests.exceptions.ConnectionError:
|
1519 |
requests.status_code = "Connection: "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1520 |
else:
|
1521 |
logging.warning(f"Unsupported API: {api_name}")
|
1522 |
summary = None
|
|
|
1528 |
else:
|
1529 |
logging.warning(f"Failed to generate summary using {api_name} API")
|
1530 |
else:
|
1531 |
+
logging.info("MAIN: #2 - No API specified. Summarization will not be performed")
|
1532 |
except Exception as e:
|
1533 |
logging.error(f"Error processing path: {path}")
|
1534 |
logging.error(str(e))
|
1535 |
+
continue
|
1536 |
# end_time = time.monotonic()
|
1537 |
# print("Total program execution time: " + timedelta(seconds=end_time - start_time))
|
1538 |
|
|
|
1544 |
parser.add_argument('input_path', type=str, help='Path or URL of the video', nargs='?')
|
1545 |
parser.add_argument('-v', '--video', action='store_true', help='Download the video instead of just the audio')
|
1546 |
parser.add_argument('-api', '--api_name', type=str, help='API name for summarization (optional)')
|
1547 |
+
parser.add_argument('--overwrite', action='store_true', help='Overwrite existing audio files')
|
1548 |
parser.add_argument('-ns', '--num_speakers', type=int, default=2, help='Number of speakers (default: 2)')
|
1549 |
parser.add_argument('-wm', '--whisper_model', type=str, default='small.en',
|
1550 |
help='Whisper model (default: small.en)')
|
|
|
1598 |
logging.info(f'API: {args.api_name}')
|
1599 |
logging.info('Summarization will be performed.')
|
1600 |
else:
|
1601 |
+
logging.info('MAIN: #1 No API specified. Summarization will not be performed.')
|
1602 |
|
1603 |
logging.debug("Platform check being performed...")
|
1604 |
platform_check()
|
|
|
1613 |
try:
|
1614 |
results = main(args.input_path, api_name=args.api_name, api_key=args.api_key,
|
1615 |
num_speakers=args.num_speakers, whisper_model=args.whisper_model, offset=args.offset,
|
1616 |
+
vad_filter=args.vad_filter, download_video_flag=args.video, overwrite=args.overwrite)
|
1617 |
logging.info('Transcription process completed.')
|
1618 |
except Exception as e:
|
1619 |
logging.error('An error occurred during the transcription process.')
|