Vid-Summarizer / app.py
oceansweep's picture
Update app.py
fa80e45 verified
raw
history blame
No virus
41.8 kB
#!/usr/bin/env python3
# Std Lib Imports
import argparse
import atexit
import json
import logging
import os
import signal
import sys
import time
import webbrowser
#
# Local Library Imports
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), 'App_Function_Libraries')))
from App_Function_Libraries.Book_Ingestion_Lib import ingest_folder, ingest_text_file
from App_Function_Libraries.Chunk_Lib import semantic_chunk_long_file#, rolling_summarize_function,
from App_Function_Libraries.Gradio_Related import launch_ui
from App_Function_Libraries.Local_LLM_Inference_Engine_Lib import cleanup_process, local_llm_function
from App_Function_Libraries.Local_Summarization_Lib import summarize_with_local_llm
from App_Function_Libraries.Summarization_General_Lib import summarize_with_openai, summarize_with_anthropic, \
summarize_with_cohere, summarize_with_groq, perform_transcription, perform_summarization
from App_Function_Libraries.Audio_Transcription_Lib import speech_to_text
from App_Function_Libraries.Local_File_Processing_Lib import read_paths_from_file, process_local_file
from App_Function_Libraries.DB.DB_Manager import add_media_to_database
from App_Function_Libraries.Utils.System_Checks_Lib import cuda_check, platform_check, check_ffmpeg
from App_Function_Libraries.Utils.Utils import load_and_log_configs, create_download_directory, extract_text_from_segments, \
cleanup_downloads
from App_Function_Libraries.Video_DL_Ingestion_Lib import download_video, extract_video_info
#
# 3rd-Party Module Imports
import requests
# OpenAI Tokenizer support
#
# Other Tokenizers
#
#######################
# Logging Setup
#
# Module-wide logging default: everything from DEBUG upwards, timestamped.
log_level = "DEBUG"
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=getattr(logging, log_level),
)
# Switch off Gradio's analytics through its environment flag.
os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
#
#############
# Global variables setup
#custom_prompt_input = ("Above is the transcript of a video. Please read through the transcript carefully. Identify the "
#"main topics that are discussed over the course of the transcript. Then, summarize the key points about each main "
#"topic in bullet points. The bullet points should cover the key information conveyed about each topic in the video, "
#"but should be much shorter than the full transcript. Please output your bullet point summary inside <bulletpoints> "
#"tags.")
#
# Global variables
# Whisper model names offered by the application (each name listed exactly once).
whisper_models = [
    "small", "medium", "small.en", "medium.en", "large", "large-v1", "large-v2", "large-v3",
    "distil-large-v2", "distil-medium.en", "distil-small.en",
]
# Defaults for the server flags; the command-line entry point reassigns both.
server_mode = False
share_public = False
#
#
#######################
#######################
# Function Sections
#
# Outline of the sections this file is organised into, kept as a string constant.
# NOTE(review): nothing visible in this file reads ``abc_xyz`` - confirm before removing it.
abc_xyz = """
Database Setup
Config Loading
System Checks
DataBase Functions
Processing Paths and local file handling
Video Download/Handling
Audio Transcription
Diarization
Chunking-related Techniques & Functions
Tokenization-related Techniques & Functions
Summarizers
Gradio UI
Main
"""
#
#
#######################
#######################
#
# TL/DW: Too Long Didn't Watch
#
# Project originally created by https://github.com/the-crypt-keeper
# Modifications made by https://github.com/rmusser01
# All credit to the original authors, I've just glued shit together.
#
#
# Usage:
#
# Download Audio only from URL -> Transcribe audio:
# python summarize.py https://www.youtube.com/watch?v=4nd1CDZP21s
#
# Download Audio+Video from URL -> Transcribe audio from Video:
# python summarize.py -v https://www.youtube.com/watch?v=4nd1CDZP21s
#
# Download Audio only from URL -> Transcribe audio -> Summarize using (`anthropic`/`cohere`/`openai`/`llama` (llama.cpp)/`ooba` (oobabooga/text-gen-webui)/`kobold` (kobold.cpp)/`tabby` (Tabbyapi)) API:
# python summarize.py -v https://www.youtube.com/watch?v=4nd1CDZP21s -api <your choice of API> - Make sure to put your API key into `config.txt` under the appropriate API variable
#
# Download Audio+Video from a list of videos in a text file (can be file paths or URLs) and have them all summarized:
# python summarize.py ./local/file_on_your/system --api_name <API_name>
#
# Run it as a WebApp:
# python summarize.py -gui - This requires you to either put your API keys into the `config.txt` file, or pass them into the app every time you want to use it.
# Can be helpful for setting up a shared instance, but not wanting people to perform inference on your server.
#
#######################
#######################
# Random issues I've encountered and how I solved them:
# 1. Something about cuda nn library missing, even though cuda is installed...
# https://github.com/tensorflow/tensorflow/issues/54784 - Basically, installing zlib made it go away. idk.
# Or https://github.com/SYSTRAN/faster-whisper/issues/85
#
# 2. ERROR: Could not install packages due to an OSError: [WinError 2] The system cannot find the file specified: 'C:\\Python312\\Scripts\\dateparser-download.exe' -> 'C:\\Python312\\Scripts\\dateparser-download.exe.deleteme'
# Resolved through adding --user to the pip install command
#
# 3. Windows: Could not locate cudnn_ops_infer64_8.dll. Please make sure it is in your library path!
#
# 4.
#
# 5.
#
#
#
#######################
#######################
# DB Setup
# Handled by SQLite_DB.py
#######################
#######################
# Config loading
#
# 1.
# 2.
#
#
#######################
#######################
# System Startup Notice
#
# Dirty hack - sue me. - FIXME - fix this...
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
# Whisper model names offered by the application (each name listed exactly once).
whisper_models = [
    "small", "medium", "small.en", "medium.en", "large", "large-v1", "large-v2", "large-v3",
    "distil-large-v2", "distil-medium.en", "distil-small.en",
]
# Language code -> display name for the supported source languages.
source_languages = {
    "en": "English",
    "zh": "Chinese",
    "de": "German",
    "es": "Spanish",
    "ru": "Russian",
    "ko": "Korean",
    "fr": "French"
}
# Just the language codes, in the order they are declared above.
source_language_list = list(source_languages)
def print_hello():
    """Print the ASCII-art start-up banner, then pause for one second."""
    banner = r"""_____ _ ________ _ _
|_ _|| | / /| _ \| | | | _
| | | | / / | | | || | | |(_)
| | | | / / | | | || |/\| |
| | | |____ / / | |/ / \ /\ / _
\_/ \_____//_/ |___/ \/ \/ (_)
_ _
| | | |
| |_ ___ ___ | | ___ _ __ __ _
| __| / _ \ / _ \ | | / _ \ | '_ \ / _` |
| |_ | (_) || (_) | | || (_) || | | || (_| | _
\__| \___/ \___/ |_| \___/ |_| |_| \__, |( )
__/ ||/
|___/
_ _ _ _ _ _ _
| |(_) | | ( )| | | | | |
__| | _ __| | _ __ |/ | |_ __ __ __ _ | |_ ___ | |__
/ _` || | / _` || '_ \ | __| \ \ /\ / / / _` || __| / __|| '_ \
| (_| || || (_| || | | | | |_ \ V V / | (_| || |_ | (__ | | | |
\__,_||_| \__,_||_| |_| \__| \_/\_/ \__,_| \__| \___||_| |_|
"""
    print(banner)
    # Give the user a moment to see the banner before log output starts scrolling.
    time.sleep(1)
#
#
#######################
#######################
# System Check Functions
#
# 1. platform_check()
# 2. cuda_check()
# 3. decide_cpugpu()
# 4. check_ffmpeg()
# 5. download_ffmpeg()
#
#######################
#######################
# DB Functions
#
# create_tables()
# add_keyword()
# delete_keyword()
# add_keyword()
# add_media_with_keywords()
# search_db()
# format_results()
# search_and_display()
# export_to_csv()
# is_valid_url()
# is_valid_date()
#
########################################################################################################################
########################################################################################################################
# Processing Paths and local file handling
#
# Function List
# 1. read_paths_from_file(file_path)
# 2. process_path(path)
# 3. process_local_file(file_path)
# 4. read_paths_from_file(file_path: str) -> List[str]
#
#
########################################################################################################################
#######################################################################################################################
# Online Article Extraction / Handling
#
# Function List
# 1. get_page_title(url)
# 2. get_article_text(url)
# 3. get_article_title(article_url_arg)
#
#
#######################################################################################################################
#######################################################################################################################
# Video Download/Handling
# Video-DL-Ingestion-Lib
#
# Function List
# 1. get_video_info(url)
# 2. create_download_directory(title)
# 3. sanitize_filename(title)
# 4. normalize_title(title)
# 5. get_youtube(video_url)
# 6. get_playlist_videos(playlist_url)
# 7. download_video(video_url, download_path, info_dict, download_video_flag)
# 8. save_to_file(video_urls, filename)
# 9. save_summary_to_file(summary, file_path)
# 10. process_url(url, num_speakers, whisper_model, custom_prompt, offset, api_name, api_key, vad_filter, download_video, download_audio, rolling_summarization, detail_level, question_box, keywords, ) # FIXME - UPDATE
#
#
#######################################################################################################################
#######################################################################################################################
# Audio Transcription
#
# Function List
# 1. convert_to_wav(video_file_path, offset=0, overwrite=False)
# 2. speech_to_text(audio_file_path, selected_source_lang='en', whisper_model='small.en', vad_filter=False)
#
#
#######################################################################################################################
#######################################################################################################################
# Diarization
#
# Function List 1. speaker_diarize(video_file_path, segments, embedding_model = "pyannote/embedding",
# embedding_size=512, num_speakers=0)
#
#
#######################################################################################################################
#######################################################################################################################
# Chunking-related Techniques & Functions
#
#
# FIXME
#
#
#######################################################################################################################
#######################################################################################################################
# Tokenization-related Functions
#
#
# FIXME
#
#
#######################################################################################################################
#######################################################################################################################
# Website-related Techniques & Functions
#
#
#
#
#######################################################################################################################
#######################################################################################################################
# Summarizers
#
# Function List
# 1. extract_text_from_segments(segments: List[Dict]) -> str
# 2. summarize_with_openai(api_key, file_path, custom_prompt_arg)
# 3. summarize_with_anthropic(api_key, file_path, model, custom_prompt_arg, max_retries=3, retry_delay=5)
# 4. summarize_with_cohere(api_key, file_path, model, custom_prompt_arg)
# 5. summarize_with_groq(api_key, file_path, model, custom_prompt_arg)
#
#################################
# Local Summarization
#
# Function List
#
# 1. summarize_with_local_llm(file_path, custom_prompt_arg)
# 2. summarize_with_llama(api_url, file_path, token, custom_prompt)
# 3. summarize_with_kobold(api_url, file_path, kobold_api_token, custom_prompt)
# 4. summarize_with_oobabooga(api_url, file_path, ooba_api_token, custom_prompt)
# 5. summarize_with_vllm(vllm_api_url, vllm_api_key_function_arg, llm_model, text, vllm_custom_prompt_function_arg)
# 6. summarize_with_tabbyapi(tabby_api_key, tabby_api_IP, text, tabby_model, custom_prompt)
# 7. save_summary_to_file(summary, file_path)
#
#######################################################################################################################
#######################################################################################################################
# Summarization with Detail
#
# FIXME - see 'Old_Chunking_Lib.py'
#
#
#######################################################################################################################
#######################################################################################################################
# Gradio UI
#
#
#
#
#
#################################################################################################################
#
#######################################################################################################################
# Local LLM Setup / Running
#
# Function List
# 1. download_latest_llamafile(repo, asset_name_prefix, output_filename)
# 2. download_file(url, dest_path, expected_checksum=None, max_retries=3, delay=5)
# 3. verify_checksum(file_path, expected_checksum)
# 4. cleanup_process()
# 5. signal_handler(sig, frame)
# 6. local_llm_function()
# 7. launch_in_new_terminal_windows(executable, args)
# 8. launch_in_new_terminal_linux(executable, args)
# 9. launch_in_new_terminal_mac(executable, args)
#
#
#######################################################################################################################
#######################################################################################################################
# Helper Functions for Main() & process_url()
#
#
#
#######################################################################################################################
######################################################################################################################
# Main()
#
def _transcribe_media(media_path, offset, whisper_model, vad_filter, diarize):
    """Run perform_transcription on ``media_path``, passing ``diarize=True`` only when requested."""
    if diarize:
        return perform_transcription(media_path, offset, whisper_model, vad_filter, diarize=True)
    return perform_transcription(media_path, offset, whisper_model, vad_filter)


def _summarize_transcription(transcription_text, api_name, api_key, custom_prompt, rolling_summarization):
    """Return a summary of ``transcription_text``, or None when no summarizer applies."""
    if rolling_summarization:
        # FIXME - rolling summarization is not implemented yet (see 'Old_Chunking_Lib.py')
        logging.warning("Rolling summarization is not implemented yet; no summary will be produced.")
        return None
    if api_name:
        return perform_summarization(api_name, transcription_text, custom_prompt, api_key)
    return None


def _save_summary(download_path, media_file, summary):
    """Write ``summary`` to ``<media file stem>_summary.txt`` and return the path written."""
    # Name the file after the media file; the transcription dict itself is not a usable file name.
    media_name = str(media_file)
    stem = os.path.splitext(os.path.basename(media_name))[0] or "transcription"
    # Fall back to the media file's own directory when no download directory is known.
    target_dir = download_path or os.path.dirname(media_name) or "."
    summary_file_path = os.path.join(target_dir, f"{stem}_summary.txt")
    with open(summary_file_path, 'w', encoding='utf-8') as file:
        file.write(summary)
    return summary_file_path


def _process_video_url(url, offset, whisper_model, vad_filter, diarize, download_video_flag,
                       api_name, api_key, custom_prompt, rolling_summarization, keywords):
    """Download, transcribe, optionally summarize, and store one remote video.

    Returns ``(info_dict, audio_file, transcription_text, summary)``, or None when the
    download failed.
    """
    info_dict, title = extract_video_info(url)
    download_path = create_download_directory(title)
    video_path = download_video(url, download_path, info_dict, download_video_flag)
    if not video_path:
        logging.error(f"Failed to download video: {url}")
        return None

    audio_file, segments = _transcribe_media(video_path, offset, whisper_model, vad_filter, diarize)
    transcription_text = {'audio_file': audio_file, 'transcription': segments}
    summary = _summarize_transcription(transcription_text, api_name, api_key, custom_prompt,
                                       rolling_summarization)
    if summary:
        # Save the summary file in the download_path directory
        # (assumes audio_file is a file path - TODO confirm against perform_transcription)
        _save_summary(download_path, audio_file, summary)
    # NOTE(review): the original indentation was lost; the media is stored even when there is
    # no summary - confirm this matches the intended behaviour.
    add_media_to_database(url, info_dict, segments, summary, keywords, custom_prompt, whisper_model)
    return info_dict, audio_file, transcription_text, summary


def _summarize_chunk(method, api_key, chunk_text, custom_prompt):
    """Summarize one text chunk with the named method; None for an unknown method."""
    if method == 'local-llm':
        return summarize_with_local_llm(chunk_text, custom_prompt)
    # FIXME - Add more summarization methods as needed
    api_summarizers = {
        'openai': summarize_with_openai,
        'anthropic': summarize_with_anthropic,
        'cohere': summarize_with_cohere,
        'groq': summarize_with_groq,
    }
    summarizer = api_summarizers.get(method)
    if summarizer is None:
        return None
    return summarizer(api_key, chunk_text, custom_prompt)


def _chunk_text_file(path, max_chunk_size, chunk_overlap, chunk_unit, summarize_chunks, api_key,
                     custom_prompt):
    """Chunk a text file, optionally summarize each chunk, and save the results as JSON."""
    chunks = semantic_chunk_long_file(path, max_chunk_size, chunk_overlap)
    if not chunks:
        logging.error(f"Failed to chunk file {path}")
        return

    chunks_data = {
        "file_path": path,
        "chunk_unit": chunk_unit,
        "max_chunk_size": max_chunk_size,
        "chunk_overlap": chunk_overlap,
        "chunks": []
    }
    summaries_data = {
        "file_path": path,
        "summarization_method": summarize_chunks,
        "summaries": []
    }

    for chunk_id, chunk_text in enumerate(chunks, start=1):
        chunks_data["chunks"].append({"chunk_id": chunk_id, "text": chunk_text})
        if not summarize_chunks:
            continue
        chunk_summary = _summarize_chunk(summarize_chunks, api_key, chunk_text, custom_prompt)
        if chunk_summary:
            summaries_data["summaries"].append({"chunk_id": chunk_id, "summary": chunk_summary})
        else:
            logging.warning(f"Failed to generate summary for chunk {chunk_id}")

    # Save chunks to a single JSON file
    chunks_file_path = f"{path}_chunks.json"
    with open(chunks_file_path, 'w', encoding='utf-8') as f:
        json.dump(chunks_data, f, ensure_ascii=False, indent=2)
    logging.info(f"All chunks saved to {chunks_file_path}")

    # Save summaries to a single JSON file (if summarization was performed)
    if summarize_chunks:
        summaries_file_path = f"{path}_summaries.json"
        with open(summaries_file_path, 'w', encoding='utf-8') as f:
            json.dump(summaries_data, f, ensure_ascii=False, indent=2)
        logging.info(f"All summaries saved to {summaries_file_path}")

    logging.info(f"File {path} chunked into {len(chunks)} parts using {chunk_unit} as the unit.")


def main(input_path, api_name=None, api_key=None,
         num_speakers=2,
         whisper_model="small.en",
         offset=0,
         vad_filter=False,
         download_video_flag=False,
         custom_prompt=None,
         overwrite=False,
         rolling_summarization=False,
         detail=0.01,
         keywords=None,
         llm_model=None,
         time_based=False,
         set_chunk_txt_by_words=False,
         set_max_txt_chunk_words=0,
         set_chunk_txt_by_sentences=False,
         set_max_txt_chunk_sentences=0,
         set_chunk_txt_by_paragraphs=False,
         set_max_txt_chunk_paragraphs=0,
         set_chunk_txt_by_tokens=False,
         set_max_txt_chunk_tokens=0,
         ingest_text_file=False,
         chunk=False,
         max_chunk_size=2000,
         chunk_overlap=100,
         chunk_unit='tokens',
         summarize_chunks=None,
         diarize=False
         ):
    """Transcribe, and optionally summarize, every item reachable from ``input_path``.

    ``input_path`` is either used directly (when it is not an existing file) or expanded
    through ``read_paths_from_file``. Each resulting path is handled as one of:

    * a URL (starts with ``http``): downloaded, transcribed, summarized and stored;
    * a ``.txt`` file when ``chunk`` is set: chunked, with chunks (and optional per-chunk
      summaries) saved as JSON next to the file;
    * anything else: resolved by ``process_local_file`` into a list of URLs or one media/text file.

    Args:
        input_path: URL or local path to process. A falsy value returns ``[]`` immediately.
        api_name: Summarization API name handed to ``perform_summarization``; no summary if falsy.
        api_key: API key forwarded to the summarizers.
        whisper_model, offset, vad_filter, diarize: Forwarded to the transcription helpers.
        download_video_flag: Forwarded to ``download_video``.
        custom_prompt: Prompt used for summarization and stored with the media.
        rolling_summarization: Not implemented yet; when true no summary is produced.
        keywords: Keywords stored with the media.
        chunk, max_chunk_size, chunk_overlap, chunk_unit, summarize_chunks: Text chunking options.
        num_speakers, overwrite, detail, llm_model, time_based, set_chunk_txt_by_*,
        set_max_txt_chunk_*, ingest_text_file: Accepted for compatibility; not used here.

    Returns:
        The transcription dict of the last successfully transcribed item, ``None`` when nothing
        was transcribed, or ``[]`` when ``input_path`` is falsy.
    """
    global detail_level_number, summary, audio_file, transcription_text, info_dict

    print(f"Keywords: {keywords}")
    if not input_path:
        return []

    # Reset per-call state so nothing leaks in from an earlier call or is left undefined.
    transcription_text = None
    summary = None

    url_options = dict(
        offset=offset, whisper_model=whisper_model, vad_filter=vad_filter, diarize=diarize,
        download_video_flag=download_video_flag, api_name=api_name, api_key=api_key,
        custom_prompt=custom_prompt, rolling_summarization=rolling_summarization,
        keywords=keywords,
    )

    paths = [input_path] if not os.path.isfile(input_path) else read_paths_from_file(input_path)

    for path in paths:
        try:
            if path.startswith('http'):
                outcome = _process_video_url(path, **url_options)
                if outcome is not None:
                    info_dict, audio_file, transcription_text, summary = outcome

            # FIXME - make sure this doesn't break ingesting multiple videos vs multiple text files
            # FIXME - Need to update so that chunking is fully handled.
            elif chunk and path.lower().endswith('.txt'):
                _chunk_text_file(path, max_chunk_size, chunk_overlap, chunk_unit, summarize_chunks,
                                 api_key, custom_prompt)

            # Handle downloading of URLs from a text file or processing local video/audio files
            else:
                download_path, info_dict, urls_or_media_file = process_local_file(path)
                if isinstance(urls_or_media_file, list):
                    # Text file containing URLs: handle each URL exactly once
                    for url in urls_or_media_file:
                        if not url.startswith(('http://', 'https://')):
                            continue
                        outcome = _process_video_url(url, **url_options)
                        if outcome is not None:
                            info_dict, audio_file, transcription_text, summary = outcome
                else:
                    # Video or audio or txt file
                    media_path = urls_or_media_file
                    lowered = media_path.lower()

                    if lowered.endswith(('.txt', '.md')):
                        if lowered.endswith('.txt'):
                            # Handle text file ingestion. The boolean ``ingest_text_file``
                            # parameter shadows the module-level function of the same name,
                            # so the function is imported here under an alias.
                            from App_Function_Libraries.Book_Ingestion_Lib import \
                                ingest_text_file as ingest_text_file_fn
                            result = ingest_text_file_fn(media_path)
                            logging.info(result)
                        else:
                            logging.warning(f"Markdown ingestion is not implemented; skipping {media_path}")
                        # Text files have no audio segments to transcribe or summarize.
                        continue

                    if lowered.endswith(('.mp4', '.avi', '.mov')):
                        audio_file, segments = _transcribe_media(media_path, offset, whisper_model,
                                                                 vad_filter, diarize)
                    elif lowered.endswith(('.wav', '.mp3', '.m4a')):
                        if diarize:
                            segments = speech_to_text(media_path, whisper_model=whisper_model,
                                                      vad_filter=vad_filter, diarize=True)
                        else:
                            segments = speech_to_text(media_path, whisper_model=whisper_model,
                                                      vad_filter=vad_filter)
                    else:
                        logging.error(f"Unsupported media file format: {media_path}")
                        continue

                    transcription_text = {'media_path': path, 'audio_file': media_path,
                                          'transcription': segments}
                    summary = _summarize_transcription(transcription_text, api_name, api_key,
                                                       custom_prompt, rolling_summarization)
                    if summary:
                        # Save the summary file in the download_path directory
                        _save_summary(download_path, media_path, summary)
                    add_media_to_database(path, info_dict, segments, summary, keywords,
                                          custom_prompt, whisper_model)
        except Exception as e:
            # Keep going with the remaining paths; one bad item must not abort the batch.
            logging.error(f"Error processing {path}: {str(e)}")
            continue

    return transcription_text
def signal_handler(sig, frame):
    """Log the received signal, run cleanup_process(), and exit with status 0.

    ``frame`` is accepted because the ``signal`` module passes it to handlers; it is unused.
    """
    logging.info('Signal handler called with signal: %s', sig)
    cleanup_process()
    raise SystemExit(0)
############################## MAIN ##############################
#
#
# Prompt used for summarization when the user does not supply one with -prompt.
DEFAULT_SUMMARY_PROMPT = (
    "\n\nabove is the transcript of a video. "
    "Please read through the transcript carefully. Identify the main topics that are "
    "discussed over the course of the transcript. Then, summarize the key points about each "
    "main topic in a concise bullet point. The bullet points should cover the key "
    "information conveyed about each topic in the video, but should be much shorter than "
    "the full transcript. Please output your bullet point summary inside <bulletpoints> "
    "tags."
)


def build_arg_parser():
    """Build the command-line argument parser for the script entry point."""
    parser = argparse.ArgumentParser(
        description='Transcribe and summarize videos.',
        epilog='''
Sample commands:
1. Simple Sample command structure:
summarize.py <path_to_video> -api openai -k tag_one tag_two tag_three
2. Rolling Summary Sample command structure:
summarize.py <path_to_video> -api openai -prompt "custom_prompt_goes_here-is-appended-after-transcription" -roll -detail 0.01 -k tag_one tag_two tag_three
3. FULL Sample command structure:
summarize.py <path_to_video> -api openai -ns 2 -wm small.en -off 0 -vad -log INFO -prompt "custom_prompt" -overwrite -roll -detail 0.01 -k tag_one tag_two tag_three
4. Sample command structure for UI:
summarize.py -gui -log DEBUG
''',
        formatter_class=argparse.RawTextHelpFormatter
    )
    parser.add_argument('input_path', type=str, help='Path or URL of the video', nargs='?')
    parser.add_argument('-v', '--video', action='store_true', help='Download the video instead of just the audio')
    parser.add_argument('-api', '--api_name', type=str, help='API name for summarization (optional)')
    parser.add_argument('-key', '--api_key', type=str, help='API key for summarization (optional)')
    parser.add_argument('-ns', '--num_speakers', type=int, default=2, help='Number of speakers (default: 2)')
    parser.add_argument('-wm', '--whisper_model', type=str, default='small',
                        help='Whisper model (default: small)| Options: tiny.en, tiny, base.en, base, small.en, small, medium.en, '
                             'medium, large-v1, large-v2, large-v3, large, distil-large-v2, distil-medium.en, '
                             'distil-small.en')
    parser.add_argument('-off', '--offset', type=int, default=0, help='Offset in seconds (default: 0)')
    parser.add_argument('-vad', '--vad_filter', action='store_true', help='Enable VAD filter')
    parser.add_argument('-log', '--log_level', type=str, default='INFO',
                        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], help='Log level (default: INFO)')
    # NOTE(review): default=True means the UI is always launched and the CLI branch below is
    # never reached; this looks deliberate for the hosted deployment - confirm.
    parser.add_argument('-gui', '--user_interface', action='store_true', default=True, help="Launch the Gradio user interface")
    parser.add_argument('-demo', '--demo_mode', action='store_true', help='Enable demo mode')
    parser.add_argument('-prompt', '--custom_prompt', type=str,
                        help='Pass in a custom prompt to be used in place of the existing one.\n (Probably should just '
                             'modify the script itself...)')
    parser.add_argument('-overwrite', '--overwrite', action='store_true', help='Overwrite existing files')
    parser.add_argument('-roll', '--rolling_summarization', action='store_true', help='Enable rolling summarization')
    parser.add_argument('-detail', '--detail_level', type=float, help='Mandatory if rolling summarization is enabled, '
                                                                      'defines the chunk size.\n Default is 0.01(lots '
                                                                      'of chunks) -> 1.00 (few chunks)\n Currently '
                                                                      'only OpenAI works. ',
                        default=0.01, )
    parser.add_argument('-model', '--llm_model', type=str, default='',
                        help='Model to use for LLM summarization (only used for vLLM/TabbyAPI)')
    parser.add_argument('-k', '--keywords', nargs='+', default=['cli_ingest_no_tag'],
                        help='Keywords for tagging the media, can use multiple separated by spaces (default: cli_ingest_no_tag)')
    parser.add_argument('--log_file', type=str, help='Where to save logfile (non-default)')
    parser.add_argument('--local_llm', action='store_true',
                        help="Use a local LLM from the script(Downloads llamafile from github and 'mistral-7b-instruct-v0.2.Q8' - 8GB model from Huggingface)")
    parser.add_argument('--server_mode', action='store_true',
                        help='Run in server mode (This exposes the GUI/Server to the network)')
    parser.add_argument('--share_public', type=int, default=7860,
                        help="This will use Gradio's built-in ngrok tunneling to share the server publicly on the internet. Specify the port to use (default: 7860)")
    parser.add_argument('--port', type=int, default=7860, help='Port to run the server on')
    parser.add_argument('--ingest_text_file', action='store_true',
                        help='Ingest .txt files as content instead of treating them as URL lists')
    parser.add_argument('--text_title', type=str, help='Title for the text file being ingested')
    parser.add_argument('--text_author', type=str, help='Author of the text file being ingested')
    parser.add_argument('--diarize', action='store_true', help='Enable speaker diarization')
    # parser.add_argument('--offload', type=int, default=20, help='Numbers of layers to offload to GPU for Llamafile usage')
    # parser.add_argument('-o', '--output_path', type=str, help='Path to save the output file')
    return parser


def resolve_custom_prompt(custom_prompt):
    """Return ``custom_prompt`` when one was supplied, otherwise the default summary prompt."""
    return custom_prompt if custom_prompt else DEFAULT_SUMMARY_PROMPT


if __name__ == "__main__":
    # Register signal handlers
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    # Logging setup
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

    # Load Config
    loaded_config_data = load_and_log_configs()
    if loaded_config_data:
        logging.info("Main: Configuration loaded successfully")
        # You can access the configuration data like this:
        # print(f"OpenAI API Key: {config_data['api_keys']['openai']}")
        # print(f"Anthropic Model: {config_data['models']['anthropic']}")
        # print(f"Kobold API IP: {config_data['local_apis']['kobold']['ip']}")
        # print(f"Output Path: {config_data['output_path']}")
        # print(f"Processing Choice: {config_data['processing_choice']}")
    else:
        print("Failed to load configuration")

    # Print ascii_art
    print_hello()

    transcription_text = None

    parser = build_arg_parser()
    args = parser.parse_args()

    # Set Chunking values/variables
    set_chunk_txt_by_words = False
    set_max_txt_chunk_words = 0
    set_chunk_txt_by_sentences = False
    set_max_txt_chunk_sentences = 0
    set_chunk_txt_by_paragraphs = False
    set_max_txt_chunk_paragraphs = 0
    set_chunk_txt_by_tokens = False
    set_max_txt_chunk_tokens = 0

    # Falsy command-line values are normalised to None.
    share_public = args.share_public or None
    server_mode = True if args.server_mode else None
    server_port = args.port or None

    ########## Logging setup
    logger = logging.getLogger()
    logger.setLevel(getattr(logging, args.log_level))
    # No extra console handler is attached here: the original built one but never added it
    # to the logger, so console output comes from the existing root handler only.

    if args.log_file:
        # Create file handler
        file_handler = logging.FileHandler(args.log_file)
        file_handler.setLevel(getattr(logging, args.log_level))
        file_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
        file_handler.setFormatter(file_formatter)
        logger.addHandler(file_handler)
        logger.info(f"Log file created at: {args.log_file}")

    # Check if the user wants to use the local LLM from the script
    local_llm = args.local_llm
    logging.info(f'Local LLM flag: {local_llm}')

    # Check if the user wants to ingest a text file (singular or multiple from a folder)
    if args.input_path is not None:
        if os.path.isdir(args.input_path) and args.ingest_text_file:
            results = ingest_folder(args.input_path, keywords=args.keywords)
            for result in results:
                print(result)
            # NOTE(review): original indentation was lost; confirm whether folder ingestion
            # should also exit here instead of continuing to the UI/CLI handling below.
        elif args.input_path.lower().endswith('.txt') and args.ingest_text_file:
            result = ingest_text_file(args.input_path, title=args.text_title, author=args.text_author,
                                      keywords=args.keywords)
            print(result)
            sys.exit(0)

    # Launch the GUI
    # This is huggingface so:
    if args.user_interface:
        if local_llm:
            local_llm_function()
            time.sleep(2)
            webbrowser.open_new_tab('http://127.0.0.1:7860')
        launch_ui()
    elif not args.input_path:
        parser.print_help()
        sys.exit(1)
    else:
        logging.info('Starting the transcription and summarization process.')
        logging.info(f'Input path: {args.input_path}')
        logging.info(f'API Name: {args.api_name}')
        logging.info(f'Number of speakers: {args.num_speakers}')
        logging.info(f'Whisper model: {args.whisper_model}')
        logging.info(f'Offset: {args.offset}')
        logging.info(f'VAD filter: {args.vad_filter}')
        logging.info(f'Log Level: {args.log_level}')
        logging.info(f'Demo Mode: {args.demo_mode}')
        logging.info(f'Custom Prompt: {args.custom_prompt}')
        logging.info(f'Overwrite: {args.overwrite}')
        logging.info(f'Rolling Summarization: {args.rolling_summarization}')
        logging.info(f'User Interface: {args.user_interface}')
        logging.info(f'Video Download: {args.video}')
        # logging.info(f'Save File location: {args.output_path}')
        # logging.info(f'Log File location: {args.log_file}')

        api_name = args.api_name

        ########## Custom Prompt setup
        # Resolve the prompt once so both the module-level name and the attribute read by the
        # main() call below are always defined, whether or not -prompt was given.
        custom_prompt_input = resolve_custom_prompt(args.custom_prompt)
        args.custom_prompt_input = custom_prompt_input
        if not args.custom_prompt:
            logging.debug("No custom prompt defined, will use default")
            print("No custom prompt defined, will use default")
        else:
            logging.debug(f"Custom prompt defined, will use \n\n{custom_prompt_input} \n\nas the prompt")
            print(f"Custom Prompt has been defined. Custom prompt: \n\n {args.custom_prompt}")

        summary = None  # Initialize to ensure it's always defined
        if args.detail_level is None:
            args.detail_level = 0.01

        # FIXME
        # if args.api_name and args.rolling_summarization and any(
        #         key.startswith(args.api_name) and value is not None for key, value in api_keys.items()):
        #     logging.info(f'MAIN: API used: {args.api_name}')
        #     logging.info('MAIN: Rolling Summarization will be performed.')
        if args.api_name:
            logging.info(f'MAIN: API used: {args.api_name}')
            logging.info('MAIN: Summarization (not rolling) will be performed.')
        else:
            logging.info('No API specified. Summarization will not be performed.')

        logging.debug("Platform check being performed...")
        platform_check()
        logging.debug("CUDA check being performed...")
        cuda_check()
        processing_choice = "cpu"
        logging.debug("ffmpeg check being performed...")
        check_ffmpeg()
        # download_ffmpeg()

        llm_model = args.llm_model or None
        # FIXME - dirty hack
        args.time_based = False

        try:
            results = main(args.input_path, api_name=args.api_name, api_key=args.api_key,
                           num_speakers=args.num_speakers, whisper_model=args.whisper_model, offset=args.offset,
                           vad_filter=args.vad_filter, download_video_flag=args.video,
                           custom_prompt=args.custom_prompt_input,
                           overwrite=args.overwrite, rolling_summarization=args.rolling_summarization,
                           detail=args.detail_level, keywords=args.keywords, llm_model=llm_model,
                           time_based=args.time_based, set_chunk_txt_by_words=set_chunk_txt_by_words,
                           set_max_txt_chunk_words=set_max_txt_chunk_words,
                           set_chunk_txt_by_sentences=set_chunk_txt_by_sentences,
                           set_max_txt_chunk_sentences=set_max_txt_chunk_sentences,
                           set_chunk_txt_by_paragraphs=set_chunk_txt_by_paragraphs,
                           set_max_txt_chunk_paragraphs=set_max_txt_chunk_paragraphs,
                           set_chunk_txt_by_tokens=set_chunk_txt_by_tokens,
                           set_max_txt_chunk_tokens=set_max_txt_chunk_tokens,
                           ingest_text_file=args.ingest_text_file,
                           diarize=args.diarize)
            logging.info('Transcription process completed.')
            atexit.register(cleanup_process)
        except Exception as e:
            logging.error('An error occurred during the transcription process.')
            logging.error(str(e))
            sys.exit(1)
        finally:
            cleanup_process()