oceansweep committed on
Commit
451d270
1 Parent(s): cce77c2

Upload 3 files

Browse files
App_Function_Libraries/Utils/System_Checks_Lib.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # System_Checks_Lib.py
2
+ #########################################
3
+ # System Checks Library
4
+ # This library is used to check the system for the necessary dependencies to run the script.
5
+ # It checks for the OS, the availability of the GPU, and the availability of the ffmpeg executable.
6
+ # If the GPU is available, it asks the user if they would like to use it for processing.
7
+ # If ffmpeg is not found, it asks the user if they would like to download it.
8
+ # The script will exit if the user chooses not to download ffmpeg.
9
+ ####
10
+
11
+ ####################
12
+ # Function List
13
+ #
14
+ # 1. platform_check()
15
+ # 2. cuda_check()
16
+ # 3. decide_cpugpu()
17
+ # 4. check_ffmpeg()
18
+ # 5. download_ffmpeg()
19
+ #
20
+ ####################
21
+
22
+
23
+
24
+
25
+ # Import necessary libraries
26
+ import logging
27
+ import os
28
+ import platform
29
+ import requests
30
+ import shutil
31
+ import subprocess
32
+ import zipfile
33
+ # Import Local Libraries
34
+ #from App_Function_Libraries import
35
+ #
36
+ #######################################################################################################################
37
+ # Function Definitions
38
+ #
39
+
40
def platform_check():
    """Detect the host operating system and record it in the global ``userOS``.

    Sets ``userOS`` to ``"Linux"`` or ``"Windows"``.  Any other platform
    prints a message and exits the script.

    Returns:
        str: the detected OS name (also stored in the global), for callers
        that prefer a return value over the global.
    """
    global userOS
    # Call platform.system() once instead of twice.
    system_name = platform.system()
    if system_name == "Linux":
        print("Linux OS detected \n Running Linux appropriate commands")
        userOS = "Linux"
    elif system_name == "Windows":
        print("Windows OS detected \n Running Windows appropriate commands")
        userOS = "Windows"
    else:
        print("Other OS detected \n Maybe try running things manually?")
        exit()
    return userOS
51
+
52
+
53
+ # Check for NVIDIA GPU and CUDA availability
54
# Check for NVIDIA GPU and CUDA availability
def cuda_check():
    """Probe for an NVIDIA GPU via ``nvidia-smi`` and set the global
    ``processing_choice`` to ``"cuda"`` or ``"cpu"`` accordingly.

    Any failure to run or parse ``nvidia-smi`` falls back to ``"cpu"``.

    Returns:
        str: the chosen processing mode ("cuda" or "cpu").
    """
    global processing_choice
    try:
        # Run nvidia-smi as an argument list instead of shell=True: no shell
        # involvement needed for a fixed command, and it avoids quoting issues.
        nvidia_smi_output = subprocess.check_output(["nvidia-smi"]).decode()

        # Look for CUDA version in the output
        if "CUDA Version" in nvidia_smi_output:
            cuda_version = next(
                (line.split(":")[-1].strip() for line in nvidia_smi_output.splitlines() if "CUDA Version" in line),
                "Not found")
            print(f"NVIDIA GPU with CUDA Version {cuda_version} is available.")
            processing_choice = "cuda"
        else:
            print("CUDA is not installed or configured correctly.")
            processing_choice = "cpu"

    except subprocess.CalledProcessError as e:
        print(f"Failed to run 'nvidia-smi': {str(e)}")
        processing_choice = "cpu"
    except Exception as e:
        # Also covers FileNotFoundError when nvidia-smi is absent.
        print(f"An error occurred: {str(e)}")
        processing_choice = "cpu"

    # Informational only: CUDA_VISIBLE_DEVICES restricts which GPUs are visible.
    if "CUDA_VISIBLE_DEVICES" in os.environ:
        print("CUDA_VISIBLE_DEVICES is set:", os.environ["CUDA_VISIBLE_DEVICES"])
    else:
        print("CUDA_VISIBLE_DEVICES not set.")
    return processing_choice
83
+
84
+
85
+ # Ask user if they would like to use either their GPU or their CPU for transcription
86
# Ask user if they would like to use either their GPU or their CPU for transcription
def decide_cpugpu():
    """Ask the user whether to use the GPU or the CPU for transcription.

    Sets the global ``processing_choice`` to ``"cuda"`` or ``"cpu"``.
    GPU is only honoured when a previous ``cuda_check()`` established CUDA
    availability.  An unrecognised answer now falls back to "cpu" instead of
    potentially leaving ``processing_choice`` undefined (which previously
    caused a NameError later on).
    """
    global processing_choice
    # Guard against cuda_check() never having been called: treat the GPU as
    # unavailable rather than raising NameError on the comparison below.
    cuda_available = globals().get("processing_choice") == "cuda"
    processing_input = input("Would you like to use your GPU or CPU for transcription? (1/cuda)GPU/(2/cpu)CPU): ")
    if cuda_available and (processing_input.lower() == "cuda" or processing_input == "1"):
        print("You've chosen to use the GPU.")
        logging.debug("GPU is being used for processing")
        processing_choice = "cuda"
    elif processing_input.lower() == "cpu" or processing_input == "2":
        print("You've chosen to use the CPU.")
        logging.debug("CPU is being used for processing")
        processing_choice = "cpu"
    else:
        print("Invalid choice. Please select either GPU or CPU.")
        # Fail safe: default to CPU so downstream code never sees an unset value.
        processing_choice = "cpu"
99
+
100
+
101
+ # check for existence of ffmpeg
102
# check for existence of ffmpeg
def check_ffmpeg():
    """Verify that ffmpeg is available (on PATH or in ./Bin) and, when it is
    not, offer to download it on Windows or print install hints on Linux.

    Relies on the global ``userOS`` set by ``platform_check()``.  On an
    unsupported OS the user is asked whether to exit.
    """
    if shutil.which("ffmpeg") or (os.path.exists("Bin") and os.path.isfile(".\\Bin\\ffmpeg.exe")):
        logging.debug("ffmpeg found installed on the local system, in the local PATH, or in the './Bin' folder")
    else:
        logging.debug("ffmpeg not installed on the local system/in local PATH")
        print(
            "ffmpeg is not installed.\n\n You can either install it manually, or through your package manager of "
            "choice.\n Windows users, builds are here: https://www.gyan.dev/ffmpeg/builds/")
        if userOS == "Windows":
            download_ffmpeg()
        elif userOS == "Linux":
            print(
                "You should install ffmpeg using your platform's appropriate package manager, 'apt install ffmpeg',"
                "'dnf install ffmpeg' or 'pacman', etc.")
        else:
            logging.debug("running an unsupported OS")
            print("You're running an unsupported/untested OS")
            exit_script = input("Let's exit the script, unless you're feeling lucky? (y/n)")
            # BUG FIX: the original `if exit_script == "y" or "yes" or "1"` was
            # always true — non-empty string literals are truthy — so the
            # script exited regardless of the answer.
            if exit_script.lower() in ("y", "yes", "1"):
                exit()
123
+
124
+
125
+ # Download ffmpeg
126
# Download ffmpeg
def download_ffmpeg():
    """Interactively download a Windows ffmpeg build and place ffmpeg.exe in ./Bin.

    Prompts the user for confirmation, downloads the gyan.dev
    "release-essentials" zip, extracts only ffmpeg.exe into the local
    ``Bin`` folder, and deletes the zip afterwards.  Declining the prompt,
    a failed download, or a zip without ffmpeg.exe all return without error.

    NOTE(review): the whole response body is held in memory
    (``response.content``) — acceptable for this archive size, but not a
    streaming download.
    """
    user_choice = input("Do you want to download ffmpeg? (y)Yes/(n)No: ")
    if user_choice.lower() in ['yes', 'y', '1']:
        print("Downloading ffmpeg")
        url = "https://www.gyan.dev/ffmpeg/builds/ffmpeg-release-essentials.zip"
        response = requests.get(url)

        if response.status_code == 200:
            print("Saving ffmpeg zip file")
            logging.debug("Saving ffmpeg zip file")
            zip_path = "ffmpeg-release-essentials.zip"
            with open(zip_path, 'wb') as file:
                file.write(response.content)

            logging.debug("Extracting the 'ffmpeg.exe' file from the zip")
            print("Extracting ffmpeg.exe from zip file to '/Bin' folder")
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                # Find the ffmpeg.exe file within the zip (the archive nests
                # it inside a versioned folder, so the exact path varies).
                ffmpeg_path = None
                for file_info in zip_ref.infolist():
                    if file_info.filename.endswith("ffmpeg.exe"):
                        ffmpeg_path = file_info.filename
                        break

                if ffmpeg_path is None:
                    logging.error("ffmpeg.exe not found in the zip file.")
                    print("ffmpeg.exe not found in the zip file.")
                    return

                logging.debug("checking if the './Bin' folder exists, creating if not")
                bin_folder = "Bin"
                if not os.path.exists(bin_folder):
                    logging.debug("Creating a folder for './Bin', it didn't previously exist")
                    os.makedirs(bin_folder)

                # Extract preserves the zip's internal directory structure...
                logging.debug("Extracting 'ffmpeg.exe' to the './Bin' folder")
                zip_ref.extract(ffmpeg_path, path=bin_folder)

                # ...so flatten it: move the exe up to Bin/ffmpeg.exe.
                logging.debug("Moving 'ffmpeg.exe' to the './Bin' folder")
                src_path = os.path.join(bin_folder, ffmpeg_path)
                dst_path = os.path.join(bin_folder, "ffmpeg.exe")
                shutil.move(src_path, dst_path)

            logging.debug("Removing ffmpeg zip file")
            print("Deleting zip file (we've already extracted ffmpeg.exe, no worries)")
            os.remove(zip_path)

            logging.debug("ffmpeg.exe has been downloaded and extracted to the './Bin' folder.")
            print("ffmpeg.exe has been successfully downloaded and extracted to the './Bin' folder.")
        else:
            logging.error("Failed to download the zip file.")
            print("Failed to download the zip file.")
    else:
        logging.debug("User chose to not download ffmpeg")
        print("ffmpeg will not be downloaded.")
181
+
182
+ #
183
+ #
184
+ #######################################################################################################################
App_Function_Libraries/Utils/Utils.py ADDED
@@ -0,0 +1,614 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Utils.py
2
+ #########################################
3
+ # General Utilities Library
4
+ # This library is used to hold random utilities used by various other libraries.
5
+ #
6
+ ####
7
+ ####################
8
+ # Function List
9
+ #
10
+ # 1. extract_text_from_segments(segments: List[Dict]) -> str
11
+ # 2. download_file(url, dest_path, expected_checksum=None, max_retries=3, delay=5)
12
+ # 3. verify_checksum(file_path, expected_checksum)
13
+ # 4. create_download_directory(title)
14
+ # 5. sanitize_filename(filename)
15
+ # 6. normalize_title(title)
16
+ # 7.
17
+ #
18
+ #
19
+ #
20
+ ####################
21
+ # Import necessary libraries
22
+ import configparser
23
+ import hashlib
24
+ import json
25
+ import logging
26
+ import os
27
+ import re
28
+ import time
29
+ from datetime import timedelta
30
+ from urllib.parse import urlparse, parse_qs, urlencode, urlunparse
31
+
32
+ import requests
33
+ import unicodedata
34
+ from tqdm import tqdm
35
+
36
+ #######################################################################################################################
37
+ # Function Definitions
38
+ #
39
+
40
def extract_text_from_segments(segments):
    """Pull transcription text out of an arbitrarily nested segment structure.

    Searches dicts depth-first for a 'Text' key and joins the texts of list
    elements with single spaces.

    Returns:
        str: the stripped extracted text, or an error message string when
        no text can be found.
    """
    logging.debug(f"Segments received: {segments}")
    logging.debug(f"Type of segments: {type(segments)}")

    def _walk(node):
        # Lists: join whatever each element yields, skipping empties.
        if isinstance(node, list):
            return ' '.join(filter(None, (_walk(item) for item in node)))
        # Dicts: a direct 'Text' key wins; otherwise recurse into containers.
        if isinstance(node, dict):
            for key, value in node.items():
                if key == 'Text':
                    return value
                if isinstance(value, (dict, list)):
                    found = _walk(value)
                    if found:
                        return found
        return None

    extracted = _walk(segments)
    if not extracted:
        logging.error(f"Unable to extract text from segments: {segments}")
        return "Error: Unable to extract transcription"
    return extracted.strip()
64
+
65
def import_data(file):
    """Import data from *file*.

    Placeholder: not yet implemented; currently a no-op returning None.
    """
    # TODO: implement actual data import
    pass
68
+
69
+ #
70
+ #
71
+ #######################
72
+ # Temp file cleanup
73
+ #
74
+ # Global list to keep track of downloaded files
75
# Module-level registry of files downloaded this session; targeted by cleanup.
downloaded_files = []

def cleanup_downloads():
    """Delete every file recorded in ``downloaded_files`` (server-exit hook).

    Errors are reported to stdout but never raised, so shutdown always
    proceeds.
    """
    for path in downloaded_files:
        try:
            if os.path.exists(path):
                os.remove(path)
                print(f"Cleaned up file: {path}")
        except Exception as err:
            print(f"Error cleaning up file {path}: {err}")
86
+
87
+ #
88
+ #
89
+ #######################################################################################################################
90
+
91
+
92
+ #######################################################################################################################
93
+ # Config loading
94
+ #
95
+
96
def load_comprehensive_config():
    """Load ``config.txt`` from the project root (one level above this file).

    Returns:
        configparser.ConfigParser: the populated parser.

    Raises:
        FileNotFoundError: when the config file is missing or unreadable.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.dirname(here)
    config_path = os.path.join(project_root, 'config.txt')
    parser = configparser.ConfigParser()
    # ConfigParser.read returns the list of files it actually parsed;
    # an empty list means the file was not found/readable.
    if not parser.read(config_path):
        raise FileNotFoundError(f"Config file not found at {config_path}")
    return parser
110
+
111
+
112
+ # FIXME - update to include prompt path in return statement
113
def load_and_log_configs():
    """Load config.txt (via ``load_comprehensive_config``) and return all
    settings as a nested dict::

        {'api_keys': {...}, 'models': {...}, 'local_api_ip': {...},
         'output_path': str, 'processing_choice': str, 'prompt_path': str}

    API keys are logged partially masked (first/last 5 characters).
    Returns None when the configuration cannot be loaded.
    """

    def _mask(key):
        # BUG FIX: the original masking f-strings sliced the key
        # unconditionally (key[:5]) and raised TypeError when a key was
        # missing (fallback=None), which silently collapsed the whole config
        # load to None via the outer except.
        return f"{key[:5]}...{key[-5:]}" if key else str(key)

    try:
        config = load_comprehensive_config()
        if config is None:
            logging.error("Config is None, cannot proceed")
            return None

        # API Keys (logged masked)
        anthropic_api_key = config.get('API', 'anthropic_api_key', fallback=None)
        logging.debug(f"Loaded Anthropic API Key: {_mask(anthropic_api_key)}")
        cohere_api_key = config.get('API', 'cohere_api_key', fallback=None)
        logging.debug(f"Loaded Cohere API Key: {_mask(cohere_api_key)}")
        groq_api_key = config.get('API', 'groq_api_key', fallback=None)
        logging.debug(f"Loaded Groq API Key: {_mask(groq_api_key)}")
        openai_api_key = config.get('API', 'openai_api_key', fallback=None)
        logging.debug(f"Loaded OpenAI API Key: {_mask(openai_api_key)}")
        huggingface_api_key = config.get('API', 'huggingface_api_key', fallback=None)
        logging.debug(f"Loaded HuggingFace API Key: {_mask(huggingface_api_key)}")
        openrouter_api_key = config.get('API', 'openrouter_api_key', fallback=None)
        logging.debug(f"Loaded OpenRouter API Key: {_mask(openrouter_api_key)}")
        deepseek_api_key = config.get('API', 'deepseek_api_key', fallback=None)
        logging.debug(f"Loaded DeepSeek API Key: {_mask(deepseek_api_key)}")
        mistral_api_key = config.get('API', 'mistral_api_key', fallback=None)
        logging.debug(f"Loaded Mistral API Key: {_mask(mistral_api_key)}")

        # Models (with upstream defaults)
        anthropic_model = config.get('API', 'anthropic_model', fallback='claude-3-sonnet-20240229')
        cohere_model = config.get('API', 'cohere_model', fallback='command-r-plus')
        groq_model = config.get('API', 'groq_model', fallback='llama3-70b-8192')
        openai_model = config.get('API', 'openai_model', fallback='gpt-4-turbo')
        huggingface_model = config.get('API', 'huggingface_model', fallback='CohereForAI/c4ai-command-r-plus')
        openrouter_model = config.get('API', 'openrouter_model', fallback='microsoft/wizardlm-2-8x22b')
        deepseek_model = config.get('API', 'deepseek_model', fallback='deepseek-chat')
        mistral_model = config.get('API', 'mistral_model', fallback='mistral-large-latest')

        logging.debug(f"Loaded Anthropic Model: {anthropic_model}")
        logging.debug(f"Loaded Cohere Model: {cohere_model}")
        logging.debug(f"Loaded Groq Model: {groq_model}")
        logging.debug(f"Loaded OpenAI Model: {openai_model}")
        logging.debug(f"Loaded HuggingFace Model: {huggingface_model}")
        logging.debug(f"Loaded OpenRouter Model: {openrouter_model}")
        logging.debug(f"Loaded Deepseek Model: {deepseek_model}")
        logging.debug(f"Loaded Mistral Model: {mistral_model}")

        # Local-Model endpoints and keys
        kobold_api_ip = config.get('Local-API', 'kobold_api_IP', fallback='http://127.0.0.1:5000/api/v1/generate')
        kobold_api_key = config.get('Local-API', 'kobold_api_key', fallback='')

        llama_api_IP = config.get('Local-API', 'llama_api_IP', fallback='http://127.0.0.1:8080/v1/chat/completions')
        llama_api_key = config.get('Local-API', 'llama_api_key', fallback='')

        ooba_api_IP = config.get('Local-API', 'ooba_api_IP', fallback='http://127.0.0.1:5000/v1/chat/completions')
        ooba_api_key = config.get('Local-API', 'ooba_api_key', fallback='')

        tabby_api_IP = config.get('Local-API', 'tabby_api_IP', fallback='http://127.0.0.1:5000/api/v1/generate')
        tabby_api_key = config.get('Local-API', 'tabby_api_key', fallback=None)
        tabby_model = config.get('models', 'tabby_model', fallback=None)

        vllm_api_url = config.get('Local-API', 'vllm_api_IP', fallback='http://127.0.0.1:500/api/v1/chat/completions')
        vllm_api_key = config.get('Local-API', 'vllm_api_key', fallback=None)
        vllm_model = config.get('Local-API', 'vllm_model', fallback=None)

        ollama_api_url = config.get('Local-API', 'ollama_api_IP', fallback='http://127.0.0.1:11434/api/generate')
        ollama_api_key = config.get('Local-API', 'ollama_api_key', fallback=None)
        ollama_model = config.get('Local-API', 'ollama_model', fallback=None)

        aphrodite_api_url = config.get('Local-API', 'aphrodite_api_IP', fallback='http://127.0.0.1:8080/v1/chat/completions')
        aphrodite_api_key = config.get('Local-API', 'aphrodite_api_key', fallback='')

        logging.debug(f"Loaded Kobold API IP: {kobold_api_ip}")
        logging.debug(f"Loaded Llama API IP: {llama_api_IP}")
        logging.debug(f"Loaded Ooba API IP: {ooba_api_IP}")
        logging.debug(f"Loaded Tabby API IP: {tabby_api_IP}")
        logging.debug(f"Loaded VLLM API URL: {vllm_api_url}")

        # Retrieve output paths from the configuration file
        output_path = config.get('Paths', 'output_path', fallback='results')
        logging.debug(f"Output path set to: {output_path}")

        # Retrieve processing choice from the configuration file
        processing_choice = config.get('Processing', 'processing_choice', fallback='cpu')
        logging.debug(f"Processing choice set to: {processing_choice}")

        # Prompt database path (FIXME resolved: now included in the return value)
        prompt_path = config.get('Prompts', 'prompt_path', fallback='prompts.db')

        return {
            'api_keys': {
                'anthropic': anthropic_api_key,
                'cohere': cohere_api_key,
                'groq': groq_api_key,
                'openai': openai_api_key,
                'huggingface': huggingface_api_key,
                'openrouter': openrouter_api_key,
                'deepseek': deepseek_api_key,
                'mistral': mistral_api_key,
                'kobold': kobold_api_key,
                'llama': llama_api_key,
                'ooba': ooba_api_key,
                'tabby': tabby_api_key,
                'vllm': vllm_api_key,
                'ollama': ollama_api_key,
                # BUG FIX: aphrodite key was loaded but never returned.
                'aphrodite': aphrodite_api_key
            },
            'models': {
                'anthropic': anthropic_model,
                'cohere': cohere_model,
                'groq': groq_model,
                'openai': openai_model,
                'huggingface': huggingface_model,
                'openrouter': openrouter_model,
                'deepseek': deepseek_model,
                'mistral': mistral_model,
                'vllm': vllm_model,
                'tabby': tabby_model,
                'ollama': ollama_model
            },
            'local_api_ip': {
                'kobold': kobold_api_ip,
                'llama': llama_api_IP,
                'ooba': ooba_api_IP,
                'tabby': tabby_api_IP,
                'vllm': vllm_api_url,
                'ollama': ollama_api_url,
                'aphrodite': aphrodite_api_url
            },
            'output_path': output_path,
            'processing_choice': processing_choice,
            'prompt_path': prompt_path
        }

    except Exception as e:
        logging.error(f"Error loading config: {str(e)}")
        return None
259
+
260
+ #
261
+ # End of Config loading
262
+ #######################################################################################################################
263
+
264
+
265
+ #######################################################################################################################
266
+ #
267
+ # Prompt Handling Functions
268
+
269
+
270
+
271
+ #
272
+ # End of Prompt Handling Functions
273
+ ### #############################################################################################################
274
+
275
+ #######################################################################################################################
276
+ #
277
+ # Misc-Functions
278
+
279
+ # Log file
280
+ # logging.basicConfig(filename='debug-runtime.log', encoding='utf-8', level=logging.DEBUG)
281
+
282
def format_metadata_as_text(metadata):
    """Render a video-metadata dict as human-readable text, one field per line.

    None values are skipped; lists are comma-joined; ``upload_date``
    (YYYYMMDD) becomes YYYY-MM-DD; ``view_count``/``like_count`` get
    thousands separators; ``duration`` (integer seconds) becomes HH:MM:SS.

    Returns:
        str: the formatted text, or "No metadata available" for empty input.
    """
    if not metadata:
        return "No metadata available"

    formatted_text = "Video Metadata:\n"
    for key, value in metadata.items():
        if value is None:
            continue
        if isinstance(value, list):
            # Join list items with commas
            formatted_value = ", ".join(str(item) for item in value)
        elif key == 'upload_date' and len(str(value)) == 8:
            # BUG FIX: slice the *string* form of the value — the guard
            # already accepts integer dates (e.g. 20230615), but the original
            # sliced `value` directly and raised TypeError for ints.
            date_str = str(value)
            formatted_value = f"{date_str[:4]}-{date_str[4:6]}-{date_str[6:]}"
        elif key in ['view_count', 'like_count']:
            # Format large numbers with commas
            formatted_value = f"{value:,}"
        elif key == 'duration':
            # Convert seconds to HH:MM:SS format
            hours, remainder = divmod(value, 3600)
            minutes, seconds = divmod(remainder, 60)
            formatted_value = f"{hours:02d}:{minutes:02d}:{seconds:02d}"
        else:
            formatted_value = str(value)

        formatted_text += f"{key.capitalize()}: {formatted_value}\n"
    return formatted_text.strip()
308
+
309
+ # # Example usage:
310
+ # example_metadata = {
311
+ # 'title': 'Sample Video Title',
312
+ # 'uploader': 'Channel Name',
313
+ # 'upload_date': '20230615',
314
+ # 'view_count': 1000000,
315
+ # 'like_count': 50000,
316
+ # 'duration': 3725, # 1 hour, 2 minutes, 5 seconds
317
+ # 'tags': ['tag1', 'tag2', 'tag3'],
318
+ # 'description': 'This is a sample video description.'
319
+ # }
320
+ #
321
+ # print(format_metadata_as_text(example_metadata))
322
+
323
+
324
def convert_to_seconds(time_str):
    """Convert "HH:MM:SS", "MM:SS", "SS", or a bare digit string to integer
    seconds.  Falsy input (None, "") yields 0.

    Raises:
        ValueError: when the string has more than three colon-separated
        parts, or a part is not an integer.
    """
    if not time_str:
        return 0

    # Bare digits are already seconds.
    if time_str.isdigit():
        return int(time_str)

    pieces = time_str.split(':')
    if len(pieces) == 3:
        hrs, mins, secs = (int(p) for p in pieces)
        return int(timedelta(hours=hrs, minutes=mins, seconds=secs).total_seconds())
    if len(pieces) == 2:
        mins, secs = (int(p) for p in pieces)
        return int(timedelta(minutes=mins, seconds=secs).total_seconds())
    if len(pieces) == 1:
        return int(pieces[0])
    raise ValueError(f"Invalid time format: {time_str}")
345
+
346
+ #
347
+ # End of Misc-Functions
348
+ #######################################################################################################################
349
+
350
+
351
+ #######################################################################################################################
352
+ #
353
+ # File-saving Function Definitions
354
def save_to_file(video_urls, filename):
    """Write each video URL on its own line to *filename* (overwriting).

    Parameters:
        video_urls (list[str]): URLs to save
        filename (str): destination file path
    """
    with open(filename, 'w') as file:
        file.write('\n'.join(video_urls))
    # BUG FIX: the message previously printed a literal placeholder instead
    # of the actual filename.
    print(f"Video URLs saved to {filename}")
358
+
359
+
360
def save_segments_to_json(segments, file_name="transcription_segments.json"):
    """Serialise transcription *segments* to a JSON file under ./Results.

    Parameters:
        segments (list): transcription segments to persist
        file_name (str): file name inside the Results directory
            (default: "transcription_segments.json")

    Returns:
        str: path of the written JSON file.
    """
    results_dir = "Results"
    # Create the output directory on first use.
    os.makedirs(results_dir, exist_ok=True)

    target_path = os.path.join(results_dir, file_name)
    with open(target_path, 'w', encoding='utf-8') as handle:
        json.dump(segments, handle, ensure_ascii=False, indent=4)
    return target_path
382
+
383
+
384
def download_file(url, dest_path, expected_checksum=None, max_retries=3, delay=5):
    """Download *url* to *dest_path* with resume support, a progress bar,
    optional SHA-256 verification, and retries.

    Parameters:
        url (str): source URL
        dest_path (str): final destination path
        expected_checksum (str|None): hex SHA-256 to verify against
        max_retries (int): number of attempts before giving up
        delay (int): seconds to wait between attempts

    Returns:
        str: dest_path on success.

    Raises:
        ValueError: when the checksum does not match.
        Exception: the last download error after max_retries attempts.
    """
    temp_path = dest_path + '.tmp'

    for attempt in range(max_retries):
        try:
            # Resume a partial download if a temp file is already present.
            resume_header = {}
            if os.path.exists(temp_path):
                resume_header = {'Range': f'bytes={os.path.getsize(temp_path)}-'}

            response = requests.get(url, stream=True, headers=resume_header)
            response.raise_for_status()

            # Total size from headers (0 when the server omits it).
            total_size = int(response.headers.get('content-length', 0))
            initial_pos = os.path.getsize(temp_path) if os.path.exists(temp_path) else 0

            # BUG FIX: a resumed (partial-content) response is signalled by
            # HTTP 206 / a Content-Range *response* header.  The original
            # checked for the request header 'Range' in response.headers,
            # which is never present, so it always opened in 'wb' and
            # discarded previously downloaded bytes.
            resumed = response.status_code == 206 or 'Content-Range' in response.headers
            mode = 'ab' if resumed else 'wb'
            with open(temp_path, mode) as temp_file, tqdm(
                total=total_size, unit='B', unit_scale=True, desc=dest_path, initial=initial_pos, ascii=True
            ) as pbar:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # filter out keep-alive new chunks
                        temp_file.write(chunk)
                        pbar.update(len(chunk))

            # Verify the checksum if provided
            if expected_checksum:
                if not verify_checksum(temp_path, expected_checksum):
                    os.remove(temp_path)
                    raise ValueError("Downloaded file's checksum does not match the expected checksum")

            # Move the file to the final destination
            os.rename(temp_path, dest_path)
            print("Download complete and verified!")
            return dest_path

        except Exception as e:
            print(f"Attempt {attempt + 1} failed: {e}")
            if attempt < max_retries - 1:
                print(f"Retrying in {delay} seconds...")
                time.sleep(delay)
            else:
                print("Max retries reached. Download failed.")
                raise
429
+
430
def create_download_directory(title):
    """Create (if needed) and return ``Results/<normalized title>`` for a download.

    The title is normalised via ``normalize_title`` to be filesystem-safe.

    Returns:
        str: the session directory path.
    """
    base_dir = "Results"
    # Make the title safe for Windows/Unix filesystems.
    safe_title = normalize_title(title)
    logging.debug(f"{title} successfully normalized")
    session_path = os.path.join(base_dir, safe_title)
    if os.path.exists(session_path):
        logging.debug(f"Directory already exists for downloaded video: {session_path}")
    else:
        os.makedirs(session_path, exist_ok=True)
        logging.debug(f"Created directory for downloaded video: {session_path}")
    return session_path
442
+
443
+
444
def safe_read_file(file_path):
    """Read a text file, trying several encodings in turn.

    Returns:
        str: the file contents on success; otherwise a descriptive error
        message (missing file, unexpected error, or undecodable content).
    """
    candidate_encodings = ['utf-8', 'utf-16', 'ascii', 'latin-1', 'iso-8859-1', 'cp1252']
    for enc in candidate_encodings:
        try:
            with open(file_path, 'r', encoding=enc) as handle:
                return handle.read()
        except UnicodeDecodeError:
            # Wrong guess — move on to the next encoding.
            continue
        except FileNotFoundError:
            return f"File not found: {file_path}"
        except Exception as e:
            return f"An error occurred: {e}"
    return f"Unable to decode the file {file_path} with any of the attempted encodings: {candidate_encodings}"
457
+
458
+ #
459
+ # End of Files-saving Function Definitions
460
+ #######################################################################################################################
461
+
462
+
463
+ #######################################################################################################################
464
+ #
465
+ # UUID-Functions
466
+
467
def generate_unique_filename(base_path, base_filename):
    """Return *base_filename*, or ``name_N.ext`` for the smallest N >= 1 that
    does not collide with an existing file under *base_path*."""
    stem, ext = os.path.splitext(base_filename)
    candidate = base_filename
    suffix = 0
    while os.path.exists(os.path.join(base_path, candidate)):
        suffix += 1
        candidate = f"{stem}_{suffix}{ext}"
    return candidate
476
+
477
+
478
def generate_unique_identifier(file_path):
    """Build an identifier ``local:<timestamp>:<md5-8>:<filename>`` for a
    local file.

    MD5 is used purely as a fast content fingerprint (first 8 hex chars),
    not for security.

    Returns:
        str: the composed identifier.
    """
    filename = os.path.basename(file_path)
    timestamp = int(time.time())

    # Generate a hash of the file content
    hasher = hashlib.md5()
    with open(file_path, 'rb') as f:
        hasher.update(f.read())
    content_hash = hasher.hexdigest()[:8]  # first 8 characters of the hash

    # BUG FIX: the identifier previously dropped the filename component
    # (the computed `filename` variable was never used).
    return f"local:{timestamp}:{content_hash}:{filename}"
490
+
491
+ #
492
+ # End of UUID-Functions
493
+ #######################################################################################################################
494
+
495
+
496
+ #######################################################################################################################
497
+ #
498
+ # Backup code
499
+
500
+ #
501
+ # End of backup code
502
+ #######################################################################################################################
503
+
504
+
505
+ #######################################################################################################################
506
+ #
507
+ # Sanitization/Verification Functions
508
+
509
+ # Helper function to validate URL format
510
# Helper function to validate URL format
def is_valid_url(url: str) -> bool:
    """Return True when *url* looks like a well-formed http/https/ftp/ftps URL.

    Accepts domain names, localhost, IPv4 and (loosely) IPv6 hosts, with an
    optional port and path/query component.
    """
    pattern = re.compile(
        r'^(?:http|ftp)s?://'  # scheme
        r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|'  # domain
        r'localhost|'  # localhost
        r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|'  # IPv4
        r'\[?[A-F0-9]*:[A-F0-9:]+\]?)'  # IPv6
        r'(?::\d+)?'  # optional port
        r'(?:/?|[/?]\S+)$', re.IGNORECASE)
    return pattern.match(url) is not None
520
+
521
+
522
def verify_checksum(file_path, expected_checksum):
    """Return True when the SHA-256 digest of *file_path* equals
    *expected_checksum* (a hex string)."""
    digest = hashlib.sha256()
    with open(file_path, 'rb') as handle:
        # Stream in 4 KiB chunks so large files don't load into memory.
        while True:
            chunk = handle.read(4096)
            if not chunk:
                break
            digest.update(chunk)
    return digest.hexdigest() == expected_checksum
528
+
529
+
530
def normalize_title(title):
    """ASCII-fold *title* and strip/replace characters illegal in Windows
    filenames.

    Slashes, backslashes, and colons become underscores; the remaining
    reserved characters (" * ? < > |) are removed.
    """
    # NFKD-normalise then drop anything that is not representable in ASCII.
    ascii_title = unicodedata.normalize('NFKD', title).encode('ascii', 'ignore').decode('ascii')
    # One-pass character mapping instead of chained .replace() calls.
    translation = str.maketrans({'/': '_', '\\': '_', ':': '_',
                                 '"': '', '*': '', '?': '',
                                 '<': '', '>': '', '|': ''})
    return ascii_title.translate(translation)
537
+
538
+
539
def clean_youtube_url(url):
    """Strip the playlist ('list') query parameter from a YouTube URL, if
    present, and return the rebuilt URL."""
    parts = urlparse(url)
    params = parse_qs(parts.query)
    # Remove the playlist parameter when it exists; no-op otherwise.
    params.pop('list', None)
    rebuilt_query = urlencode(params, doseq=True)
    return urlunparse(parts._replace(query=rebuilt_query))
547
+
548
def sanitize_filename(filename):
    """Drop characters invalid in filenames and collapse whitespace runs to
    single spaces, trimming the ends."""
    without_invalid = re.sub(r'[<>:"/\\|?*]', '', filename)
    collapsed = re.sub(r'\s+', ' ', without_invalid)
    return collapsed.strip()
553
+
554
+
555
def format_transcription(content):
    """Reflow raw transcription text into HTML-friendly output.

    Literal ``\\n`` escape sequences become real newlines, sentences are
    split on terminal punctuation and re-joined with normalised spacing,
    and the resulting lines are joined with ``<br>`` tags.
    """
    # Replace '\n' with actual line breaks
    content = content.replace('\\n', '\n')
    # Split the content by newlines first
    lines = content.split('\n')
    formatted_lines = []
    for line in lines:
        # Add extra space after periods for better readability
        # NOTE(review): the second replace appears to map '. ' to itself (a
        # no-op); it was presumably intended to collapse the double spaces
        # introduced by the first replace ('.  ' -> '. ') — confirm against
        # the original source, as whitespace may have been mangled.
        line = line.replace('.', '. ').replace('. ', '. ')

        # Split into sentences using a more comprehensive regex
        sentences = re.split('(?<=[.!?]) +', line)

        # Trim whitespace from each sentence and add a line break
        formatted_sentences = [sentence.strip() for sentence in sentences if sentence.strip()]

        # Join the formatted sentences
        formatted_lines.append(' '.join(formatted_sentences))

    # Join the lines with HTML line breaks
    formatted_content = '<br>'.join(formatted_lines)

    return formatted_content
578
+
579
+
580
def format_file_path(file_path, fallback_path=None):
    """Return *file_path* when it exists on disk, otherwise *fallback_path*
    when that exists, otherwise None."""
    if file_path and os.path.exists(file_path):
        logging.debug(f"File exists: {file_path}")
        return file_path
    if fallback_path and os.path.exists(fallback_path):
        logging.debug(f"File does not exist: {file_path}. Returning fallback path: {fallback_path}")
        return fallback_path
    logging.debug(f"File does not exist: {file_path}. No fallback path available.")
    return None
590
+
591
+ #
592
+ # End of Sanitization/Verification Functions
593
+ #######################################################################################################################
594
+
595
+
596
+ #######################################################################################################################
597
+ #
598
+ # DB Config Loading
599
+
600
+
601
def get_db_config():
    """Read database settings from ./config.txt and return them as a dict.

    Keys: 'type', 'sqlite_path', 'elasticsearch_host', 'elasticsearch_port'.
    Every key has a sensible default, so a missing config.txt or [Database]
    section no longer raises.
    """
    config = configparser.ConfigParser()
    config.read('config.txt')
    return {
        # BUG FIX: the original used config['Database']['type'], which raised
        # KeyError whenever the section or option was absent, unlike every
        # other key here which already had a fallback.
        'type': config.get('Database', 'type', fallback='sqlite'),
        'sqlite_path': config.get('Database', 'sqlite_path', fallback='media_summary.db'),
        'elasticsearch_host': config.get('Database', 'elasticsearch_host', fallback='localhost'),
        'elasticsearch_port': config.getint('Database', 'elasticsearch_port', fallback=9200)
    }
610
+
611
+
612
+ #
613
+ # End of DB Config Loading
614
+ #######################################################################################################################
App_Function_Libraries/Utils/__init__.py ADDED
File without changes