# Helper funcs for LLM_XXXXX.py
import time
import threading
from datetime import datetime

import tiktoken
import GPUtil
import psutil
import torch
from langchain_core.output_parsers.format_instructions import JSON_FORMAT_INSTRUCTIONS
from transformers import AutoTokenizer


def save_individual_prompt(prompt_template, txt_file_path_ind_prompt):
    # Write a single prompt template to a UTF-8 text file.
    with open(txt_file_path_ind_prompt, 'w', encoding='utf-8') as file:
        file.write(prompt_template)


def remove_colons_and_double_apostrophes(text):
    # Strip colons and double-quote characters from the text.
    return text.replace(":", "").replace("\"", "")


def count_tokens(string, vendor, model_name):
    # Token counts include the JSON format instructions appended to every prompt.
    full_string = string + JSON_FORMAT_INSTRUCTIONS

    def run_count(full_string, model_name):
        # tiktoken maps OpenAI model names to their encodings.
        encoding = tiktoken.encoding_for_model(model_name)
        tokens = encoding.encode(full_string)
        return len(tokens)

    try:
        if vendor == 'mistral':
            # Mistral models use a Hugging Face tokenizer. Note this reloads
            # the tokenizer on every call; cache it if counting in a loop.
            tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
            tokens = tokenizer.tokenize(full_string)
            return len(tokens)
        else:
            return run_count(full_string, model_name)
    except Exception as e:
        print(f"An error occurred: {e}")
        return 0


class SystemLoadMonitor:
    def __init__(self, logger) -> None:
        self.monitoring_thread = None
        self.logger = logger
        # Peak-usage stats shared with the monitor thread; the 'monitoring'
        # flag doubles as the thread's stop signal.
        self.gpu_usage = {'max_cpu_usage': 0, 'max_load': 0, 'max_vram_usage': 0,
                          'max_ram_usage': 0, 'monitoring': True}
        self.start_time = None
        self.tool_start_time = None
        self.inference_time = 0  # set by stop_inference_timer()
        self.has_GPU = torch.cuda.is_available()
        self.monitor_interval = 2  # seconds between samples

    def start_monitoring_usage(self):
        self.start_time = time.time()
        # Daemon thread so a forgotten stop call cannot block interpreter exit.
        self.monitoring_thread = threading.Thread(target=self.monitor_usage,
                                                  args=(self.monitor_interval,),
                                                  daemon=True)
        self.monitoring_thread.start()

    def stop_inference_timer(self):
        # Stop the inference timer and record the elapsed time, then
        # immediately start the tool timer.
        self.inference_time = time.time() - self.start_time
        self.tool_start_time = time.time()

    def monitor_usage(self, interval):
        while self.gpu_usage['monitoring']:
            # GPU monitoring: track peak load and peak VRAM across all GPUs.
            if self.has_GPU:
                for gpu in GPUtil.getGPUs():
                    self.gpu_usage['max_load'] = max(self.gpu_usage['max_load'], gpu.load)
                    # GPUtil reports memory in MB; convert to GB.
                    memory_usage_gb = gpu.memoryUsed / 1024.0
                    self.gpu_usage['max_vram_usage'] = max(self.gpu_usage['max_vram_usage'], memory_usage_gb)

            # RAM monitoring (GB)
            ram_usage = psutil.virtual_memory().used / (1024.0 ** 3)
            self.gpu_usage['max_ram_usage'] = max(self.gpu_usage['max_ram_usage'], ram_usage)

            # CPU monitoring; interval=None compares against the previous call,
            # so the very first sample may read 0.0.
            cpu_usage = psutil.cpu_percent(interval=None)
            self.gpu_usage['max_cpu_usage'] = max(self.gpu_usage['max_cpu_usage'], cpu_usage)

            time.sleep(interval)

    def get_current_datetime(self):
        # Timestamp with colons replaced by underscores so it is filename-safe.
        return datetime.now().strftime('%Y_%m_%dT%H_%M_%S')

    def stop_monitoring_report_usage(self):
        # Signal the monitor thread to stop, then wait for it to finish.
        self.gpu_usage['monitoring'] = False
        self.monitoring_thread.join()

        # Tool time only exists if stop_inference_timer() was called.
        tool_time = time.time() - self.tool_start_time if self.tool_start_time else 0

        report = {
            'inference_time_s': str(round(self.inference_time, 2)),
            'tool_time_s': str(round(tool_time, 2)),
            'max_cpu': str(round(self.gpu_usage['max_cpu_usage'], 2)),
            'max_ram_gb': str(round(self.gpu_usage['max_ram_usage'], 2)),
            'current_time': self.get_current_datetime(),
        }

        self.logger.info(f"Inference Time: {round(self.inference_time, 2)} seconds")
        self.logger.info(f"Tool Time: {round(tool_time, 2)} seconds")
        self.logger.info(f"Max CPU Usage: {round(self.gpu_usage['max_cpu_usage'], 2)}%")
        self.logger.info(f"Max RAM Usage: {round(self.gpu_usage['max_ram_usage'], 2)}GB")

        if self.has_GPU:
            report['max_gpu_load'] = str(round(self.gpu_usage['max_load'] * 100, 2))
            report['max_gpu_vram_gb'] = str(round(self.gpu_usage['max_vram_usage'], 2))
            self.logger.info(f"Max GPU Load: {round(self.gpu_usage['max_load'] * 100, 2)}%")
            self.logger.info(f"Max GPU Memory Usage: {round(self.gpu_usage['max_vram_usage'], 2)}GB")
        else:
            report['max_gpu_load'] = str(0)
            report['max_gpu_vram_gb'] = str(0)

        return report
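

# ---------------------------------------------------------------------------
# Usage sketch (illustrative addition, not part of the original module):
# demonstrates the intended SystemLoadMonitor lifecycle -- start monitoring,
# run inference, call stop_inference_timer() before any post-processing
# "tool" work, then collect the report. The logging setup, the sample prompt,
# and the sleep() stand-ins for real workloads are assumptions for this demo.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    # Count tokens for an OpenAI model (tiktoken may download encoding data
    # on first use; 'gpt-4' here is just an example model name).
    n_tokens = count_tokens("Example prompt text.", 'openai', 'gpt-4')
    logger.info(f"Prompt tokens (incl. JSON instructions): {n_tokens}")

    monitor = SystemLoadMonitor(logger)
    monitor.start_monitoring_usage()

    time.sleep(3)   # stand-in for the actual LLM inference call
    monitor.stop_inference_timer()

    time.sleep(1)   # stand-in for post-inference tool work (parsing, saving)
    report = monitor.stop_monitoring_report_usage()
    print(report)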